xarray-contrib · brendancol · May 12, 2026 · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/.claude/sweep-api-consistency-state.csv b/.claude/sweep-api-consistency-state.csv
@@ -1,3 +1,3 @@
 module,last_inspected,issue,severity_max,categories_found,notes
-geotiff,2026-05-11,1631,MEDIUM,3,"Filed write_vrt and write_geotiff_gpu signature/docstring drift vs to_geotiff (MEDIUM, #1631). Fix in PR (TBD): explicit write_vrt(relative, crs_wkt, nodata) signature (was **kwargs); 'cubic' added to write_geotiff_gpu overview_resampling docstring; write_geotiff_gpu(data) typed xr.DataArray|cupy.ndarray to match to_geotiff. Prior 1605/1606/1611/1612/1613/1615/1623 all CLOSED."
+geotiff,2026-05-11,1644,MEDIUM,3,"Filed write_geotiff_gpu compression docstring drift vs to_geotiff (MEDIUM Cat 3, #1644). Fix on deep-sweep-api-consistency-geotiff-2026-05-11-1778545740: sync the full 9-codec list into the docstring and note GPU vs CPU encode paths; regression test test_compression_docstring_1644.py pins the codec list and exercises each CPU-fallback codec end-to-end. Other potential drifts surveyed: write_vrt returns str while to_geotiff/write_geotiff_gpu return None (LOW, intentional backward-compat); write_vrt nodata typed float|None vs int-accepting siblings (LOW, PEP 484 int->float compat); kwarg-only ordering drift across read functions (LOW, no user impact). Prior issues 1631/1637/1615/1560/1541/1562 all CLOSED."
 reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)."
diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py
@@ -2723,8 +2723,35 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
     nodata : float, int, or None
         NoData value.
     compression : str
-        'zstd' (default, fastest on GPU), 'deflate', 'jpeg', or 'none'.
-        JPEG uses nvJPEG when available, falling back to Pillow.
+        Codec name. Accepts the same set ``to_geotiff`` lists in its
+        own signature: ``'none'``, ``'deflate'``, ``'lzw'``, ``'jpeg'``,
+        ``'packbits'``, ``'zstd'``, ``'lz4'``, ``'jpeg2000'`` (alias
+        ``'j2k'``), or ``'lerc'``.
+
+        Routing per codec:
+
+        - ``'zstd'`` (default) and ``'deflate'``: nvCOMP batch
+          compression on the GPU -- the fastest paths and the reason to
+          use this entry point.
+        - ``'jpeg'``: nvJPEG when libnvjpeg is loadable, Pillow
+          otherwise. Note that ``to_geotiff`` rejects
+          ``compression='jpeg'`` at runtime because its CPU encoder
+          omits the required TIFF JPEGTables tag (347); this GPU entry
+          point instead emits self-contained JFIF tiles. The two
+          writers therefore disagree about JPEG-in-TIFF interop. Files
+          produced here decode through this library's own reader but
+          may not round-trip through GDAL, rasterio, or libtiff
+          readers that require the JPEGTables tag. Treat the JPEG path
+          as experimental and internal-reader-only until the
+          JPEGTables fix lands.
+        - ``'jpeg2000'`` and ``'j2k'``: nvJPEG2K GPU encode when
+          available, glymur CPU encode otherwise. The two paths are
+          not byte-for-byte identical (different libraries, different
+          default parameters); use ``to_geotiff`` if you need exact
+          CPU-writer parity.
+        - ``'lerc'``, ``'lzw'``, ``'packbits'``, and ``'lz4'``: no
+          nvCOMP/CUDA accelerator, so these fall through to the CPU
+          encoder for byte-stable parity with ``to_geotiff``.
     compression_level : int or None
         Compression effort level. Accepted for API compatibility but
         currently ignored -- nvCOMP does not expose level control.

diff --git a/xrspatial/geotiff/tests/test_compression_docstring_1644.py b/xrspatial/geotiff/tests/test_compression_docstring_1644.py
@@ -0,0 +1,133 @@
+"""Regression test for #1644: ``write_geotiff_gpu`` compression docstring
+parity vs ``to_geotiff``.
+
+The api-consistency sweep on 2026-05-11 flagged that
+``write_geotiff_gpu.__doc__`` listed only four codecs (``'zstd'``,
+``'deflate'``, ``'jpeg'``, ``'none'``) under the ``compression``
+parameter, while the implementation actually accepts every codec
+``to_geotiff`` does.
+
+Routing for the additional codecs:
+
+* ``'lzw'``, ``'packbits'``, ``'lz4'``, ``'lerc'`` -- these are not
+  nvCOMP-accelerated and have no GPU library, so they fall through to
+  the CPU encoder. Output is byte-for-byte identical to ``to_geotiff``.
+* ``'jpeg2000'`` / ``'j2k'`` -- attempts an nvJPEG2K *GPU* encode
+  first via ``_nvjpeg2k_batch_encode`` and falls back to the CPU
+  ``glymur`` encoder only when libnvjpeg2k is unavailable. The two
+  paths are NOT byte-stable against each other; this module pins the
+  acceptance contract (the codec name is accepted and a file gets
+  written), not output-byte parity with the CPU writer.
+* ``'jpeg'`` -- accepted here even though ``to_geotiff`` rejects it
+  (the CPU writer omits the JPEGTables tag, so its output doesn't
+  round-trip through GDAL). The GPU path emits self-contained JFIF
+  tiles. Covered separately by
+  ``test_gpu_writer_compression_modes_2026_05_11.py``; this module
+  excludes it from the parametrized fallback list because the test
+  data needs to be uint8 with sensible pixel content.
+
+This module pins the full codec list against future drift and confirms
+the underlying entry point accepts the codec names that the docstring
+now advertises.
+"""
+from __future__ import annotations
+
+import importlib.util
+import os
+
+import numpy as np
+import pytest
+import xarray as xr
+
+from xrspatial.geotiff import write_geotiff_gpu
+
+
+def _gpu_available() -> bool:
+    """True when cupy imports and ``cupy.cuda.is_available()`` is truthy."""
+    if importlib.util.find_spec("cupy") is None:
+        return False
+    try:
+        import cupy
+
+        return bool(cupy.cuda.is_available())
+    except Exception:
+        return False
+
+
+_HAS_GPU = _gpu_available()
+_gpu_only = pytest.mark.skipif(
+    not _HAS_GPU, reason="cupy + CUDA required",
+)
+
+
+# Codecs to exercise end-to-end through the GPU writer to confirm it
+# accepts the names the docstring advertises. Excludes ``jpeg`` because
+# (a) ``to_geotiff`` rejects it at runtime and (b) the JPEG round-trip
+# is covered with appropriate uint8 RGB data in
+# ``test_gpu_writer_compression_modes_2026_05_11.py``; keeping it out of
+# this parametrize avoids exercising the JPEG path on dtype/shape
+# combinations that aren't representative.
+_GPU_FALLBACK_CODECS = (
+    "lzw", "packbits", "lz4", "lerc", "jpeg2000", "j2k",
+)
+
+
+def test_write_geotiff_gpu_docstring_lists_full_codec_set():
+    """The ``compression`` docstring lists every codec ``to_geotiff`` accepts.
+
+    Prior to #1644 the docstring listed only ``'zstd'``, ``'deflate'``,
+    ``'jpeg'``, and ``'none'``, which made the GPU writer look much
+    more restrictive than it actually is. The checks below pin the
+    presence of every codec name, not the surrounding wording.
+    """
+    doc = write_geotiff_gpu.__doc__
+    assert doc is not None, "write_geotiff_gpu lost its docstring"
+    block_start = doc.index("compression : str")
+    block_end = doc.index("compression_level", block_start)
+    block = doc[block_start:block_end]
+    # Every codec name in the canonical list must appear in the block.
+    # Use single-quoted form because that is how the docstring writes them.
+    for codec in (
+        "'none'", "'deflate'", "'lzw'", "'jpeg'", "'packbits'",
+        "'zstd'", "'lz4'", "'jpeg2000'", "'j2k'", "'lerc'",
+    ):
+        assert codec in block, (
+            f"compression docstring missing {codec}; current block:\n{block}"
+        )
+
+
+@_gpu_only
+@pytest.mark.parametrize("codec", _GPU_FALLBACK_CODECS)
+def test_write_geotiff_gpu_accepts_cpu_fallback_codecs(tmp_path, codec):
+    """Codecs outside the nvCOMP fast path still write a file successfully.
+
+    Confirms the docstring's promise that the GPU writer accepts the
+    same codec set as ``to_geotiff``. ``jpeg`` is exercised separately
+    by ``test_gpu_writer_compression_modes_2026_05_11.py`` because the
+    test data must be uint8 with sensible content. ``jpeg2000`` /
+    ``j2k`` will attempt nvJPEG2K if available and fall back to
+    ``glymur`` otherwise; either way the encoder needs uint8/uint16
+    input, so pick a uint16 source for those codecs so the encode path
+    is the one users actually hit, not a dtype-rejected pre-check.
+    """
+    import cupy
+
+    if codec in ("jpeg2000", "j2k"):
+        arr_cpu = np.random.RandomState(0).randint(
+            0, 65535, size=(64, 64), dtype=np.uint16,
+        )
+    else:
+        arr_cpu = np.random.RandomState(0).rand(64, 64).astype(np.float32)
+    da = xr.DataArray(
+        cupy.asarray(arr_cpu), dims=["y", "x"],
+        coords={"y": np.arange(64.0, 0, -1), "x": np.arange(64.0)},
+        attrs={"crs": 4326,
+               "transform": (1.0, 0.0, 0.0, 0.0, -1.0, 64.0)},
+    )
+    path = str(tmp_path / f"out_{codec}.tif")
+    write_geotiff_gpu(da, path, compression=codec)
+    assert os.path.exists(path), (
+        f"write_geotiff_gpu(compression={codec!r}) failed to write a file"
+    )
+    # File must be non-empty so we know the encode path actually ran
+    assert os.path.getsize(path) > 0