Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-api-consistency-state.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-11,1631,MEDIUM,3,"Filed write_vrt and write_geotiff_gpu signature/docstring drift vs to_geotiff (MEDIUM, #1631). Fix in PR (TBD): explicit write_vrt(relative, crs_wkt, nodata) signature (was **kwargs); 'cubic' added to write_geotiff_gpu overview_resampling docstring; write_geotiff_gpu(data) typed xr.DataArray|cupy.ndarray to match to_geotiff. Prior 1605/1606/1611/1612/1613/1615/1623 all CLOSED."
geotiff,2026-05-11,1644,MEDIUM,3,"Filed write_geotiff_gpu compression docstring drift vs to_geotiff (MEDIUM Cat 3, #1644). Fix on deep-sweep-api-consistency-geotiff-2026-05-11-1778545740: sync the full 9-codec list into the docstring and note GPU vs CPU encode paths; regression test test_compression_docstring_1644.py pins the codec list and exercises each CPU-fallback codec end-to-end. Other potential drifts surveyed: write_vrt returns str while to_geotiff/write_geotiff_gpu return None (LOW, intentional backward-compat); write_vrt nodata typed float|None vs int-accepting siblings (LOW, PEP 484 int->float compat); kwarg-only ordering drift across read functions (LOW, no user impact). Prior issues 1631/1637/1615/1560/1541/1562 all CLOSED."
reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)."
31 changes: 29 additions & 2 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2723,8 +2723,35 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
nodata : float, int, or None
NoData value.
compression : str
'zstd' (default, fastest on GPU), 'deflate', 'jpeg', or 'none'.
JPEG uses nvJPEG when available, falling back to Pillow.
Codec name. Accepts the same set ``to_geotiff`` lists in its
own signature: ``'none'``, ``'deflate'``, ``'lzw'``, ``'jpeg'``,
``'packbits'``, ``'zstd'``, ``'lz4'``, ``'jpeg2000'`` (alias
``'j2k'``), or ``'lerc'``.

Routing per codec:

- ``'zstd'`` (default) and ``'deflate'``: nvCOMP batch
compression on the GPU -- the fastest paths and the reason to
use this entry point.
- ``'jpeg'``: nvJPEG when libnvjpeg is loadable, Pillow
otherwise. Note that ``to_geotiff`` rejects
``compression='jpeg'`` at runtime because its CPU encoder
omits the required TIFF JPEGTables tag (347); this GPU entry
point instead emits self-contained JFIF tiles. The two
writers therefore disagree about JPEG-in-TIFF interop. Files
produced here decode through this library's own reader but
may not round-trip through GDAL, rasterio, or libtiff
readers that require the JPEGTables tag. Treat the JPEG path
as experimental and internal-reader-only until the
JPEGTables fix lands.
- ``'jpeg2000'`` and ``'j2k'``: nvJPEG2K GPU encode when
available, glymur CPU encode otherwise. The two paths are
not byte-for-byte identical (different libraries, different
default parameters); use ``to_geotiff`` if you need exact
CPU-writer parity.
- ``'lerc'``, ``'lzw'``, ``'packbits'``, and ``'lz4'``: no
nvCOMP/CUDA accelerator, so these fall through to the CPU
encoder for byte-stable parity with ``to_geotiff``.
compression_level : int or None
Compression effort level. Accepted for API compatibility but
currently ignored -- nvCOMP does not expose level control.
Expand Down
133 changes: 133 additions & 0 deletions xrspatial/geotiff/tests/test_compression_docstring_1644.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""Regression test for #1644: ``write_geotiff_gpu`` compression docstring
parity vs ``to_geotiff``.

The api-consistency sweep on 2026-05-11 flagged that
``write_geotiff_gpu.__doc__`` listed only four codecs (``'zstd'``,
``'deflate'``, ``'jpeg'``, ``'none'``) under the ``compression``
parameter, while the implementation actually accepts every codec
``to_geotiff`` does.

Routing for the additional codecs:

* ``'lzw'``, ``'packbits'``, ``'lz4'``, ``'lerc'`` -- these codecs are
  not nvCOMP-accelerated and have no GPU library, so they fall through
  to the CPU encoder. Their output is byte-for-byte identical to
  ``to_geotiff``.
* ``'jpeg2000'`` / ``'j2k'`` -- attempts an nvJPEG2K *GPU* encode
first via ``_nvjpeg2k_batch_encode`` and falls back to the CPU
``glymur`` encoder only when libnvjpeg2k is unavailable. The two
paths are NOT byte-stable against each other; this module pins the
acceptance contract (the codec name is accepted and a file gets
written), not output-byte parity with the CPU writer.
* ``'jpeg'`` -- accepted here even though ``to_geotiff`` rejects it
(the CPU writer omits the JPEGTables tag, so its output doesn't
round-trip through GDAL). The GPU path emits self-contained JFIF
tiles. Covered separately by
``test_gpu_writer_compression_modes_2026_05_11.py``; this module
excludes it from the parametrized fallback list because the test
data needs to be uint8 with sensible pixel content.

This module pins the full codec list against future drift and confirms
the underlying entry point accepts the codec names that the docstring
now advertises.
"""
from __future__ import annotations

import importlib.util
import os

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import write_geotiff_gpu


def _gpu_available() -> bool:
    """Report whether cupy is installed and says CUDA is available.

    Returns
    -------
    bool
        ``False`` when no ``cupy`` module spec can be found, or when
        importing cupy / querying ``cupy.cuda.is_available()`` raises.
        Otherwise the truthiness of ``cupy.cuda.is_available()``.
    """
    cupy_spec = importlib.util.find_spec("cupy")
    if cupy_spec is not None:
        try:
            import cupy

            cuda_ready = bool(cupy.cuda.is_available())
        except Exception:
            # Best-effort probe: any import or driver failure is treated
            # as "no GPU" so callers can skip instead of erroring.
            return False
        return cuda_ready
    return False


# Probe once at import time; the result feeds the skip marker below.
_HAS_GPU = _gpu_available()
# Skip marker for tests that need cupy and CUDA; applied as a decorator.
_gpu_only = pytest.mark.skipif(
    not _HAS_GPU, reason="cupy + CUDA required",
)


# Codec names exercised end-to-end through the GPU writer to confirm it
# accepts the names the docstring advertises. ``jpeg`` is excluded
# because (a) ``to_geotiff`` rejects it at runtime and (b) the JPEG
# round-trip is covered with appropriate uint8 RGB data in
# ``test_gpu_writer_compression_modes_2026_05_11.py``; keeping it out of
# this parametrize list avoids exercising the JPEG path on dtype/shape
# combinations that aren't representative.
_GPU_FALLBACK_CODECS = (
    "lzw", "packbits", "lz4", "lerc", "jpeg2000", "j2k",
)


def test_write_geotiff_gpu_docstring_lists_full_codec_set():
    """Pin the codec names advertised under ``compression`` in the docstring.

    Before #1644 the ``compression`` entry named only ``'zstd'``,
    ``'deflate'``, ``'jpeg'``, and ``'none'``, which understated what the
    GPU writer accepts. This test slices the docstring from the
    ``compression : str`` heading up to the next ``compression_level``
    mention and requires every canonical codec name to appear in it.
    """
    # Single-quoted forms, because that is how the docstring spells them.
    expected_codecs = (
        "'none'", "'deflate'", "'lzw'", "'jpeg'", "'packbits'",
        "'zstd'", "'lz4'", "'jpeg2000'", "'j2k'", "'lerc'",
    )

    doc = write_geotiff_gpu.__doc__
    assert doc is not None, "write_geotiff_gpu lost its docstring"

    # ``str.index`` raises ValueError if either heading is missing, which
    # fails the test just as a missing codec would.
    start = doc.index("compression : str")
    stop = doc.index("compression_level", start)
    block = doc[start:stop]

    for codec in expected_codecs:
        assert codec in block, (
            f"compression docstring missing {codec}; current block:\n{block}"
        )


@_gpu_only
@pytest.mark.parametrize("codec", _GPU_FALLBACK_CODECS)
def test_write_geotiff_gpu_accepts_cpu_fallback_codecs(tmp_path, codec):
    """Each parametrized codec name produces a non-empty file.

    Backs the docstring's statement that the GPU writer accepts the same
    codec names as ``to_geotiff``. ``jpeg`` is left to
    ``test_gpu_writer_compression_modes_2026_05_11.py`` because it needs
    uint8 data with sensible content. For ``jpeg2000`` / ``j2k`` the
    source array is uint16: the encoder needs uint8/uint16 input, and the
    goal is to reach the real encode path (nvJPEG2K if available,
    ``glymur`` otherwise) rather than trip a dtype pre-check. All other
    codecs get a float32 array.
    """
    import cupy

    # Fixed seed keeps the pixel content identical from run to run.
    rng = np.random.RandomState(0)
    if codec in ("jpeg2000", "j2k"):
        arr_cpu = rng.randint(0, 65535, size=(64, 64), dtype=np.uint16)
    else:
        arr_cpu = rng.rand(64, 64).astype(np.float32)

    coords = {"y": np.arange(64.0, 0, -1), "x": np.arange(64.0)}
    attrs = {
        "crs": 4326,
        "transform": (1.0, 0.0, 0.0, 0.0, -1.0, 64.0),
    }
    da = xr.DataArray(
        cupy.asarray(arr_cpu), dims=["y", "x"], coords=coords, attrs=attrs,
    )

    path = str(tmp_path / f"out_{codec}.tif")
    write_geotiff_gpu(da, path, compression=codec)

    assert os.path.exists(path), (
        f"write_geotiff_gpu(compression={codec!r}) failed to write a file"
    )
    # A non-empty file shows the encode path actually produced output.
    assert os.path.getsize(path) > 0
Loading