diff --git a/doc/whats-new.rst b/doc/whats-new.rst index effb199f18e..0425452de8d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,6 +29,10 @@ Bug Fixes - Fix a major performance regression in :py:meth:`Coordinates.to_index` (and consequently :py:meth:`Dataset.to_dataframe`) caused by converting the cached code ndarrays into Python lists (:issue:`11305`). +- Preserve the Zarr array ``fill_value`` in the variable ``encoding`` when reading + a ``zarr_format=3`` store with ``use_zarr_fill_value_as_mask=False``, so it is no + longer silently lost on round-trip (:issue:`10269`). + By `Davis Bennett <https://github.com/d-v-b>`_. Documentation diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index d9279dc2de9..6f3e1ad4eb4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -938,10 +938,19 @@ def open_store_variable(self, name): # by interpreting Zarr's fill_value to mean the same as netCDF's _FillValue if zarr_array.fill_value is not None: attributes["_FillValue"] = zarr_array.fill_value - elif "_FillValue" in attributes: - attributes["_FillValue"] = FillValueCoder.decode( - attributes["_FillValue"], zarr_array.dtype - ) + else: + # Preserve the Zarr array fill_value in the encoding so it is not + # lost on round-trip. The write path reads it back from here. + # Only zarr_format 3 supports `fill_value` as an encoding key + # (in zarr_format 2 the fill_value is set via `_FillValue`). 
+ # See https://github.com/pydata/xarray/issues/10269 + zarr_format_3 = _zarr_v3() and self.zarr_group.metadata.zarr_format == 3 + if zarr_format_3: + encoding["fill_value"] = zarr_array.fill_value + if "_FillValue" in attributes: + attributes["_FillValue"] = FillValueCoder.decode( + attributes["_FillValue"], zarr_array.dtype + ) return Variable(dimensions, data, attributes, encoding) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index e42bfc2cd9f..6f9034249d0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3869,6 +3869,27 @@ def test_zarr_fill_value_setting(self, dtype): # ``raise_on_invalid=vn in check_encoding_set`` line in zarr.py # ds.foo.encoding["fill_value"] = fv + def test_zarr_fill_value_in_encoding_on_read(self) -> None: + # GH #10269: the Zarr array fill_value should be preserved in the + # variable encoding on read, so that it is not lost on round-trip. + # `fill_value` is an independent encoding key only for zarr_format 3; + # for zarr_format 2 the fill_value is set via `_FillValue`. + if not has_zarr_v3 or zarr.config.get("default_zarr_format") != 3: + pytest.skip("fill_value is only an encoding key for zarr_format 3") + + ds = xr.Dataset({"foo": ("x", [1, 2, 3])}) + ds.foo.encoding = {"fill_value": -99} + + open_kwargs = {"consolidated": False, "use_zarr_fill_value_as_mask": False} + with self.roundtrip(ds, open_kwargs=open_kwargs) as actual: + assert actual.foo.encoding["fill_value"] == -99 + + # the fill_value must survive an open -> write -> open round-trip even + # when the user never touches the encoding explicitly + with self.roundtrip(ds, open_kwargs=open_kwargs) as opened: + with self.roundtrip(opened, open_kwargs=open_kwargs) as actual: + assert actual.foo.encoding["fill_value"] == -99 + @requires_zarr @pytest.mark.skipif(