From 32f2da41cad75bdaca3e4bb324708c1dc868ab84 Mon Sep 17 00:00:00 2001 From: Dhruva Kumar Kaushal <120594589+dhruvak001@users.noreply.github.com> Date: Mon, 29 Dec 2025 23:21:18 +0530 Subject: [PATCH 1/2] documenting how xarray.dot() interacts with coordinates (#10958) * documenting how xarray.dot() interacts with coordinates * minor fixes * simplifying --------- Co-authored-by: DHRUVA KUMAR KAUSHAL Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/computation/computation.py | 39 +++++++++++++++++++++++++++++++ xarray/core/dataarray.py | 13 +++++++++++ 2 files changed, 52 insertions(+) diff --git a/xarray/computation/computation.py b/xarray/computation/computation.py index 1563455fa91..f05158b60b7 100644 --- a/xarray/computation/computation.py +++ b/xarray/computation/computation.py @@ -515,6 +515,14 @@ def dot( We recommend installing the optional ``opt_einsum`` package, or alternatively passing ``optimize=True``, which is passed through to ``np.einsum``, and works for most array backends. + **Coordinate Handling** + + Like all xarray operations, ``dot`` automatically aligns array coordinates. + Coordinates are aligned by their **values**, not their order. By default, xarray uses + an inner join, so only overlapping coordinate values are included. With the default + ``arithmetic_join="inner"``, ``dot(a, b)`` is mathematically equivalent to ``(a * b).sum()`` + over the specified dimensions. See :ref:`math automatic alignment` for more details. + Examples -------- >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=["a", "b"]) @@ -572,6 +580,37 @@ def dot( >>> xr.dot(da_a, da_b, dim=...) 
<xarray.DataArray ()> Size: 8B array(235) + + **Coordinate alignment examples:** + + Coordinates are aligned by their values, not their order: + + >>> x = xr.DataArray([1, 10], coords=[("foo", ["a", "b"])]) + >>> y = xr.DataArray([2, 20], coords=[("foo", ["b", "a"])]) + >>> xr.dot(x, y) + <xarray.DataArray ()> Size: 8B + array(40) + + Non-overlapping coordinates are excluded from the computation: + + >>> x = xr.DataArray([1, 10], coords=[("foo", ["a", "b"])]) + >>> y = xr.DataArray([2, 30], coords=[("foo", ["b", "c"])]) + >>> xr.dot(x, y) # only 'b' overlaps: 10 * 2 = 20 + <xarray.DataArray ()> Size: 8B + array(20) + + Dimensions not involved in the dot product keep their coordinates: + + >>> x = xr.DataArray( + ... [[1, 2], [3, 4]], + ... coords=[("time", [0, 1]), ("space", ["IA", "IL"])], + ... ) + >>> y = xr.DataArray([10, 20], coords=[("space", ["IA", "IL"])]) + >>> xr.dot(x, y, dim="space") # time coordinates are preserved + <xarray.DataArray (time: 2)> Size: 16B + array([ 50, 110]) + Coordinates: + * time (time) int64 16B 0 1 """ from xarray.core.dataarray import DataArray diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2ef30b4e826..fcfa0317131 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -5170,6 +5170,11 @@ def dot( dot numpy.tensordot + Notes + ----- + This method automatically aligns coordinates by their values (not their order). + See :ref:`math automatic alignment` and :py:func:`xarray.dot` for more details. 
+ Examples -------- >>> da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4)) @@ -5187,6 +5192,14 @@ def dot( >>> dot_result.dims ('x', 'y') + Coordinates are aligned by their values: + + >>> x = xr.DataArray([1, 10], coords=[("foo", ["a", "b"])]) + >>> y = xr.DataArray([2, 20], coords=[("foo", ["b", "a"])]) + >>> x.dot(y) + <xarray.DataArray ()> Size: 8B + array(40) + """ if isinstance(other, Dataset): raise NotImplementedError( From 18ebe981d2924a07bff4cefec2f0904eb763bd4c Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 29 Dec 2025 22:18:49 +0100 Subject: [PATCH 2/2] Remove special mapping of `auto` to {} in `open_zarr` (#11010) Make the handling of `chunks="auto"` consistent between `open_zarr` and `open_dataset(..., engine="zarr")`. The handling of chunks still differs in `open_zarr` vs `open_dataset(..., engine="zarr")` in that the default in `open_zarr` is to use `chunks={}` and a chunk manager (aka dask) when available in your env. And in `open_dataset` the default is to use `chunks=None` (aka no chunks). Co-authored-by: Justus Magin --------- Co-authored-by: Justus Magin --- doc/whats-new.rst | 5 +++++ xarray/backends/zarr.py | 10 ++++++---- xarray/tests/test_backends.py | 12 ++++++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a49564649cf..dd141d4bf5a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,6 +28,11 @@ New Features Breaking Changes ~~~~~~~~~~~~~~~~ +- Change the default value for ``chunks`` in ``open_zarr`` to ``_default`` and remove special mapping of ``"auto"`` + to ``{}`` or ``None`` in ``open_zarr``. If ``chunks`` is not set, the default behavior is the same as before. + Explicitly setting ``chunks="auto"`` will match the behavior of ``chunks="auto"`` in + ``open_dataset(..., engine="zarr")`` (:issue:`11002` :pull:`11010`). + By `Julia Signell <https://github.com/jsignell>`_. - :py:meth:`Dataset.identical`,` :py:meth:`DataArray.identical`, and :py:func:`testings.assert_identical` now compare indexes (xindexes). 
Two objects with identical data but different indexes will no longer diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index fe004c212b6..410a6a49a7b 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -30,6 +30,7 @@ from xarray.core.utils import ( FrozenDict, HiddenKeyDict, + _default, attempt_import, close_on_error, emit_user_level_warning, @@ -1400,7 +1401,7 @@ def open_zarr( store, group=None, synchronizer=None, - chunks="auto", + chunks=_default, decode_cf=True, mask_and_scale=True, decode_times=True, @@ -1436,8 +1437,9 @@ def open_zarr( Array synchronizer provided to zarr group : str, optional Group path. (a.k.a. `path` in zarr terminology.) - chunks : int, dict, 'auto' or None, default: 'auto' - If provided, used to load the data into dask arrays. + chunks : int, dict, "auto" or None, optional + Used to load the data into dask arrays. Default behavior is to use + ``chunks={}`` if dask is available, otherwise ``chunks=None``. - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the engine preferred chunks. 
@@ -1558,7 +1560,7 @@ def open_zarr( if from_array_kwargs is None: from_array_kwargs = {} - if chunks == "auto": + if chunks is _default: try: guess_chunkmanager( chunked_array_type diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 63fe51b1600..fdc7fdc8edb 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3637,6 +3637,18 @@ def test_chunk_encoding_with_larger_dask_chunks(self) -> None: ) as ds1: assert_equal(ds1, original) + @requires_dask + def test_chunk_auto_with_small_dask_chunks(self) -> None: + original = Dataset({"u": (("x",), np.zeros(10))}).chunk({"x": 2}) + with self.create_zarr_target() as store: + original.to_zarr(store, **self.version_kwargs) + with xr.open_zarr(store, **self.version_kwargs) as default: + assert default.chunks == {"x": (2, 2, 2, 2, 2)} + with xr.open_zarr(store, chunks="auto", **self.version_kwargs) as auto: + assert_identical(auto, original) + assert auto.chunks == {"x": (10,)} + assert auto.chunks != default.chunks + @requires_cftime def test_open_zarr_use_cftime(self) -> None: ds = create_test_data()