Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ New Features
Breaking Changes
~~~~~~~~~~~~~~~~

- Change the default value for ``chunks`` in ``open_zarr`` to ``_default`` and remove special mapping of ``"auto"``
to ``{}`` or ``None`` in ``open_zarr``. If ``chunks`` is not set, the default behavior is the same as before.
Explicitly setting ``chunks="auto"`` will match the behavior of ``chunks="auto"`` in
``open_dataset(..., engine="zarr")`` (:issue:`11002` :pull:`11010`).
By `Julia Signell <https://github.com/jsignell>`_.
- :py:meth:`Dataset.identical`, :py:meth:`DataArray.identical`, and
:py:func:`testing.assert_identical` now compare indexes (xindexes).
Two objects with identical data but different indexes will no longer
Expand Down
10 changes: 6 additions & 4 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from xarray.core.utils import (
FrozenDict,
HiddenKeyDict,
_default,
attempt_import,
close_on_error,
emit_user_level_warning,
Expand Down Expand Up @@ -1400,7 +1401,7 @@ def open_zarr(
store,
group=None,
synchronizer=None,
chunks="auto",
chunks=_default,
decode_cf=True,
mask_and_scale=True,
decode_times=True,
Expand Down Expand Up @@ -1436,8 +1437,9 @@ def open_zarr(
Array synchronizer provided to zarr
group : str, optional
Group path. (a.k.a. `path` in zarr terminology.)
chunks : int, dict, 'auto' or None, default: 'auto'
If provided, used to load the data into dask arrays.
chunks : int, dict, "auto" or None, optional
Used to load the data into dask arrays. Default behavior is to use
``chunks={}`` if dask is available, otherwise ``chunks=None``.

- ``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks.
Expand Down Expand Up @@ -1558,7 +1560,7 @@ def open_zarr(
if from_array_kwargs is None:
from_array_kwargs = {}

if chunks == "auto":
if chunks is _default:
try:
guess_chunkmanager(
chunked_array_type
Expand Down
39 changes: 39 additions & 0 deletions xarray/computation/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,14 @@ def dot(
We recommend installing the optional ``opt_einsum`` package, or alternatively passing ``optimize=True``,
which is passed through to ``np.einsum``, and works for most array backends.

**Coordinate Handling**

Like all xarray operations, ``dot`` automatically aligns array coordinates.
Coordinates are aligned by their **values**, not their order. By default, xarray uses
an inner join, so only overlapping coordinate values are included. With the default
``arithmetic_join="inner"``, ``dot(a, b)`` is mathematically equivalent to ``(a * b).sum()``
over the specified dimensions. See :ref:`math automatic alignment` for more details.

Examples
--------
>>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=["a", "b"])
Expand Down Expand Up @@ -572,6 +580,37 @@ def dot(
>>> xr.dot(da_a, da_b, dim=...)
<xarray.DataArray ()> Size: 8B
array(235)

**Coordinate alignment examples:**

Coordinates are aligned by their values, not their order:

>>> x = xr.DataArray([1, 10], coords=[("foo", ["a", "b"])])
>>> y = xr.DataArray([2, 20], coords=[("foo", ["b", "a"])])
>>> xr.dot(x, y)
<xarray.DataArray ()> Size: 8B
array(40)

Non-overlapping coordinates are excluded from the computation:

>>> x = xr.DataArray([1, 10], coords=[("foo", ["a", "b"])])
>>> y = xr.DataArray([2, 30], coords=[("foo", ["b", "c"])])
>>> xr.dot(x, y) # only 'b' overlaps: 10 * 2 = 20
<xarray.DataArray ()> Size: 8B
array(20)

Dimensions not involved in the dot product keep their coordinates:

>>> x = xr.DataArray(
... [[1, 2], [3, 4]],
... coords=[("time", [0, 1]), ("space", ["IA", "IL"])],
... )
>>> y = xr.DataArray([10, 20], coords=[("space", ["IA", "IL"])])
>>> xr.dot(x, y, dim="space") # time coordinates are preserved
<xarray.DataArray (time: 2)> Size: 16B
array([ 50, 110])
Coordinates:
* time (time) int64 16B 0 1
"""
from xarray.core.dataarray import DataArray

Expand Down
13 changes: 13 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -5170,6 +5170,11 @@ def dot(
dot
numpy.tensordot

Notes
-----
This method automatically aligns coordinates by their values (not their order).
See :ref:`math automatic alignment` and :py:func:`xarray.dot` for more details.

Examples
--------
>>> da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4))
Expand All @@ -5187,6 +5192,14 @@ def dot(
>>> dot_result.dims
('x', 'y')

Coordinates are aligned by their values:

>>> x = xr.DataArray([1, 10], coords=[("foo", ["a", "b"])])
>>> y = xr.DataArray([2, 20], coords=[("foo", ["b", "a"])])
>>> x.dot(y)
<xarray.DataArray ()> Size: 8B
array(40)

"""
if isinstance(other, Dataset):
raise NotImplementedError(
Expand Down
12 changes: 12 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -3637,6 +3637,18 @@ def test_chunk_encoding_with_larger_dask_chunks(self) -> None:
) as ds1:
assert_equal(ds1, original)

@requires_dask
def test_chunk_auto_with_small_dask_chunks(self) -> None:
# Write a 10-element variable with dask chunks of size 2 to a zarr store, then
# compare how ``open_zarr`` chunks it by default versus with chunks="auto".
original = Dataset({"u": (("x",), np.zeros(10))}).chunk({"x": 2})
with self.create_zarr_target() as store:
original.to_zarr(store, **self.version_kwargs)
# No ``chunks`` argument: the test expects the five size-2 chunks written above.
with xr.open_zarr(store, **self.version_kwargs) as default:
assert default.chunks == {"x": (2, 2, 2, 2, 2)}
# Explicit chunks="auto": the test expects one chunk spanning all 10 elements.
with xr.open_zarr(store, chunks="auto", **self.version_kwargs) as auto:
assert_identical(auto, original)
assert auto.chunks == {"x": (10,)}
# Guard that chunks="auto" is not treated the same as the default.
assert auto.chunks != default.chunks

@requires_cftime
def test_open_zarr_use_cftime(self) -> None:
ds = create_test_data()
Expand Down
Loading