From b8bfeca3275045ca82adc3401c38444b1ed12c4a Mon Sep 17 00:00:00 2001 From: Timothy Hodson <34148978+thodson-usgs@users.noreply.github.com> Date: Wed, 22 Apr 2026 11:23:19 -0500 Subject: [PATCH] Fix Coordinates.to_index performance regression (#11306) The codes passed to pd.MultiIndex were being converted from cache-friendly ndarrays into Python lists to silence a mypy arg-type error introduced in #10694. The extra per-element conversion dominates runtime for large indexes (~13s on a 100x2000x300 array). Pass the ndarrays directly and suppress the type error the same way as for `levels` just above. Fixes #11305 Co-authored-by: Claude --- doc/whats-new.rst | 4 ++++ xarray/core/coordinates.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 93f335e625b..effb199f18e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,10 @@ Deprecations Bug Fixes ~~~~~~~~~ +- Fix a major performance regression in :py:meth:`Coordinates.to_index` (and + consequently :py:meth:`Dataset.to_dataframe`) caused by converting the cached + code ndarrays into Python lists (:issue:`11305`). + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 85a9d97abeb..a8d5b724746 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -194,7 +194,7 @@ def to_index(self, ordered_dims: Sequence[Hashable] | None = None) -> pd.Index: return pd.MultiIndex( levels=level_list, # type: ignore[arg-type,unused-ignore] - codes=[list(c) for c in code_list], + codes=code_list, # type: ignore[arg-type,unused-ignore] names=names, )