From 34c8949f6cddb0ff93551ee8b5ba9d6cf2365a23 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 29 Jan 2026 12:02:32 -0500 Subject: [PATCH 01/32] Support zarr 3 --- pyproject.toml | 6 +- requirements.txt | 37 ++++----- scallops/cli/pooled_if_sbs.py | 15 ++-- scallops/io.py | 8 +- scallops/tests/test_io.py | 12 +-- scallops/zarr_io.py | 138 +++++++++++++++++++++------------- 6 files changed, 128 insertions(+), 88 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3bf9154..2d4d87d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ classifiers = [# https://pypi.python.org/pypi?%3Aaction=list_classifiers dependencies = [ "adjustText", "anndata>=0.12.4", # https://github.com/scverse/anndata/issues/2166 - "bioio<2", + "bioio", "bioio-nd2", "bioio-tifffile", "centrosome", @@ -69,9 +69,9 @@ dependencies = [ "stardist", "statsmodels", "tensorflow", - "tifffile==2024.8.30", + "tifffile", "xarray", - "zarr<3" + "zarr" ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index a5128fa..830a127 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,43 +1,44 @@ -anndata==0.12.7 +anndata==0.12.9 adjustText==1.3.0 -bioio-nd2==1.1.0 -bioio-tifffile==1.1.0 -bioio-base==1.0.7 -bioio==1.6.1 +bioio-nd2==1.6.0 +bioio-tifffile==1.3.0 +bioio-base==3.2.0 +bioio-ome-tiff +bioio==3.2.0 centrosome==1.3.3 cp-measure==0.1.13 cython==3.2.4 dask-image==2025.11.0 -dask==2025.11.0 +dask==2026.1.1 decorator==5.2.1 filelock==3.20.3 flox==0.10.8 fsspec==2026.1.0 igraph==1.0.0 itk-elastix==0.23.0 -itk==5.4.3 +itk==5.4.5 joblib==1.5.3 kneed==0.8.5 mahotas==1.4.18 matplotlib==3.10.8 natsort==8.4.0 -numcodecs==0.15.1 +numcodecs==0.16.5 numpy==1.26.4 -ome-zarr==0.10.3 +ome-zarr==0.12.2 pandas==2.3.3 pint==0.25.2 -psutil==7.2.1 -pyarrow==22.0.0 -pydantic==2.11.7 +psutil==7.2.2 +pyarrow==23.0.0 +pydantic==2.12.5 pysam==0.23.3 scikit-image==0.26.0 scikit-learn==1.8.0 -scipy==1.17.0 +scipy==1.16.3 seaborn==0.13.2 shapely==2.1.2 
-stardist==0.9.1 +stardist==0.9.2 statsmodels==0.14.6 -tensorflow==2.19.0 -tifffile==2024.8.30 -xarray==2025.12.0 -zarr==2.18.7 +tensorflow==2.20.0 +tifffile==2026.1.28 +xarray==2026.1.0 +zarr==3.1.5 diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py index 57b7675..b1f9c30 100644 --- a/scallops/cli/pooled_if_sbs.py +++ b/scallops/cli/pooled_if_sbs.py @@ -76,6 +76,7 @@ from scallops.zarr_io import ( _get_fs, _get_sep, + _get_store_path, _write_zarr_image, open_ome_zarr, read_ome_zarr_array, @@ -226,7 +227,9 @@ def spot_detection_pipeline( _, file_list, metadata = image_tuple image_key = metadata["id"] if not force: - points_path = f"{root.store.path.rstrip(_get_sep(root))}{_get_sep(root)}points" + points_path = ( + f"{_get_store_path(root).rstrip(_get_sep(root))}{_get_sep(root)}points" + ) points_protocol = _get_fs_protocol(_get_fs(root)) if points_protocol != "file": points_path = f"{points_protocol}://{points_path}" @@ -334,7 +337,9 @@ def spot_detection_pipeline( else: del maxed if "peaks" in save_keys: - points_path = f"{root.store.path.rstrip(_get_sep(root))}{_get_sep(root)}points" + points_path = ( + f"{_get_store_path(root).rstrip(_get_sep(root))}{_get_sep(root)}points" + ) protocol = _get_fs_protocol(_get_fs(root)) if protocol != "file": points_path = f"{protocol}://{points_path}" @@ -913,7 +918,7 @@ def reads_pipeline( logger.info(f"Running reads for {image_key}") spots_sep = _get_sep(spots_root) - points_path = f"{spots_root.store.path.rstrip(spots_sep)}{spots_sep}points" + points_path = f"{_get_store_path(spots_root).rstrip(spots_sep)}{spots_sep}points" spots_protocol = _get_fs_protocol(_get_fs(spots_root)) if spots_protocol != "file": points_path = f"{spots_protocol}://{points_path}" @@ -1231,8 +1236,8 @@ def reads_main(arguments: argparse.Namespace): for key in image_keys: reads_pipeline( key, - spots_root=zarr.open(spots, "r"), - labels_root=zarr.open(labels + labels_fs.sep + "labels", "r"), + spots_root=zarr.open(spots, 
mode="r"), + labels_root=zarr.open(labels + labels_fs.sep + "labels", mode="r"), barcodes_file=barcodes_file, file_separator=output_fs.sep, threshold_peaks=threshold_peaks, diff --git a/scallops/io.py b/scallops/io.py index 026ab13..7f82e8b 100644 --- a/scallops/io.py +++ b/scallops/io.py @@ -59,7 +59,7 @@ from scallops.externals.tifffile2014 import imsave from scallops.utils import forceTCZYX, mlcs from scallops.xr import _crop -from scallops.zarr_io import _read_zarr_experiment, read_ome_zarr_array +from scallops.zarr_io import _get_store_path, _read_zarr_experiment, read_ome_zarr_array logger = logging.getLogger("scallops") @@ -1362,7 +1362,7 @@ def _images2fov( name = ( os.path.basename(file_list[i]) if not isinstance(file_list[i], zarr.Group) - else file_list[i].store.path + else _get_store_path(file_list[i]) ) src_metadata.append(dict(attrs=image_attrs[i], name=name)) @@ -1788,7 +1788,9 @@ def file_sort_key(x): src=file_list, common_src=mlcs( [ - Path(x).stem if not isinstance(x, zarr.Group) else x.store.path + Path(x).stem + if not isinstance(x, zarr.Group) + else _get_store_path(x) for x in file_list ] ), diff --git a/scallops/tests/test_io.py b/scallops/tests/test_io.py index 56b89b9..f505aa4 100644 --- a/scallops/tests/test_io.py +++ b/scallops/tests/test_io.py @@ -185,12 +185,14 @@ def test_write_non_ome_zarr_image(tmp_path, dask): image.attrs["physical_pixel_sizes"] = (1, 1, 1) image.attrs["physical_pixel_units"] = ("mm", "mm", "mm") zarr_path = str(tmp_path / "test.zarr") - _write_zarr_image("foo", open_ome_zarr(zarr_path), image, zarr_format="zarr") - _write_zarr_image("foo2", open_ome_zarr(zarr_path), image) + _write_zarr_image("img_zarr", open_ome_zarr(zarr_path), image, zarr_format="zarr") + _write_zarr_image("img_ome_zarr", open_ome_zarr(zarr_path), image) + + data_zarr = read_image(f"{zarr_path}/images/img_zarr", dask=False) + data_ome_zarr = read_image(f"{zarr_path}/images/img_ome_zarr", dask=False) - data_zarr = 
read_image(f"{zarr_path}/images/foo", dask=False) - data_ome_zarr = read_image(f"{zarr_path}/images/foo2", dask=False) xr.testing.assert_equal(data_zarr, data_ome_zarr) + xr.testing.assert_equal(image, data_ome_zarr) @pytest.mark.io @@ -312,7 +314,7 @@ def test_read_write_labels(tmp_path, array_A1_102_nuclei): _write_zarr_labels( name="test", root=open_ome_zarr(str(tmp_path), "w"), labels=nuclei ) - test = read_ome_zarr_array(zarr.open(str(tmp_path / "labels" / "test"), "r")) + test = read_ome_zarr_array(zarr.open(str(tmp_path / "labels" / "test"), mode="r")) np.testing.assert_equal(nuclei, test.data) diff --git a/scallops/zarr_io.py b/scallops/zarr_io.py index c28aa2b..8511a2b 100644 --- a/scallops/zarr_io.py +++ b/scallops/zarr_io.py @@ -65,13 +65,21 @@ def is_ome_zarr_array(node: zarr.Group) -> bool: result = is_ome_zarr_array(root) print(result) # Output: True """ - return node is not None and "multiscales" in node.attrs + return node is not None and ("ome" in node.attrs or "multiscales" in node.attrs) def _get_fs(group: zarr.Group): if hasattr(group.store, "fs"): return group.store.fs - return fsspec.url_to_fs(group.store.path)[0] + return fsspec.url_to_fs(_get_store_path(group))[0] + + +def _get_store_path(group: zarr.Group): + if hasattr(group.store, "root"): + return str(group.store.root) + if hasattr(group.store, "path"): + return group.store.path + return "" def _get_sep(group: zarr.Group) -> str: @@ -170,7 +178,7 @@ def _fix_attrs(d: dict) -> None: elif isinstance(value, ome_types.OME): # Hack to prevent OverflowError: # Overlong 4 byte UTF-8 sequence detected when encoding string - d[key] = d[key].dict() + d[key] = d[key].model_dump() elif isinstance(value, zarr.Group): d[key] = str(value) elif isinstance(value, list): @@ -401,41 +409,70 @@ def write_zarr( ) dask_delayed = [] if zarr_format == "zarr": # No axis validation + fmt = CurrentFormat() + + kwargs = dict() + zarr_version = 2 if not hasattr(fmt, "zarr_format") else fmt.zarr_format + if 
zarr_version == 2: + kwargs["dimension_separator"] = "/" + else: + kwargs["chunk_key_encoding"] = fmt.chunk_key_encoding if isinstance(data, da.Array): d = da.to_zarr( arr=data, url=grp.store, component=str(Path(grp.path, "0")), compute=compute, - dimension_separator=grp._store._dimension_separator, + **kwargs, ) if not compute: dask_delayed.append(d) elif not isinstance(data, zarr.Array): - grp.create_dataset("0", data=data, overwrite=True) - + if zarr_version == 2: + grp.create_dataset("0", data=data, overwrite=True, **kwargs) + else: + grp.create_array("0", data=data, overwrite=True, **kwargs) + # v3 + # ome/omero for channel metadata + # ome/multiscales[0]/metadata for other metadata + + # v2: + # omero for channel metadata + # multiscales[0]/metadata for other metadata datasets = [{"path": "0"}] if coordinate_transformations is not None: datasets[0]["coordinateTransformations"] = coordinate_transformations - multiscales = [ - dict(version=CurrentFormat().version, datasets=datasets, name=grp.name) - ] - d = {"multiscales": multiscales} + multiscales = [dict(version=fmt.version, datasets=datasets, name=grp.name)] + zarr_attrs = ( + {"multiscales": multiscales} + if zarr_version == 2 + else {"ome": {"multiscales": multiscales}} + ) + if axes is not None: multiscales[0]["axes"] = axes if image_attrs is not None: - multiscales[0]["metadata"] = image_attrs if "omero" in image_attrs: - d["omero"] = image_attrs["omero"] + if zarr_version == 2: + omero = zarr_attrs.get("omero", {}) + omero.update(image_attrs.pop("omero")) + zarr_attrs["omero"] = omero + else: + omero = zarr_attrs["ome"].get("omero", {}) + omero.update(image_attrs.pop("omero")) + zarr_attrs["ome"]["omero"] = omero + multiscales[0]["metadata"] = image_attrs if len(dask_delayed) > 0: @dask.delayed def _write_metadata_delayed(grp, d): grp.attrs.update(d) - return dask_delayed + [bind(_write_metadata_delayed, dask_delayed)(grp, d)] + return dask_delayed + [ + bind(_write_metadata_delayed, dask_delayed)(grp, 
zarr_attrs) + ] else: - grp.attrs.update(d) + grp.attrs.update(zarr_attrs) return dask_delayed else: return write_image( @@ -550,53 +587,47 @@ def _write_zarr_labels( axes=label_axes, metadata=metadata, compute=compute, + coordinate_transformations=None, storage_options=storage_options, ) -def _read_zarr_attrs(multiscale0: zarr.Group) -> tuple[dict, dict, list[str]]: - """Read attributes from a Zarr multiscale group. +def _read_zarr_attrs(attrs) -> tuple[dict, dict, list[str]]: + """Read attributes from Zarr. This function reads and processes the attributes, coordinates, and dimensions from the first multiscale dataset in a Zarr group. It also handles physical pixel sizes and units if available. - :param multiscale0: The Zarr group containing the multiscale dataset. + :param attrs: Zarr attributes. :return: A tuple containing: - coords: Dictionary of coordinates. - attrs: Dictionary of attributes. - dims: List of dimension names. - - :example: - - .. code-block:: python - - import zarr - from scallops.zarr_io import _read_zarr_attrs - - # Create a Zarr group with multiscale attributes - store = zarr.DirectoryStore("example.zarr") - root = zarr.group(store=store) - multiscale0 = root.create_group("multiscales") - multiscale0.attrs["axes"] = [{"name": "x"}, {"name": "y"}, {"name": "z"}] - multiscale0.attrs["datasets"] = [ - {"coordinateTransformations": [{"scale": [1.0, 0.5, 0.5]}]} - ] - - # Read attributes from the multiscale group - coords, attrs, dims = _read_zarr_attrs(multiscale0) - print(coords) - print(attrs) - print(dims) """ - attrs = multiscale0.get("metadata") - if attrs is None: - attrs = {} + # v3 + # ome/omero for channel metadata + # ome/multiscales[0]/metadata for other metadata + + # v2: + # omero for channel metadata + # multiscales[0]/metadata for other metadata + + if "ome" in attrs: + attrs = attrs["ome"] + multiscales = attrs["multiscales"] + if len(multiscales) > 0: + multiscale0 = multiscales[0] + else: + return None, None, None + axes = 
multiscale0["axes"] dims = [axis["name"] for axis in axes] - - coords = {d: attrs[d] for d in dims if d in attrs and d != "c"} - if "omero" in attrs and "c" in dims: + metadata = multiscale0.get("metadata") + if metadata is None: + metadata = {} + coords = {d: metadata[d] for d in dims if d in metadata and d != "c"} + if "c" in dims and "omero" in attrs: channel_names = attrs["omero"].get("channels") if channel_names is not None: coords["c"] = [c["label"] for c in channel_names] @@ -613,9 +644,9 @@ def _read_zarr_attrs(multiscale0: zarr.Group) -> tuple[dict, dict, list[str]]: if len(space_indices_with_units) > 0: scale = multiscale0["datasets"][0]["coordinateTransformations"][0]["scale"] physical_pixel_sizes = tuple([scale[d] for d in space_indices_with_units]) - attrs["physical_pixel_sizes"] = physical_pixel_sizes - attrs["physical_pixel_units"] = tuple(units) - return coords, attrs, dims + metadata["physical_pixel_sizes"] = physical_pixel_sizes + metadata["physical_pixel_units"] = tuple(units) + return coords, metadata, dims def _read_ome_zarr_array( @@ -632,14 +663,13 @@ def _read_ome_zarr_array( node = zarr.open(node, mode="r") if node is None: raise ValueError(f"{_node} not found") - if "multiscales" in node.attrs: + # For zarr v3, everything is under the "ome" namespace + if "ome" in node.attrs or "multiscales" in node.attrs: dims = None coords = {} attrs = {} - multiscales = node.attrs["multiscales"] - if len(multiscales) > 0: - multiscale0 = multiscales[0] - coords, attrs, dims = _read_zarr_attrs(multiscale0) + coords, attrs, dims = _read_zarr_attrs(node.attrs) + array = node["0"] return array, dims, coords, attrs else: # see if user passed test.zarr and zarr file only has one image @@ -648,7 +678,7 @@ def _read_ome_zarr_array( image_keys = list(images.keys()) if len(image_keys) == 1: return _read_ome_zarr_array(images[image_keys[0]]) - logger.warning("multiscales not found in attrs") + logger.warning(f"multiscales not found in attrs for {node} ") def 
read_ome_zarr_array( From b0bfdc889612f9a880465e192a70b2837c31f209 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 29 Jan 2026 16:34:57 -0500 Subject: [PATCH 02/32] Support zarr 3 --- pyproject.toml | 21 +- requirements.txt | 6 +- scallops/_bioio_zarr_reader.py | 625 +++++++++++++----- scallops/cli/register.py | 2 +- scallops/io.py | 6 +- scallops/registration/itk.py | 48 +- scallops/stitch/_stitch.py | 32 +- scallops/stitch/fuse.py | 38 +- scallops/tests/test_features.py | 6 +- .../tests/test_illumination_correction.py | 4 +- scallops/zarr_io.py | 22 +- 11 files changed, 592 insertions(+), 218 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2d4d87d..bbc1702 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,16 +30,27 @@ classifiers = [# https://pypi.python.org/pypi?%3Aaction=list_classifiers "Topic :: Scientific/Engineering :: Bio-Informatics", ] + + + + + + + + + dependencies = [ - "adjustText", "anndata>=0.12.4", # https://github.com/scverse/anndata/issues/2166 - "bioio", + "adjustText", "bioio-nd2", "bioio-tifffile", + "bioio-base", + "bioio-ome-tiff", + "bioio", "centrosome", "cp-measure", - "dask-image", "dask", + "dask-image", "decorator", "filelock", "flox", @@ -60,7 +71,6 @@ dependencies = [ "psutil", "pyarrow", "pydantic", - "pysam", "scikit-image", "scikit-learn", "scipy", @@ -88,6 +98,9 @@ cellpose = [ ufish = [ "ufish" ] +pysam = [ + "pysam" +] test = [ "miniwdl", "pytest", diff --git a/requirements.txt b/requirements.txt index 830a127..ec942ad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,9 @@ anndata==0.12.9 adjustText==1.3.0 +bioio==3.2.0 bioio-nd2==1.6.0 +bioio-ome-tiff==1.4.0 bioio-tifffile==1.3.0 -bioio-base==3.2.0 -bioio-ome-tiff -bioio==3.2.0 centrosome==1.3.3 cp-measure==0.1.13 cython==3.2.4 @@ -30,7 +29,6 @@ pint==0.25.2 psutil==7.2.2 pyarrow==23.0.0 pydantic==2.12.5 -pysam==0.23.3 scikit-image==0.26.0 scikit-learn==1.8.0 scipy==1.16.3 diff --git a/scallops/_bioio_zarr_reader.py 
b/scallops/_bioio_zarr_reader.py index 6118c46..584115d 100644 --- a/scallops/_bioio_zarr_reader.py +++ b/scallops/_bioio_zarr_reader.py @@ -1,110 +1,165 @@ import logging +import warnings from pathlib import Path from typing import Any, Dict, List, Optional, Tuple +import bioio_ome_zarr.utils as metadata_utils +import dask.array as da import xarray as xr +import zarr from bioio_base import constants, dimensions, exceptions, io, reader, types from fsspec.spec import AbstractFileSystem -from ome_zarr.io import parse_url -from ome_zarr.reader import Reader as ZarrReader +from ome_types import OME +from ome_types.model import Channel, Image, Pixels, PixelType +from zarr.core.group import GroupMetadata -logger = logging.getLogger("scallops") +############################################################################### + +log = logging.getLogger(__name__) + + +############################################################################### -# Same as https://github.com/bioio-devs/bioio-ome-zarr/blob/main/bioio_ome_zarr/reader.py but fixes bug in channel names -# Also checks to see if zarr path is {zarr_path}/images/image1 with only 1 image -# See https://github.com/bioio-devs/bioio-ome-zarr/pull/22 class ScallopsZarrReader(reader.Reader): - """The main class of each reader plugin. This class is subclass of the abstract class reader - (BaseReader) in bioio-base. + """ + The main class of the `bioio_ome_zarr` plugin. This class is a subclass + of the abstract class `reader` (`BaseReader`) in `bioio-base`. Parameters ---------- - image: types.PathLike - String or Path to the ZARR root - fs_kwargs: Dict[str, Any] - Ignored + image : types.PathLike + String or Path to the Zarr top directory (v2 or v3 store). + fs_kwargs : Dict[str, Any] + Passed to fsspec when constructing the filesystem + (e.g. {"anon": True} for public S3). 
""" - _xarray_dask_data: Optional["xr.DataArray"] = None - _xarray_data: Optional["xr.DataArray"] = None - _mosaic_xarray_dask_data: Optional["xr.DataArray"] = None - _mosaic_xarray_data: Optional["xr.DataArray"] = None - _dims: Optional[dimensions.Dimensions] = None - _metadata: Optional[Any] = None + _channel_names: Optional[List[str]] = None _scenes: Optional[Tuple[str, ...]] = None - _current_scene_index: int = 0 - # Do not provide default value because - # they may not need to be used by your reader (i.e. input param is an array) - _fs: "AbstractFileSystem" + _zarr: zarr.Group + _ome_metadata: OME + + _fs: AbstractFileSystem _path: str - # Required Methods + _current_scene_index: int = 0 def __init__( self, image: types.PathLike, fs_kwargs: Dict[str, Any] = {}, ): - # Expand details of provided image + # Expand details of provided image. self._fs, self._path = io.pathlike_to_fs( image, enforce_exists=False, fs_kwargs=fs_kwargs, ) + self._path = self._fs.unstrip_protocol(self._path) - # Enforce valid image - if not self._is_supported_image(self._fs, self._path): - raise exceptions.UnsupportedFileFormatError( - self.__class__.__name__, - self._path, - "Could not find a .zgroup or .zarray file at the provided path.", - ) + # Validate the store – this will raise if unsupported + self._is_supported_image(fs=self._fs, path=self._path, fs_kwargs=fs_kwargs) + + store = self._fs.get_mapper(self._path) # type: ignore[attr-defined] + self._zarr = zarr.open_group(store=store, mode="r") - self._zarr = get_zarr_reader(self._fs, self._path).zarr - self._physical_pixel_sizes: Optional[types.PhysicalPixelSizes] = None - self._channel_names: Optional[List[str]] = None + self._multiscales_metadata = self._zarr.attrs.get("ome", {}).get( + "multiscales" + ) or self._zarr.attrs.get("multiscales", []) + + self._channel_metadata = ( + self._zarr.attrs.get("ome", {}).get("omero", {}).get("channels") + or self._zarr.attrs.get("omero", {}).get("channels") + or [] + ) @staticmethod - def 
_is_supported_image(fs: AbstractFileSystem, path: str, **kwargs: Any) -> bool: + def _is_supported_image( + fs: AbstractFileSystem, path: str, fs_kwargs: Dict[str, Any], **kwargs: Any + ) -> bool: try: - get_zarr_reader(fs, path) + store = fs.get_mapper(path) + group = zarr.open_group(store=store, mode="r") + attrs = group.attrs.asdict() + + # Check for transitional metadata key + if ("bioformats2raw.layout" in attrs) or ( + isinstance(attrs.get("ome"), dict) + and "bioformats2raw.layout" in attrs["ome"] + ): + raise exceptions.UnsupportedFileFormatError( + reader.Reader.__name__, + path, + ( + "Detected transitional layout metadata key " + "'bioformats2raw.layout'. This layout describes multiple " + "image series, not a single image. BioIO does *not* support " + "reading stores using this format." + ), + ) + return True - except AttributeError: - return False + + except Exception as e: + raise exceptions.UnsupportedFileFormatError( + reader.Reader.__name__, + path, + f"Could not parse a Zarr store at the provided path: {e}", + ) from e @classmethod def is_supported_image( - cls, - image: types.ImageLike, - fs_kwargs: Dict[str, Any] = {}, - **kwargs: Any, + cls, image: types.PathLike, fs_kwargs: Dict[str, Any] = {}, **kwargs: Any ) -> bool: if isinstance(image, (str, Path)): - return cls._is_supported_image(None, str(image), **kwargs) - else: - return reader.Reader.is_supported_image( - cls, image, fs_kwargs=fs_kwargs, **kwargs + fs, path = io.pathlike_to_fs( + image, + enforce_exists=False, + fs_kwargs=fs_kwargs, + ) + path = fs.unstrip_protocol(path) + + return cls._is_supported_image( + fs=fs, path=path, fs_kwargs=fs_kwargs, **kwargs ) + return reader.Reader.is_supported_image( + cls, image, fs_kwargs=fs_kwargs, **kwargs + ) + @property def scenes(self) -> Tuple[str, ...]: + """ + Returns + ------- + scenes: Tuple[str, ...] + A tuple of valid scene ids in the file. 
+ """ if self._scenes is None: - scenes = self._zarr.root_attrs["multiscales"] + scenes = [scene.get("name") for scene in self._multiscales_metadata] - # if (each scene has a name) and (that name is unique) use name. - # otherwise generate scene names. - if all("name" in scene for scene in scenes) and ( - len({scene["name"] for scene in scenes}) == len(scenes) - ): - self._scenes = tuple(str(scene["name"]) for scene in scenes) + # Check that every name exists and that they're all unique + if all(scenes) and len(set(scenes)) == len(scenes): + self._scenes = tuple(scenes) else: + # Otherwise generate default IDs by index self._scenes = tuple( - f"scene_{i}" - for i in range(len(self._zarr.root_attrs["multiscales"])) + metadata_utils.generate_ome_image_id(i) + for i in range(len(self._multiscales_metadata)) ) + return self._scenes + def _get_ome_dims(self) -> Tuple[str, ...]: + multiscales = self._multiscales_metadata[self._current_scene_index] + axes = multiscales.get("axes", []) + if axes: + return tuple(ax["name"].upper() for ax in axes) + datasets = multiscales.get("datasets", []) + arr = self._zarr[datasets[0]["path"]] + return tuple(reader.Reader._guess_dim_order(arr.shape)) + @property def resolution_levels(self) -> Tuple[int, ...]: """ @@ -115,16 +170,8 @@ def resolution_levels(self) -> Tuple[int, ...]: By default these are ordered from highest resolution to lowest resolution. 
""" - return tuple( - rl - for rl in range( - len( - self._zarr.root_attrs["multiscales"][self.current_scene_index][ - "datasets" - ] - ) - ) - ) + multiscales = self._multiscales_metadata[self._current_scene_index] + return tuple(range(len(multiscales.get("datasets", [])))) def _read_delayed(self) -> xr.DataArray: return self._xarr_format(delayed=True) @@ -133,106 +180,49 @@ def _read_immediate(self) -> xr.DataArray: return self._xarr_format(delayed=False) def _xarr_format(self, delayed: bool) -> xr.DataArray: - data_path = self._zarr.root_attrs["multiscales"][self.current_scene_index][ - "datasets" - ][self.current_resolution_level]["path"] - image_data = self._zarr.load(data_path) + """ + Build an xarray.DataArray for the current scene and resolution level. - axes = self._zarr.root_attrs["multiscales"][self.current_scene_index].get( - "axes" - ) - if axes: - dims = [sub["name"].upper() for sub in axes] - else: - dims = list(reader.Reader._guess_dim_order(image_data.shape)) + Parameters + ---------- + delayed : bool + If True, wrap the Zarr array in a Dask array (lazy loading). If False, + load the entire dataset into memory as a NumPy array. - if not delayed: - image_data = image_data.compute() + Returns + ------- + xr.DataArray + The image data with proper dims, coords, and raw metadata attr. + + Notes + ----- + * Chooses the dataset path according to `self._current_resolution_level`. + * Attaches the original Zarr attributes under + `constants.METADATA_UNPROCESSED`. 
+ """ + multiscales = self._multiscales_metadata[self._current_scene_index] + datasets = multiscales.get("datasets", []) + data_path = datasets[self._current_resolution_level].get("path") + arr = self._zarr[data_path] + + if delayed: + data = da.from_array(arr, chunks=arr.chunks) + else: + data = arr[:] coords = self._get_coords( - dims, - image_data.shape, + list(self._get_ome_dims()), + data.shape, scene=self.current_scene, channel_names=self.channel_names, ) - return xr.DataArray( - image_data, - dims=dims, + data, + dims=self._get_ome_dims(), coords=coords, - attrs={constants.METADATA_UNPROCESSED: self._zarr.root_attrs}, - ) - - # Optional Methods - @property - def physical_pixel_sizes(self) -> types.PhysicalPixelSizes: - """Return the physical pixel sizes of the image.""" - if self._physical_pixel_sizes is None: - try: - z_size, y_size, x_size = self._get_pixel_size( - list(self.dims.order), - ) - except Exception as e: - logger.warning(f"Could not parse zarr pixel size: {e}") - z_size, y_size, x_size = None, None, None - - self._physical_pixel_sizes = types.PhysicalPixelSizes( - z_size, y_size, x_size - ) - return self._physical_pixel_sizes - - def _get_pixel_size( - self, - dims: List[str], - ) -> Tuple[Optional[float], Optional[float], Optional[float]]: - # OmeZarr file may contain an additional set of "coordinateTransformations" - # these coefficents are applied to all resolution levels. 
- if ( - "coordinateTransformations" - in self._zarr.root_attrs["multiscales"][self.current_scene_index] - ): - universal_res_consts = self._zarr.root_attrs["multiscales"][ - self.current_scene_index - ]["coordinateTransformations"][0]["scale"] - else: - universal_res_consts = [1.0 for _ in range(len(dims))] - - coord_transform = self._zarr.root_attrs["multiscales"][ - self.current_scene_index - ]["datasets"][self.current_resolution_level]["coordinateTransformations"] - - spatial_coeffs = {} - - for dim in [ - dimensions.DimensionNames.SpatialX, - dimensions.DimensionNames.SpatialY, - dimensions.DimensionNames.SpatialZ, - ]: - if dim in dims: - dim_index = dims.index(dim) - spatial_coeffs[dim] = ( - coord_transform[0]["scale"][dim_index] - * universal_res_consts[dim_index] - ) - else: - spatial_coeffs[dim] = None - - return ( - spatial_coeffs[dimensions.DimensionNames.SpatialZ], - spatial_coeffs[dimensions.DimensionNames.SpatialY], - spatial_coeffs[dimensions.DimensionNames.SpatialX], + attrs={constants.METADATA_UNPROCESSED: self.metadata}, ) - @property - def channel_names(self) -> Optional[List[str]]: - if self._channel_names is None: - if "omero" in self._zarr.root_attrs: - self._channel_names = [ - str(channel["label"]) - for channel in self._zarr.root_attrs["omero"]["channels"] - ] - return self._channel_names - @staticmethod def _get_coords( dims: List[str], @@ -240,24 +230,335 @@ def _get_coords( scene: str, channel_names: Optional[List[str]], ) -> Dict[str, Any]: - coords: Dict[str, Any] = {} + """ + Construct coordinate mappings for each dimension, currently only Channel. + + Parameters + ---------- + dims : list of str + The dimension names in order (e.g. ["T","C","Z","Y","X"]). + shape : tuple of int + The lengths of each dimension in `dims`. + scene : str + Identifier for the current scene, used in default channel IDs. 
+ channel_names : list of str or None + If provided, use these names for the Channel coordinate; otherwise + generate default OME channel IDs. - # Use dims for coord determination + Returns + ------- + coords : dict + A mapping from dimension name to coordinate values. Only includes + entries for Channel if present in `dims`. + """ + coords = {} if dimensions.DimensionNames.Channel in dims: # Generate channel names if no existing channel names if channel_names is None: coords[dimensions.DimensionNames.Channel] = [ - f"channel_{i}" + metadata_utils.generate_ome_channel_id(scene, i) for i in range(shape[dims.index(dimensions.DimensionNames.Channel)]) ] else: coords[dimensions.DimensionNames.Channel] = channel_names - return coords + def _get_scale_array(self, dims: Tuple[str, ...]) -> List[float]: + """ + Compute combined scale factors for each dimension by merging the + overall and per-dataset coordinate transformations. + + Parameters + ---------- + dims : tuple of str + The dimension names in order (e.g. ("T","C","Z","Y","X")). + + Returns + ------- + scale : list of float + The elementwise product of the global and dataset-specific scales. + """ + multiscales = self._multiscales_metadata[self._current_scene_index] + overall_scale = multiscales.get( + "coordinateTransformations", [{"scale": [1.0] * len(dims)}] + )[0]["scale"] + dataset_scale = multiscales["datasets"][self._current_resolution_level][ + "coordinateTransformations" + ][0]["scale"] + return [o * d for o, d in zip(overall_scale, dataset_scale)] + + @property + def time_interval(self) -> Optional[types.TimeInterval]: + """ + Returns + ------- + sizes: Time Interval + Using available metadata, this float represents the time interval for + dimension T. 
+ 
+        """
+        try:
+            if dimensions.DimensionNames.Time in self._get_ome_dims():
+                return self._get_scale_array(self._get_ome_dims())[
+                    self._get_ome_dims().index(dimensions.DimensionNames.Time)
+                ]
+        except Exception as e:
+            warnings.warn(f"Could not parse time interval: {e}")
+        return None
+
+    @property
+    def physical_pixel_sizes(self) -> Optional[types.PhysicalPixelSizes]:
+        """
+        Returns
+        -------
+        sizes: PhysicalPixelSizes or None
+            Physical pixel sizes for Z, Y, and X if any are available;
+            otherwise None. Warns and returns None on parse errors.
+        """
+        try:
+            dims = self._get_ome_dims()
+            arr = self._get_scale_array(dims)
+
+            Z = (
+                arr[dims.index(dimensions.DimensionNames.SpatialZ)]
+                if dimensions.DimensionNames.SpatialZ in dims
+                else None
+            )
+            Y = (
+                arr[dims.index(dimensions.DimensionNames.SpatialY)]
+                if dimensions.DimensionNames.SpatialY in dims
+                else None
+            )
+            X = (
+                arr[dims.index(dimensions.DimensionNames.SpatialX)]
+                if dimensions.DimensionNames.SpatialX in dims
+                else None
+            )
+        except Exception as e:
+            warnings.warn(f"Could not parse pixel sizes: {e}")
+            return None
+
+        # If none of the spatial axes were found, return None
+        if X is None and Y is None and Z is None:
+            return None
+
+        return types.PhysicalPixelSizes(Z=Z, Y=Y, X=X)
+
+    @property
+    def channel_names(self) -> Optional[List[str]]:
+        """
+        Returns
+        -------
+        channel_names: List[str]
+            Using available metadata, the list of strings representing channel names.
+            If no channel dimension present in the data, returns None.
+ """ + if self._channel_names is None: + channels_meta = self._zarr.attrs.get("ome", {}).get("omero", {}).get( + "channels" + ) or self._zarr.attrs.get("omero", {}).get("channels") + if channels_meta: + self._channel_names = [str(ch.get("label", "")) for ch in channels_meta] + return self._channel_names + + @property + def metadata(self) -> GroupMetadata: + return self._zarr.metadata + + @property + def scale(self) -> types.Scale: + """ + Returns + ------- + scale: Scale + A Scale object constructed from the Reader's time_interval and + physical_pixel_sizes. + + Notes + ----- + * Combines temporal and spatial scaling information into a single object. + """ + # build a mapping from each dim → its scale value + dims = self._get_ome_dims() + arr = self._get_scale_array(dims) + scale_map = dict(zip(dims, arr)) + + return types.Scale( + T=self.time_interval, + C=scale_map.get(dimensions.DimensionNames.Channel), + Z=scale_map.get(dimensions.DimensionNames.SpatialZ), + Y=scale_map.get(dimensions.DimensionNames.SpatialY), + X=scale_map.get(dimensions.DimensionNames.SpatialX), + ) + + @property + def dimension_properties(self) -> types.DimensionProperties: + """ + Returns + ------- + dimension_properties: DimensionProperties + Per-dimension properties for T, C, Z, Y, X. + + This augments the base Reader's DimensionProperties with NGFF + multiscales axis metadata, if present: + + * axis["type"] → DimensionProperty.type (e.g. "space", "time", "channel") + * axis["unit"] → DimensionProperty.unit (parsed via types.ureg) + + Only dimensions with an explicit NGFF axis definition are populated. + Dimensions without an axis entry are cleared to (type=None, unit=None). + + Additionally, if a channel axis is present with no unit, this reader + assigns a default dimensionless unit for C. 
+ """ + base_dp = super().dimension_properties + + multiscales = self._multiscales_metadata[self._current_scene_index] + axes = multiscales.get("axes") or [] + + axis_by_name: Dict[str, Dict[str, Any]] = {} + for ax in axes: + name = ax.get("name") + if name is not None: + axis_by_name[name.upper()] = ax + + def _from_axis( + dim_letter: str, base_prop: types.DimensionProperty + ) -> types.DimensionProperty: + """ + Build a DimensionProperty for a single dim based on its NGFF axis. + """ + ax = axis_by_name.get(dim_letter) + if ax is None: + # No axis defined for this dim → treat as absent + return types.DimensionProperty(type=None, unit=None) + + axis_type = ax.get("type", base_prop.type) + + unit = base_prop.unit + unit_str = ax.get("unit") + if unit_str is not None: + try: + unit = types.ureg.Unit(unit_str) + except Exception as e: + warnings.warn( + f"Could not parse unit {unit_str!r} for axis " + f"{ax.get('name')!r}: {e}. Leaving unit unset.", + UserWarning, + ) + + return types.DimensionProperty( + type=axis_type, + unit=unit, + ) + + dp = types.DimensionProperties( + T=_from_axis("T", base_dp.T), + C=_from_axis("C", base_dp.C), + Z=_from_axis("Z", base_dp.Z), + Y=_from_axis("Y", base_dp.Y), + X=_from_axis("X", base_dp.X), + ) + + # If a channel axis exists and still has no unit → default to dimensionless. + # We check axis_by_name to ensure "C" is actually defined for this store. + if "C" in axis_by_name and dp.C.type == "channel": + if dp.C.unit is None: + dp = types.DimensionProperties( + T=dp.T, + C=types.DimensionProperty( + type=dp.C.type, + unit=types.ureg.dimensionless, + ), + Z=dp.Z, + Y=dp.Y, + X=dp.X, + ) + elif dp.C.unit != types.ureg.dimensionless: + # Warn if C is ever assigned a non-dimensionless unit + warnings.warn( + f"Channel axis has a non-dimensionless unit {dp.C.unit!r}. 
" + "Channel dimensions should be unitless; this is likely a " + "metadata error in the NGFF store.", + UserWarning, + ) + + return dp + + @property + def ome_metadata(self) -> OME: + """ + Build multi-scene OME object with one Image per scene. + Raises ValueError on unsupported dtype, because Pixels is required. + """ + if hasattr(self, "_ome_metadata") and self._ome_metadata is not None: + return self._ome_metadata + + # Dynamic PixelType lookup + dtype_key = str(self.dtype).upper() + try: + pixel_type_enum = PixelType[dtype_key] + except KeyError: + raise ValueError( + f"Unsupported dtype '{self.dtype}' for Pixels.type. " + "Cannot build OME metadata without a supported pixel type." + ) + + original_scene = self.current_scene_index + images = [] + + for idx, scene_id in enumerate(self.scenes): + self.set_scene(idx) + + ch_meta = self._channel_metadata + + size_x = getattr(self.dims, "X", 1) + size_y = getattr(self.dims, "Y", 1) + size_z = getattr(self.dims, "Z", 1) + size_c = getattr(self.dims, "C", 1) + size_t = getattr(self.dims, "T", 1) + + channels = [] + for i in range(size_c): + meta = ch_meta[i] if i < len(ch_meta) else {} + contrast = [] + if not meta.get("active", True): + contrast.append("Off") + if meta.get("inverted"): + contrast.append("inverted") + color = meta.get("color") + channel_args = dict() + if contrast: + channel_args["contrast_method"] = contrast + if color is not None: + channel_args["color"] = color + + channels.append( + Channel( + id=f"Channel:{i}", + name=meta.get("label", f"Channel {i}"), + **channel_args, + ) + ) + + pixels = Pixels( + id=f"Pixels:{idx}", + size_x=size_x, + size_y=size_y, + size_z=size_z, + size_c=size_c, + size_t=size_t, + type=pixel_type_enum, + dimension_order="XYZCT", + channels=channels, + physical_size_x=self.scale.X, + physical_size_y=self.scale.Y, + physical_size_z=self.scale.Z, + time_increment=None, + ) -def get_zarr_reader(fs: AbstractFileSystem, path: str) -> ZarrReader: - if fs is not None: - path = 
fs.unstrip_protocol(path) + images.append(Image(id=f"Image:{idx}", name=scene_id, pixels=pixels)) - return ZarrReader(parse_url(path, mode="r")) + self.set_scene(original_scene) + self._ome_metadata = OME(images=images) + return self._ome_metadata diff --git a/scallops/cli/register.py b/scallops/cli/register.py index ed081d9..bf9e89f 100644 --- a/scallops/cli/register.py +++ b/scallops/cli/register.py @@ -464,7 +464,7 @@ def get_matching_names( results = [] for path in paths: name = os.path.basename(path) - if not name.startswith(".") and is_ome_zarr_array(zarr.open(path, "r")): + if not name.startswith(".") and is_ome_zarr_array(zarr.open(path, mode="r")): results.append(path) return results diff --git a/scallops/io.py b/scallops/io.py index 7f82e8b..b4e7fa0 100644 --- a/scallops/io.py +++ b/scallops/io.py @@ -1603,6 +1603,7 @@ def _get_image_key_func(group_by): lambda: [] ) # key is tuple -> value is tuple of group, dict maxdepth = None + for image_path in image_paths: if isinstance(image_path, Path): # IF URI DO NOT PROVIDE AS PATH @@ -1615,6 +1616,7 @@ def _get_image_key_func(group_by): pass else: root = image_path + if root is not None: if "0" not in root: # format: "path.zarr/images/" if "images" in root: @@ -1668,6 +1670,7 @@ def _get_image_key_func(group_by): if image_path in [".", "./"] and _get_fs_protocol(fs) == "file": image_path = fs.info(image_path)["name"].rstrip(".") image_prefix = None + if fs.isdir(image_path): image_path = image_path.rstrip(fs.sep) if maxdepth is None: @@ -1685,6 +1688,7 @@ def _get_image_key_func(group_by): withdirs=True, ) ) + paths = [p for p in all_paths if p.lower().endswith(extension)] if len(paths) == 0: # try with no maxdepth @@ -1722,7 +1726,7 @@ def _get_image_key_func(group_by): group_to_matches[group].append((x, d)) if len(group_to_matches) == 0: - message = [f"No files found matching pattern: {file_regex.pattern}"] + message = [f"No files found matching pattern: {files_pattern}"] if subset_ is not None: 
message.append(f", subset: {', '.join([str(s) for s in subset_])}") if len(group_by) > 0: diff --git a/scallops/registration/itk.py b/scallops/registration/itk.py index 205c15b..f6257b2 100644 --- a/scallops/registration/itk.py +++ b/scallops/registration/itk.py @@ -33,7 +33,7 @@ from scallops.registration.landmarks import _get_translation, find_landmarks from scallops.utils import _dask_from_array_no_copy from scallops.xr import _get_dims -from scallops.zarr_io import open_ome_zarr, write_zarr +from scallops.zarr_io import _zarr_v3, open_ome_zarr, write_zarr logger = logging.getLogger("scallops") @@ -331,12 +331,23 @@ def _init_callback(init_params: dict[str, Any]) -> dict[str, Any]: group = images_group.create_group( image_name.replace("/", "-"), overwrite=True ) - zarr_dataset = group.create_dataset( - "0", - shape=shape, - chunks=(1,) * (len(shape) - 2) + chunk_size, - dtype=dtype, - overwrite=True, + + zarr_dataset = ( + group.create_array( + "0", + shape=shape, + chunks=(1,) * (len(shape) - 2) + chunk_size, + dtype=dtype, + overwrite=True, + ) + if _zarr_v3() + else group.create_dataset( + "0", + shape=shape, + chunks=(1,) * (len(shape) - 2) + chunk_size, + dtype=dtype, + overwrite=True, + ) ) return { @@ -1164,12 +1175,23 @@ def _itk_transform_image_zarr( image_name.replace("/", "-"), overwrite=True ) chunks = (1,) * len(transform_dims) + (chunksize or (1024, 1024)) - data = group.create_dataset( - "0", - shape=dim_sizes + output_size, - chunks=chunks, - dtype=image.dtype, - overwrite=True, + + data = ( + group.create_array( + "0", + shape=dim_sizes + output_size, + chunks=chunks, + dtype=image.dtype, + overwrite=True, + ) + if _zarr_v3() + else group.create_dataset( + "0", + shape=dim_sizes + output_size, + chunks=chunks, + dtype=image.dtype, + overwrite=True, + ) ) _itk_transform_image( diff --git a/scallops/stitch/_stitch.py b/scallops/stitch/_stitch.py index acd7401..c53e3a4 100644 --- a/scallops/stitch/_stitch.py +++ b/scallops/stitch/_stitch.py @@ 
-14,7 +14,6 @@ import pyarrow.parquet as pq import zarr from sklearn.cluster import AgglomerativeClustering -from zarr.errors import PathNotFoundError from scallops.cli.util import _get_cli_logger, cli_metadata from scallops.io import is_parquet_file, read_image @@ -32,7 +31,7 @@ tile_source_labels, ) from scallops.utils import _dask_from_array_no_copy -from scallops.zarr_io import is_ome_zarr_array +from scallops.zarr_io import _zarr_v3, is_ome_zarr_array logger = _get_cli_logger() @@ -82,14 +81,14 @@ def _single_stitch( if is_ome_zarr_array(image_output_root.get(f"images/{image_key}")): logger.info(f"Skipping stitching for {image_key}.") return - except PathNotFoundError: + except: # noqa: E722 pass elif not no_save_labels: try: if is_ome_zarr_array(image_output_root.get(f"labels/{image_key}-mask")): logger.info(f"Skipping stitching for {image_key}.") return - except PathNotFoundError: + except: # noqa: E722 pass elif is_parquet_file(f"{other_output_path}{image_key}-positions.parquet"): logger.info(f"Skipping stitching for {image_key}.") @@ -405,13 +404,24 @@ def _write_arrays( labels_group = image_output_root.require_group("labels") group = labels_group.create_group(image_key + "-mask", overwrite=True) - array = group.create_dataset( - name="0", - shape=(fused_y_size, fused_x_size), - chunks=chunk_size, - dtype=np.uint8, - dimension_separator="/", - overwrite=True, + array = ( + group.create_array( + name="0", + shape=(fused_y_size, fused_x_size), + chunks=chunk_size, + dtype=np.uint8, + chunk_key_encoding="/", + overwrite=True, + ) + if _zarr_v3() + else group.create_dataset( + name="0", + shape=(fused_y_size, fused_x_size), + chunks=chunk_size, + dtype=np.uint8, + dimension_separator="/", + overwrite=True, + ) ) da.to_zarr( diff --git a/scallops/stitch/fuse.py b/scallops/stitch/fuse.py index a7343ae..d5c47d1 100644 --- a/scallops/stitch/fuse.py +++ b/scallops/stitch/fuse.py @@ -22,6 +22,7 @@ from scallops.stitch._radial import radial_correct from 
scallops.stitch.utils import dtype_convert from scallops.utils import _cpu_count, _dask_from_array_no_copy +from scallops.zarr_io import _zarr_v3 logger = logging.getLogger("scallops") @@ -223,17 +224,32 @@ def _fuse( locks = np.array(locks) partition_tree = shapely.STRtree(partition_boxes) - result = group.create_dataset( - shape=( - len(output_channels), # c - fused_y_size, - fused_x_size, - ), - dtype=target_dtype, - chunks=(1,) + chunk_size, - name="0", - dimension_separator="/", - overwrite=True, + result = ( + group.create_dataset( + shape=( + len(output_channels), # c + fused_y_size, + fused_x_size, + ), + dtype=target_dtype, + chunks=(1,) + chunk_size, + name="0", + chunk_key_encoding="/", + overwrite=True, + ) + if _zarr_v3() + else group.create_dataset( + shape=( + len(output_channels), # c + fused_y_size, + fused_x_size, + ), + dtype=target_dtype, + chunks=(1,) + chunk_size, + name="0", + dimension_separator="/", + overwrite=True, + ) ) _fuse_image_delayed = delayed(_fuse_image) diff --git a/scallops/tests/test_features.py b/scallops/tests/test_features.py index 4f4c294..08e8bdf 100644 --- a/scallops/tests/test_features.py +++ b/scallops/tests/test_features.py @@ -60,12 +60,10 @@ def test_to_label_crops(tmp_path, array_A1_102_cells, array_A1_102_alnpheno): assert len(result_df) == 1 and result_df.index.values[0] == 2603 group = zarr.group() - intensity_image_zarr = group.create_dataset( - name="image", shape=intensity_image.shape - ) + intensity_image_zarr = group.create_array(name="image", shape=intensity_image.shape) intensity_image_zarr[:] = intensity_image.compute() - label_image_zarr = group.create_dataset(name="label", shape=label_image.shape) + label_image_zarr = group.create_array(name="label", shape=label_image.shape) label_image_zarr[:] = label_image.compute() to_label_crops( diff --git a/scallops/tests/test_illumination_correction.py b/scallops/tests/test_illumination_correction.py index d997bc4..3c6c12f 100644 --- 
a/scallops/tests/test_illumination_correction.py +++ b/scallops/tests/test_illumination_correction.py @@ -28,8 +28,8 @@ def test_illumination_correction_cli(tmp_path): ] subprocess.check_call(args) - store = zarr.ZipStore("scallops/tests/data/ops-illum-corr.zip", mode="r") - root = zarr.group(store=store) + store = zarr.storage.ZipStore("scallops/tests/data/ops-illum-corr.zip", mode="r") + root = zarr.open(store=store, mode="r") np.testing.assert_equal( root["data"][...], read_image(os.path.join(tmp_path, "images", "A1")).values.squeeze(), diff --git a/scallops/zarr_io.py b/scallops/zarr_io.py index 8511a2b..7b3a38d 100644 --- a/scallops/zarr_io.py +++ b/scallops/zarr_io.py @@ -10,6 +10,7 @@ import logging from collections.abc import Callable, Hashable +from functools import lru_cache from pathlib import Path from typing import Any, Literal @@ -24,11 +25,12 @@ from dask.delayed import Delayed from dask.graph_manipulation import bind from ome_zarr.axes import KNOWN_AXES -from ome_zarr.format import CurrentFormat +from ome_zarr.format import CurrentFormat, FormatV04 from ome_zarr.io import parse_url from ome_zarr.scale import Scaler from ome_zarr.types import JSONDict from ome_zarr.writer import write_image +from packaging.version import Version from xarray.core.coordinates import DataArrayCoordinates from zarr.storage import StoreLike @@ -330,6 +332,16 @@ def _write_zarr_image( ) +@lru_cache +def _zarr_v3() -> bool: + try: + import zarr + except ImportError: + return False + else: + return Version(zarr.__version__).major >= 3 + + def write_zarr( grp: zarr.Group, data: np.ndarray | da.Array | xr.DataArray, @@ -409,10 +421,9 @@ def write_zarr( ) dask_delayed = [] if zarr_format == "zarr": # No axis validation - fmt = CurrentFormat() - kwargs = dict() - zarr_version = 2 if not hasattr(fmt, "zarr_format") else fmt.zarr_format + zarr_version = 3 if _zarr_v3() else 2 + fmt = CurrentFormat() if zarr_version else FormatV04() if zarr_version == 2: 
kwargs["dimension_separator"] = "/" else: @@ -461,6 +472,7 @@ def write_zarr( omero = zarr_attrs["ome"].get("omero", {}) omero.update(image_attrs.pop("omero")) zarr_attrs["ome"]["omero"] = omero + multiscales[0]["metadata"] = image_attrs if len(dask_delayed) > 0: @@ -481,7 +493,7 @@ def _write_metadata_delayed(grp, d): scaler=scaler, axes=axes, compute=compute, - metadata=image_attrs, + metadata=image_attrs if image_attrs is not None else {}, coordinate_transformations=( [coordinate_transformations] if coordinate_transformations is not None From 7a14f80860ec988ebc2bf8d34b872df8b1b924be Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 29 Jan 2026 16:43:46 -0500 Subject: [PATCH 03/32] Support zarr 3 --- pyproject.toml | 11 +---------- requirements.txt | 1 + 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bbc1702..dfd9fb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,22 +30,13 @@ classifiers = [# https://pypi.python.org/pypi?%3Aaction=list_classifiers "Topic :: Scientific/Engineering :: Bio-Informatics", ] - - - - - - - - - dependencies = [ "anndata>=0.12.4", # https://github.com/scverse/anndata/issues/2166 "adjustText", "bioio-nd2", "bioio-tifffile", - "bioio-base", "bioio-ome-tiff", + "bioio-ome-zarr", "bioio", "centrosome", "cp-measure", diff --git a/requirements.txt b/requirements.txt index ec942ad..1b408ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ adjustText==1.3.0 bioio==3.2.0 bioio-nd2==1.6.0 bioio-ome-tiff==1.4.0 +bioio-ome-zarr==3.2.1 bioio-tifffile==1.3.0 centrosome==1.3.3 cp-measure==0.1.13 From 8cfdafb09ed680febf5fcd01104c46bfae81d013 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Fri, 30 Jan 2026 09:10:43 -0500 Subject: [PATCH 04/32] Support zarr 3 --- scallops/stitch/_stitch.py | 32 +++++--- scallops/stitch/fuse.py | 43 +++++----- scallops/stitch/utils.py | 163 +++++++++++++++++++++---------------- scallops/zarr_io.py | 15 +--- 4 files changed, 141 
insertions(+), 112 deletions(-) diff --git a/scallops/stitch/_stitch.py b/scallops/stitch/_stitch.py index c53e3a4..527dd65 100644 --- a/scallops/stitch/_stitch.py +++ b/scallops/stitch/_stitch.py @@ -340,10 +340,10 @@ def _single_stitch( tile_shape_no_crop[0] - fuse_crop_width * 2, tile_shape_no_crop[1] - fuse_crop_width * 2, ) - fused_y_size = ( + fused_y_size = int( np.round(stitch_positions_df["y"].max()).astype(int) + fused_tile_shape[0] ) - fused_x_size = ( + fused_x_size = int( np.round(stitch_positions_df["x"].max()).astype(int) + fused_tile_shape[1] ) @@ -410,7 +410,7 @@ def _write_arrays( shape=(fused_y_size, fused_x_size), chunks=chunk_size, dtype=np.uint8, - chunk_key_encoding="/", + chunk_key_encoding={"name": "default", "separator": "/"}, overwrite=True, ) if _zarr_v3() @@ -442,13 +442,24 @@ def _write_arrays( ) if blend == "none": group = labels_group.create_group(image_key + "-tile", overwrite=True) - array = group.create_dataset( - name="0", - shape=(fused_y_size, fused_x_size), - chunks=chunk_size, - dtype=np.uint16, - dimension_separator="/", - overwrite=True, + array = ( + group.create_array( + name="0", + shape=(fused_y_size, fused_x_size), + chunks=chunk_size, + dtype=np.uint16, + chunk_key_encoding={"name": "default", "separator": "/"}, + overwrite=True, + ) + if _zarr_v3() + else group.create_dataset( + name="0", + shape=(fused_y_size, fused_x_size), + chunks=chunk_size, + dtype=np.uint16, + dimension_separator="/", + overwrite=True, + ) ) da.to_zarr( @@ -458,7 +469,6 @@ def _write_arrays( ), url=array, compute=True, - dimension_separator="/", ) label_metadata = _create_label_ome_metadata( image_spacing, image_key + "-tile" diff --git a/scallops/stitch/fuse.py b/scallops/stitch/fuse.py index d5c47d1..9863071 100644 --- a/scallops/stitch/fuse.py +++ b/scallops/stitch/fuse.py @@ -28,7 +28,7 @@ def _create_label_ome_metadata(image_spacing: tuple[float, float], label_name: str): - return { + d = { "multiscales": [ { "axes": [ @@ -39,10 +39,10 @@ 
def _create_label_ome_metadata(image_spacing: tuple[float, float], label_name: s { "coordinateTransformations": [ { - "scale": [ + "scale": ( float(image_spacing[0]), float(image_spacing[1]), - ], + ), "type": "scale", } ], @@ -50,10 +50,13 @@ def _create_label_ome_metadata(image_spacing: tuple[float, float], label_name: s } ], "name": f"/labels/{label_name}", - "version": "0.4", + "version": "0.5" if _zarr_v3() else "0.4", } ] } + if _zarr_v3(): + return {"ome": d} + return d def _create_ome_metadata( @@ -67,7 +70,7 @@ def _create_ome_metadata( metadata["stitch_coords"] = dict() for c in stitch_coords: # convert to dict metadata["stitch_coords"][c] = stitch_coords[c].to_list() - return { + d = { "multiscales": [ { "metadata": metadata, @@ -80,11 +83,11 @@ def _create_ome_metadata( { "coordinateTransformations": [ { - "scale": [ + "scale": ( 1.0, float(image_spacing[0]), float(image_spacing[1]), - ], + ), "type": "scale", } ], @@ -92,10 +95,13 @@ def _create_ome_metadata( } ], "name": f"/images/{image_key}", - "version": "0.4", + "version": "0.5" if _zarr_v3() else "0.4", } ] } + if _zarr_v3(): + return {"ome": d} + return d def _fuse( @@ -175,8 +181,8 @@ def _fuse( df["x"] = df["x"].round().values.astype(int) df["y"] = df["y"].round().values.astype(int) - fused_y_size = (df["y"] + ysize).max() - fused_x_size = (df["x"] + xsize).max() + fused_y_size = int((df["y"] + ysize).max()) + fused_x_size = int((df["x"] + xsize).max()) if channels_per_batch is None: if blend == "none": @@ -223,27 +229,20 @@ def _fuse( locks.append(threading.Lock()) locks = np.array(locks) partition_tree = shapely.STRtree(partition_boxes) + output_shape = (len(output_channels), fused_y_size, fused_x_size) result = ( - group.create_dataset( - shape=( - len(output_channels), # c - fused_y_size, - fused_x_size, - ), + group.create_array( + shape=output_shape, dtype=target_dtype, chunks=(1,) + chunk_size, name="0", - chunk_key_encoding="/", + chunk_key_encoding={"name": "default", "separator": 
"/"}, overwrite=True, ) if _zarr_v3() else group.create_dataset( - shape=( - len(output_channels), # c - fused_y_size, - fused_x_size, - ), + shape=output_shape, dtype=target_dtype, chunks=(1,) + chunk_size, name="0", diff --git a/scallops/stitch/utils.py b/scallops/stitch/utils.py index c7bb063..d2448b4 100644 --- a/scallops/stitch/utils.py +++ b/scallops/stitch/utils.py @@ -4,7 +4,7 @@ import logging import os import tempfile -from collections.abc import Sequence +from collections.abc import Mapping, Sequence from typing import Literal import bioio @@ -20,6 +20,7 @@ from ome_types import from_xml from pint import UndefinedUnitError, UnitRegistry from skimage.util import img_as_float, img_as_ubyte, img_as_uint +from zarr.core.group import GroupMetadata from scallops.cli.util import _group_src_attrs from scallops.features.image_quality import power_spectrum @@ -311,10 +312,10 @@ def _get_ome(image: bioio.BioImage): return metadata except NotImplementedError: pass - - if isinstance(image.metadata, str): + image_metadata = _get_image_metadata(image) + if isinstance(image_metadata, str): try: - return from_xml(image.metadata) + return from_xml(image_metadata) except: # noqa: E722 pass return None @@ -323,7 +324,10 @@ def _get_ome(image: bioio.BioImage): def get_tile_position(image: bioio.BioImage, image_index: int = 0): ome_metadata = _get_ome(image) - if ome_metadata is not None: + if ( + ome_metadata is not None + and len(ome_metadata.images[image_index].pixels.planes) > 0 + ): values = [ ome_metadata.images[image_index].pixels.planes[0].position_y, ome_metadata.images[image_index].pixels.planes[0].position_x, @@ -334,36 +338,42 @@ def get_tile_position(image: bioio.BioImage, image_index: int = 0): physical_size_x_unit = ( ome_metadata.images[image_index].pixels.planes[0].position_x_unit.value ) - elif "multiscales" in image.metadata: - metadata = image.metadata["multiscales"][0]["metadata"] - values = [metadata["position_y"], metadata["position_x"]] - 
physical_size_y_unit = metadata["position_y_unit"] - physical_size_x_unit = metadata["position_x_unit"] else: - attrs = image.xarray_dask_data.attrs - if "unprocessed" in attrs: - if 51123 in attrs["unprocessed"]: - attrs = attrs["unprocessed"][51123] - return np.array([attrs["YPositionUm"], attrs["XPositionUm"]]) - elif 50839 in attrs["unprocessed"]: - attrs = attrs["unprocessed"][50839] - if "Info" in attrs: - attrs = json.loads(attrs["Info"]) + image_metadata = _get_image_metadata(image) + if "ome" in image_metadata or "multiscales" in image_metadata: + metadata = ( + image_metadata["ome"]["multiscales"][0]["metadata"] + if "ome" in image_metadata + else image.metadata["multiscales"][0]["metadata"] + ) + values = [metadata["position_y"], metadata["position_x"]] + physical_size_y_unit = metadata["position_y_unit"] + physical_size_x_unit = metadata["position_x_unit"] + else: + attrs = image.xarray_dask_data.attrs + if "unprocessed" in attrs: + if 51123 in attrs["unprocessed"]: + attrs = attrs["unprocessed"][51123] return np.array([attrs["YPositionUm"], attrs["XPositionUm"]]) - elif 270 in attrs["unprocessed"]: # IXM - attrs = attrs["unprocessed"][270] - import xml.etree.ElementTree as ET - - try: - tree = ET.fromstring(attrs) - stage_y = tree.findall(".//prop[@id='stage-position-y']") - stage_x = tree.findall(".//prop[@id='stage-position-x']") - if len(stage_y) == 1 and len(stage_x) == 1: - stage_y = stage_y[0].attrib["value"] - stage_x = stage_x[0].attrib["value"] - return np.array([stage_y, stage_x]) - except: # noqa: E722 - pass + elif 50839 in attrs["unprocessed"]: + attrs = attrs["unprocessed"][50839] + if "Info" in attrs: + attrs = json.loads(attrs["Info"]) + return np.array([attrs["YPositionUm"], attrs["XPositionUm"]]) + elif 270 in attrs["unprocessed"]: # IXM + attrs = attrs["unprocessed"][270] + import xml.etree.ElementTree as ET + + try: + tree = ET.fromstring(attrs) + stage_y = tree.findall(".//prop[@id='stage-position-y']") + stage_x = 
tree.findall(".//prop[@id='stage-position-x']") + if len(stage_y) == 1 and len(stage_x) == 1: + stage_y = stage_y[0].attrib["value"] + stage_x = stage_x[0].attrib["value"] + return np.array([stage_y, stage_x]) + except: # noqa: E722 + pass if physical_size_y_unit is not None and physical_size_x_unit is not None: try: values[0] = ( @@ -403,6 +413,15 @@ def get_pixel_size( return _pixel_size_from_image(_create_image(filepaths[0])) +def _get_image_metadata(image: bioio.BioImage) -> dict: + metadata = image.metadata # can be zarr GroupMetadata or dict + if isinstance(metadata, GroupMetadata): + metadata = metadata.attributes + if not isinstance(metadata, Mapping): + return dict() + return metadata + + def _pixel_size_from_image(image: bioio.BioImage) -> np.array: ome_metadata = _get_ome(image) values = None @@ -415,43 +434,51 @@ def _pixel_size_from_image(image: bioio.BioImage) -> np.array: ] physical_size_y_unit = ome_metadata.images[0].pixels.physical_size_y_unit.value physical_size_x_unit = ome_metadata.images[0].pixels.physical_size_x_unit.value - elif "multiscales" in image.metadata: - metadata = image.metadata["multiscales"][0]["metadata"] - values = [metadata["physical_size_y"], metadata["physical_size_x"]] - physical_size_y_unit = metadata["physical_size_y_unit"] - physical_size_x_unit = metadata["physical_size_x_unit"] else: - attrs = image.xarray_dask_data.attrs - if "unprocessed" in attrs: - attrs = attrs["unprocessed"] - if 51123 in attrs: - attrs = attrs[51123] - if "PixelSizeUm" in attrs: - pixel_size = attrs["PixelSizeUm"] - values = np.array([pixel_size, pixel_size]) - elif 270 in attrs: - import xml.etree.ElementTree as ET - - try: - tree = ET.fromstring(attrs[270]) - y = tree.findall(".//prop[@id='spatial-calibration-y']") - x = tree.findall(".//prop[@id='spatial-calibration-x']") - if len(y) == 1 and len(x) == 1: - y = y[0].attrib["value"] - x = x[0].attrib["value"] - values = np.array([y, x]).astype(float) - units = 
tree.findall(".//prop[@id='spatial-calibration-units']") - if len(units) == 1: - units = units[0].attrib["value"] - physical_size_y_unit = units - physical_size_x_unit = units - - except: # noqa: E722 - pass - if values is None and hasattr(image, "physical_pixel_sizes"): - values = np.array( - [image.physical_pixel_sizes.Y, image.physical_pixel_sizes.X] + image_metadata = _get_image_metadata(image) + if "ome" in image_metadata or "multiscales" in image_metadata: + metadata = ( + image_metadata["ome"]["multiscales"][0]["metadata"] + if "ome" in image_metadata + else image_metadata["multiscales"][0]["metadata"] ) + values = [metadata["physical_size_y"], metadata["physical_size_x"]] + physical_size_y_unit = metadata["physical_size_y_unit"] + physical_size_x_unit = metadata["physical_size_x_unit"] + else: + attrs = image.xarray_dask_data.attrs + if "unprocessed" in attrs: + attrs = attrs["unprocessed"] + if 51123 in attrs: + attrs = attrs[51123] + if "PixelSizeUm" in attrs: + pixel_size = attrs["PixelSizeUm"] + values = np.array([pixel_size, pixel_size]) + elif 270 in attrs: + import xml.etree.ElementTree as ET + + try: + tree = ET.fromstring(attrs[270]) + y = tree.findall(".//prop[@id='spatial-calibration-y']") + x = tree.findall(".//prop[@id='spatial-calibration-x']") + if len(y) == 1 and len(x) == 1: + y = y[0].attrib["value"] + x = x[0].attrib["value"] + values = np.array([y, x]).astype(float) + units = tree.findall( + ".//prop[@id='spatial-calibration-units']" + ) + if len(units) == 1: + units = units[0].attrib["value"] + physical_size_y_unit = units + physical_size_x_unit = units + + except: # noqa: E722 + pass + if values is None and hasattr(image, "physical_pixel_sizes"): + values = np.array( + [image.physical_pixel_sizes.Y, image.physical_pixel_sizes.X] + ) if physical_size_y_unit is not None and physical_size_x_unit is not None: try: values[0] = ( diff --git a/scallops/zarr_io.py b/scallops/zarr_io.py index 7b3a38d..3ca2ad3 100644 --- a/scallops/zarr_io.py 
+++ b/scallops/zarr_io.py @@ -133,7 +133,7 @@ def _create_omero_metadata( # Napari requires that colors are specified if channel names are specified channels = ( [ - dict(label=channel_names[i], color=colors[i % len(colors)]) + dict(label=str(channel_names[i]), color=colors[i % len(colors)]) for i in range(len(channel_names)) ] if not np.isscalar(channel_names) @@ -334,12 +334,7 @@ def _write_zarr_image( @lru_cache def _zarr_v3() -> bool: - try: - import zarr - except ImportError: - return False - else: - return Version(zarr.__version__).major >= 3 + return Version(zarr.__version__).major >= 3 def write_zarr( @@ -419,6 +414,7 @@ def write_zarr( image_attrs, axes, coordinate_transformations = _attrs_axes_coordinates( image_attrs, coords, dims ) + dask_delayed = [] if zarr_format == "zarr": # No axis validation kwargs = dict() @@ -474,6 +470,7 @@ def write_zarr( zarr_attrs["ome"]["omero"] = omero multiscales[0]["metadata"] = image_attrs + if len(dask_delayed) > 0: @dask.delayed @@ -677,11 +674,7 @@ def _read_ome_zarr_array( raise ValueError(f"{_node} not found") # For zarr v3, everything is under the "ome" namespace if "ome" in node.attrs or "multiscales" in node.attrs: - dims = None - coords = {} - attrs = {} coords, attrs, dims = _read_zarr_attrs(node.attrs) - array = node["0"] return array, dims, coords, attrs else: # see if user passed test.zarr and zarr file only has one image From da8161c5e09df361ec71cef0ad21b896aaead966 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Fri, 30 Jan 2026 10:49:38 -0500 Subject: [PATCH 05/32] Support zarr 3 --- scallops/stitch/_stitch.py | 9 ++++++--- scallops/tests/test_features.py | 8 ++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/scallops/stitch/_stitch.py b/scallops/stitch/_stitch.py index 527dd65..5d66f48 100644 --- a/scallops/stitch/_stitch.py +++ b/scallops/stitch/_stitch.py @@ -473,9 +473,12 @@ def _write_arrays( label_metadata = _create_label_ome_metadata( image_spacing, image_key + "-tile" ) - 
label_metadata["multiscales"][0]["metadata"] = { - "source": f"../../images/{image_key}" - } + label_multiscales = ( + label_metadata["ome"]["multiscales"] + if "ome" in label_metadata + else label_metadata["multiscales"] + ) + label_multiscales[0]["metadata"] = {"source": f"../../images/{image_key}"} group.attrs.update(label_metadata) cleanup_paths = [] if not no_save_image: diff --git a/scallops/tests/test_features.py b/scallops/tests/test_features.py index 08e8bdf..b509be3 100644 --- a/scallops/tests/test_features.py +++ b/scallops/tests/test_features.py @@ -60,10 +60,14 @@ def test_to_label_crops(tmp_path, array_A1_102_cells, array_A1_102_alnpheno): assert len(result_df) == 1 and result_df.index.values[0] == 2603 group = zarr.group() - intensity_image_zarr = group.create_array(name="image", shape=intensity_image.shape) + intensity_image_zarr = group.create_array( + name="image", shape=intensity_image.shape, dtype=intensity_image.dtype + ) intensity_image_zarr[:] = intensity_image.compute() - label_image_zarr = group.create_array(name="label", shape=label_image.shape) + label_image_zarr = group.create_array( + name="label", shape=label_image.shape, dtype=label_image.dtype + ) label_image_zarr[:] = label_image.compute() to_label_crops( From bb1aa32f49b095dbab6825b4725f588199144da9 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Fri, 30 Jan 2026 12:42:28 -0500 Subject: [PATCH 06/32] Support zarr 3 --- requirements.txt | 2 +- scallops/zarr_io.py | 38 ++++++++------------------------------ 2 files changed, 9 insertions(+), 31 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1b408ac..b02c781 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ anndata==0.12.9 adjustText==1.3.0 bioio==3.2.0 -bioio-nd2==1.6.0 +bioio-nd2==1.6.2 bioio-ome-tiff==1.4.0 bioio-ome-zarr==3.2.1 bioio-tifffile==1.3.0 diff --git a/scallops/zarr_io.py b/scallops/zarr_io.py index 3ca2ad3..d94d4a4 100644 --- a/scallops/zarr_io.py +++ b/scallops/zarr_io.py @@ 
-180,7 +180,7 @@ def _fix_attrs(d: dict) -> None: elif isinstance(value, ome_types.OME): # Hack to prevent OverflowError: # Overlong 4 byte UTF-8 sequence detected when encoding string - d[key] = d[key].model_dump() + d[key] = d[key].model_dump(mode="json") elif isinstance(value, zarr.Group): d[key] = str(value) elif isinstance(value, list): @@ -188,7 +188,7 @@ def _fix_attrs(d: dict) -> None: if isinstance(value[i], dict): _fix_attrs(value[i]) elif isinstance(value[i], ome_types.OME): - value[i] = value[i].dict() + value[i] = value[i].model_dump(mode="json") elif isinstance(value[i], zarr.Group): value[i] = str(value) @@ -209,33 +209,8 @@ def _attrs_axes_coordinates( - Updated image attributes dictionary. - List of axes dictionaries. - List of coordinate transformations dictionaries or None. - - :example: - - .. code-block:: python - - import xarray as xr - import numpy as np - from scallops.zarr_io import _attrs_axes_coordinates - - data = np.random.rand(5, 10, 512, 512) - dims = ("c", "z", "y", "x") - coords = {"c": ["DAPI", "FITC", "TRITC", "Cy5", "Cy7"]} - array = xr.DataArray(data, dims=dims, coords=coords) - image_attrs = { - "physical_pixel_sizes": [0.1, 0.1, 0.5], - "physical_pixel_units": ["um", "um", "um"], - } - - # Prepare attributes, axes, and coordinate transformations - updated_attrs, axes, coord_transformations = _attrs_axes_coordinates( - image_attrs, array.coords, array.dims - ) - print(updated_attrs) - print(axes) - print(coord_transformations) """ - image_attrs = _fix_json(image_attrs) + omero = _create_omero_metadata(coords, dims) if omero is not None: image_attrs["omero"] = omero @@ -268,7 +243,9 @@ def _attrs_axes_coordinates( axis["unit"] = physical_pixel_units[space_index] space_index = space_index + 1 axes.append(axis) - + image_attrs = image_attrs.copy() + _fix_attrs(image_attrs) + image_attrs = _fix_json(image_attrs) return image_attrs, axes, coordinate_transformations @@ -408,9 +385,10 @@ def write_zarr( if image_attrs is not None: # 
Metadata can't be numpy arrays or python classes so do a round trip # conversion to convert to JSON serializable - _fix_attrs(image_attrs) + if metadata is not None: image_attrs.update(metadata) + image_attrs, axes, coordinate_transformations = _attrs_axes_coordinates( image_attrs, coords, dims ) From 0f6791082a8ccd7fb1a2826f10b2fa08d6ddc911 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Mon, 2 Feb 2026 08:54:22 -0500 Subject: [PATCH 07/32] updated dask --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b02c781..9802c59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ centrosome==1.3.3 cp-measure==0.1.13 cython==3.2.4 dask-image==2025.11.0 -dask==2026.1.1 +dask==2026.1.2 decorator==5.2.1 filelock==3.20.3 flox==0.10.8 From 734e6f4e43db08258c5164d8c39bfb624879299c Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Mon, 2 Feb 2026 16:45:20 -0500 Subject: [PATCH 08/32] revert dask --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9802c59..b02c781 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ centrosome==1.3.3 cp-measure==0.1.13 cython==3.2.4 dask-image==2025.11.0 -dask==2026.1.2 +dask==2026.1.1 decorator==5.2.1 filelock==3.20.3 flox==0.10.8 From 217a63d2c5c12dc3b2490d826c561fea7870990e Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Mon, 2 Feb 2026 17:12:47 -0500 Subject: [PATCH 09/32] revert dask --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dfd9fb3..6b0dd86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "bioio", "centrosome", "cp-measure", - "dask", + "dask!=2026.1.2",# https://github.com/dask/dask/issues/12265 "dask-image", "decorator", "filelock", From c374a6fede19e8e8ac58151836a916b1484f4569 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 3 Feb 
2026 09:41:15 -0500 Subject: [PATCH 10/32] zarr config test --- scallops/cli/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scallops/cli/features.py b/scallops/cli/features.py index c962793..e4d12aa 100644 --- a/scallops/cli/features.py +++ b/scallops/cli/features.py @@ -449,7 +449,7 @@ def run_pipeline_compute_features(arguments: argparse.Namespace) -> None: dask_cluster_parameters = _dask_workers_threads( threads_per_worker=4 if "sizeshape" in unique_features else 1 ) - + zarr.config.set({"async.concurrency": 1, "threading.max_workers": 1}) objects_dir_sep = None if objects_dir is not None: objects_dir_sep = fsspec.core.url_to_fs(objects_dir)[0].sep From 061e7daf3cf6816da9c27c17d026052e65af16c8 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 3 Feb 2026 10:44:18 -0500 Subject: [PATCH 11/32] zarr config test --- scallops/cli/features.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scallops/cli/features.py b/scallops/cli/features.py index e4d12aa..7a2c2dd 100644 --- a/scallops/cli/features.py +++ b/scallops/cli/features.py @@ -194,12 +194,12 @@ def single_feature( output_fs, _ = fsspec.core.url_to_fs(output_dir) join_df = False features_output_suffix = "" if join_df else "-features" - zarr_inputs = True + zarr_inputs = False # using zarr as input fails with zarr3 with credentials errors - for f in file_list: - if not isinstance(f, (zarr.Group, zarr.Array)): - zarr_inputs = False - break + # for f in file_list: + # if not isinstance(f, (zarr.Group, zarr.Array)): + # zarr_inputs = False + # break if zarr_inputs and stacked_image_tuple is not None: for f in stacked_file_list: @@ -449,7 +449,7 @@ def run_pipeline_compute_features(arguments: argparse.Namespace) -> None: dask_cluster_parameters = _dask_workers_threads( threads_per_worker=4 if "sizeshape" in unique_features else 1 ) - zarr.config.set({"async.concurrency": 1, "threading.max_workers": 1}) + objects_dir_sep = None if objects_dir is not 
None: objects_dir_sep = fsspec.core.url_to_fs(objects_dir)[0].sep From 01ca3f63f9fd4a234ed04f5d2bc2667a68c3b293 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 3 Feb 2026 12:34:17 -0500 Subject: [PATCH 12/32] add trailing c dimension --- scallops/cli/features.py | 4 +++- scallops/features/generate.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scallops/cli/features.py b/scallops/cli/features.py index 7a2c2dd..aeae8b3 100644 --- a/scallops/cli/features.py +++ b/scallops/cli/features.py @@ -93,7 +93,9 @@ def _read_image(file_list: list[str], metadata: dict) -> xr.DataArray: # Try using swap_dims instead or use set_index after rename to create an indexed coordinate. warnings.filterwarnings("ignore", "rename .*", UserWarning) image = image.rename({"t_c_z": "c"}) - + else: + # add trailing c dimension + image = image.expand_dims("c", -1) return image diff --git a/scallops/features/generate.py b/scallops/features/generate.py index 1dd2e4e..1c389b1 100644 --- a/scallops/features/generate.py +++ b/scallops/features/generate.py @@ -158,7 +158,7 @@ def label_features( if isinstance(intensity_image, da.Array): # y,x,c assert intensity_image.shape[:-1] == label_shape, ( - f"{intensity_image.shape} != {label_shape}" + f"{intensity_image.shape[:-1]} != {label_shape}" ) label_image = label_image.rechunk(intensity_image.chunksize[:-1]) From 51892b27b38a405743d1b2eb105bdd48feac0650 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 3 Feb 2026 14:55:28 -0500 Subject: [PATCH 13/32] removed unused parameter --- scallops/cli/pooled_if_sbs.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py index b1f9c30..1744b98 100644 --- a/scallops/cli/pooled_if_sbs.py +++ b/scallops/cli/pooled_if_sbs.py @@ -179,7 +179,6 @@ def _peaks_to_bases( def spot_detection_pipeline( image_tuple: tuple[tuple[str, ...], list[str], dict], iss_channels: list[int], - file_separator: str, root: zarr.Group | str, 
max_filter_width: int, sigma_log: float | list[float], @@ -205,7 +204,6 @@ def spot_detection_pipeline( :param image_tuple: A tuple containing information about the images. :param iss_channels: List of channel indices used for ISS sequencing. - :param file_separator: Separator used in file paths. :param root: Root path or zarr group where the results will be stored. :param max_filter_width: Maximum filter width used in spot detection. :param z_index: Either 'max' or z-index @@ -299,7 +297,6 @@ def spot_detection_pipeline( root=root, image=loged, output_format=output_image_format, - file_separator=file_separator, zarr_format="zarr", compute=compute, ) @@ -314,7 +311,6 @@ def spot_detection_pipeline( root=root, image=std_arr, output_format=output_image_format, - file_separator=file_separator, metadata=dict(parent=image_key), compute=compute, ) @@ -329,7 +325,6 @@ def spot_detection_pipeline( root=root, image=maxed, output_format=output_image_format, - file_separator=file_separator, zarr_format="zarr", compute=compute, ) @@ -821,7 +816,6 @@ def spot_detect_main(arguments: argparse.Namespace): delayed_results += spot_detection_pipeline( img, iss_channels=channels, - file_separator=None, root=root, z_index=z_index, output_image_format="zarr", From 222fbc8bed6aaab50759028ee83c2a75b066d56b Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 3 Feb 2026 14:58:37 -0500 Subject: [PATCH 14/32] added default value for file separator --- scallops/cli/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scallops/cli/util.py b/scallops/cli/util.py index 0d2da08..6782e78 100644 --- a/scallops/cli/util.py +++ b/scallops/cli/util.py @@ -199,7 +199,7 @@ def _write_image( root: zarr.Group | str, image: np.ndarray | xr.DataArray | da.Array, output_format: str, - file_separator: str, + file_separator: str = "/", metadata: dict | None = None, compute: bool = True, **kwargs, From fdd3546467d4fe2e713b9ee5db670f4bbd1813fd Mon Sep 17 00:00:00 2001 From: Joshua Gould 
Date: Tue, 3 Feb 2026 15:09:19 -0500 Subject: [PATCH 15/32] updated keyword args for saving to zarr --- scallops/zarr_io.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scallops/zarr_io.py b/scallops/zarr_io.py index d94d4a4..f9c909c 100644 --- a/scallops/zarr_io.py +++ b/scallops/zarr_io.py @@ -395,28 +395,28 @@ def write_zarr( dask_delayed = [] if zarr_format == "zarr": # No axis validation - kwargs = dict() zarr_version = 3 if _zarr_v3() else 2 fmt = CurrentFormat() if zarr_version else FormatV04() + zarr_array_kwargs = dict() if zarr_version == 2: - kwargs["dimension_separator"] = "/" + zarr_array_kwargs["chunk_key_encoding"] = {"name": "v2", "separator": "/"} else: - kwargs["chunk_key_encoding"] = fmt.chunk_key_encoding + zarr_array_kwargs["chunk_key_encoding"] = fmt.chunk_key_encoding if isinstance(data, da.Array): d = da.to_zarr( arr=data, url=grp.store, component=str(Path(grp.path, "0")), compute=compute, - **kwargs, + zarr_array_kwargs=zarr_array_kwargs, ) if not compute: dask_delayed.append(d) elif not isinstance(data, zarr.Array): if zarr_version == 2: - grp.create_dataset("0", data=data, overwrite=True, **kwargs) + grp.create_dataset("0", data=data, overwrite=True, **zarr_array_kwargs) else: - grp.create_array("0", data=data, overwrite=True, **kwargs) + grp.create_array("0", data=data, overwrite=True, **zarr_array_kwargs) # v3 # ome/omero for channel metadata # ome/multiscales[0]/metadata for other metadata From 01d9046ba891252cc1d8ee35ca504abd21e7da09 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 3 Feb 2026 15:09:37 -0500 Subject: [PATCH 16/32] remove unused delayed list --- scallops/cli/pooled_if_sbs.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py index 1744b98..3d56551 100644 --- a/scallops/cli/pooled_if_sbs.py +++ b/scallops/cli/pooled_if_sbs.py @@ -811,9 +811,8 @@ def spot_detect_main(arguments: argparse.Namespace): 
_create_default_dask_config(), _create_dask_client(dask_scheduler_url, **dask_cluster_parameters), ): - delayed_results = [] for img in exp_gen: - delayed_results += spot_detection_pipeline( + spot_detection_pipeline( img, iss_channels=channels, root=root, @@ -834,8 +833,6 @@ def spot_detect_main(arguments: argparse.Namespace): spot_detection_n_cycles=spot_detection_n_cycles, expected_cycles=expected_cycles, ) - if len(delayed_results) > 0: - dask.compute(*delayed_results) def reads_pipeline( From 225be8945c2318c86d37e2d78ff11d1e95cdeb08 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 3 Feb 2026 15:28:47 -0500 Subject: [PATCH 17/32] consolidate calls to getting fs sep --- scallops/cli/pooled_if_sbs.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py index 3d56551..f02f5b6 100644 --- a/scallops/cli/pooled_if_sbs.py +++ b/scallops/cli/pooled_if_sbs.py @@ -332,16 +332,16 @@ def spot_detection_pipeline( else: del maxed if "peaks" in save_keys: - points_path = ( - f"{_get_store_path(root).rstrip(_get_sep(root))}{_get_sep(root)}points" - ) - protocol = _get_fs_protocol(_get_fs(root)) + root_sep = _get_sep(root) + points_path = f"{_get_store_path(root).rstrip(root_sep)}{root_sep}points" + root_fs = _get_fs(root) + protocol = _get_fs_protocol() if protocol != "file": points_path = f"{protocol}://{points_path}" - _get_fs(root).makedirs(points_path, exist_ok=True) - peaks_path = f"{points_path}{_get_sep(root)}{image_key}-peaks.parquet" - if _get_fs(root).exists(peaks_path): - _get_fs(root).rm(peaks_path, recursive=True) + root_fs.makedirs(points_path, exist_ok=True) + peaks_path = f"{points_path}{root_sep}{image_key}-peaks.parquet" + if root_fs.exists(peaks_path): + root_fs.rm(peaks_path, recursive=True) dask_delayed.append( _to_parquet( From 22206d00daf970162857786c803ca88ba809f186 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 3 Feb 2026 15:48:11 -0500 Subject: 
[PATCH 18/32] added missing param --- scallops/cli/pooled_if_sbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py index f02f5b6..2f06950 100644 --- a/scallops/cli/pooled_if_sbs.py +++ b/scallops/cli/pooled_if_sbs.py @@ -335,7 +335,7 @@ def spot_detection_pipeline( root_sep = _get_sep(root) points_path = f"{_get_store_path(root).rstrip(root_sep)}{root_sep}points" root_fs = _get_fs(root) - protocol = _get_fs_protocol() + protocol = _get_fs_protocol(root_fs) if protocol != "file": points_path = f"{protocol}://{points_path}" root_fs.makedirs(points_path, exist_ok=True) From 1c247d4dffcb8fe2e001c48b84ea55454efa5764 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 3 Feb 2026 18:01:28 -0500 Subject: [PATCH 19/32] test with zarr 3 - open zarr --- scallops/cli/pooled_if_sbs.py | 51 ++++++++++++++++------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py index 2f06950..65b48c4 100644 --- a/scallops/cli/pooled_if_sbs.py +++ b/scallops/cli/pooled_if_sbs.py @@ -25,7 +25,7 @@ import pyarrow.parquet as pq import xarray as xr import zarr -from dask.delayed import Delayed, delayed +from dask.delayed import delayed from matplotlib import pyplot as plt from skimage.segmentation import expand_labels @@ -179,7 +179,7 @@ def _peaks_to_bases( def spot_detection_pipeline( image_tuple: tuple[tuple[str, ...], list[str], dict], iss_channels: list[int], - root: zarr.Group | str, + output: str, max_filter_width: int, sigma_log: float | list[float], z_index: int | str, @@ -196,7 +196,7 @@ def spot_detection_pipeline( spot_detection_method: Literal["log", "spotiflow", "u-fish", "piscis"] = "log", spot_detection_n_cycles: int | None = None, expected_cycles: int | None = None, -) -> list[Delayed]: +): """Run the spot detection pipeline. 
This function processes a set of images, performs spot detection, and saves the @@ -204,7 +204,7 @@ def spot_detection_pipeline( :param image_tuple: A tuple containing information about the images. :param iss_channels: List of channel indices used for ISS sequencing. - :param root: Root path or zarr group where the results will be stored. + :param output: Root path to where the results will be stored. :param max_filter_width: Maximum filter width used in spot detection. :param z_index: Either 'max' or z-index :param sigma_log: Sigma parameter for log transformation in spot detection. @@ -224,17 +224,19 @@ def spot_detection_pipeline( """ _, file_list, metadata = image_tuple image_key = metadata["id"] + output_fs = fsspec.url_to_fs(output)[0] + output_sep = output_fs.sep + output = output.rstrip(output_sep) + points_path = f"{output}{output_sep}points" + + points_protocol = _get_fs_protocol(output_fs) + if points_protocol != "file": + points_path = f"{points_protocol}://{points_path}" + peaks_path = f"{points_path}{output_sep}{image_key}-peaks.parquet" if not force: - points_path = ( - f"{_get_store_path(root).rstrip(_get_sep(root))}{_get_sep(root)}points" - ) - points_protocol = _get_fs_protocol(_get_fs(root)) - if points_protocol != "file": - points_path = f"{points_protocol}://{points_path}" - peaks_path = f"{points_path}{_get_sep(root)}{image_key}-peaks.parquet" if is_parquet_file(peaks_path): logger.info(f"Skipping spot detection for {image_key}") - return [] + return image = _images2fov(file_list, metadata, dask=True) image = _z_projection(image, z_index) if expected_cycles is not None: @@ -294,7 +296,7 @@ def spot_detection_pipeline( dask_delayed.append( _write_image( name=f"{image_key}-log", - root=root, + root=open_ome_zarr(output, mode="a"), image=loged, output_format=output_image_format, zarr_format="zarr", @@ -308,7 +310,7 @@ def spot_detection_pipeline( dask_delayed.append( _write_image( name=f"{image_key}-std", - root=root, + root=open_ome_zarr(output, 
mode="a"), image=std_arr, output_format=output_image_format, metadata=dict(parent=image_key), @@ -322,7 +324,7 @@ def spot_detection_pipeline( dask_delayed.append( _write_image( name=f"{image_key}-max", - root=root, + root=open_ome_zarr(output, mode="a"), image=maxed, output_format=output_image_format, zarr_format="zarr", @@ -332,16 +334,10 @@ def spot_detection_pipeline( else: del maxed if "peaks" in save_keys: - root_sep = _get_sep(root) - points_path = f"{_get_store_path(root).rstrip(root_sep)}{root_sep}points" - root_fs = _get_fs(root) - protocol = _get_fs_protocol(root_fs) - if protocol != "file": - points_path = f"{protocol}://{points_path}" - root_fs.makedirs(points_path, exist_ok=True) - peaks_path = f"{points_path}{root_sep}{image_key}-peaks.parquet" - if root_fs.exists(peaks_path): - root_fs.rm(peaks_path, recursive=True) + output_fs.makedirs(points_path, exist_ok=True) + + if output_fs.exists(peaks_path): + output_fs.rm(peaks_path, recursive=True) dask_delayed.append( _to_parquet( @@ -353,7 +349,6 @@ def spot_detection_pipeline( ) if not compute and len(dask_delayed) > 0: dask.compute(*dask_delayed) - return [] def _fix_cycles(sbs_cycles): @@ -805,7 +800,7 @@ def spot_detect_main(arguments: argparse.Namespace): chunks = (chunks, chunks) output = _add_suffix(output, ".zarr") - root = open_ome_zarr(output, mode="a") + exp_gen = _set_up_experiment(images, image_pattern, group_by, subset=subset) with ( _create_default_dask_config(), @@ -815,7 +810,7 @@ def spot_detect_main(arguments: argparse.Namespace): spot_detection_pipeline( img, iss_channels=channels, - root=root, + output=output, z_index=z_index, output_image_format="zarr", max_filter_width=max_filter_width, From a113c8e2a6400f181fae15ede7a77c3d64c18334 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 10 Feb 2026 13:01:49 -0500 Subject: [PATCH 20/32] write to zarr 2 format using zarr 3 library --- requirements.txt | 4 +-- scallops/registration/itk.py | 14 ++++++++++- scallops/zarr_io.py | 47 
++++++++++++++++++++++-------------- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/requirements.txt b/requirements.txt index ce3f99d..049f2e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ dask-image==2025.11.0 dask==2026.1.1 decorator==5.2.1 filelock==3.20.3 -flox==0.11.0 +flox==0.11.1 fsspec==2026.2.0 igraph==1.0.0 itk-elastix==0.23.0 @@ -24,7 +24,7 @@ matplotlib==3.10.8 natsort==8.4.0 numcodecs==0.16.5 numpy==1.26.4 -ome-zarr==0.12.2 +ome-zarr==0.13.0 pandas==2.3.3 pint==0.25.2 psutil==7.2.2 diff --git a/scallops/registration/itk.py b/scallops/registration/itk.py index f6257b2..263b782 100644 --- a/scallops/registration/itk.py +++ b/scallops/registration/itk.py @@ -33,7 +33,13 @@ from scallops.registration.landmarks import _get_translation, find_landmarks from scallops.utils import _dask_from_array_no_copy from scallops.xr import _get_dims -from scallops.zarr_io import _zarr_v3, open_ome_zarr, write_zarr +from scallops.zarr_io import ( + _zarr_v3, + default_zarr_format, + get_zarr_array_kwargs, + open_ome_zarr, + write_zarr, +) logger = logging.getLogger("scallops") @@ -328,6 +334,7 @@ def _init_callback(init_params: dict[str, Any]) -> dict[str, Any]: group = None if image_root is not None: images_group = image_root.require_group("images", overwrite=False) + fmt = default_zarr_format() group = images_group.create_group( image_name.replace("/", "-"), overwrite=True ) @@ -339,6 +346,7 @@ def _init_callback(init_params: dict[str, Any]) -> dict[str, Any]: chunks=(1,) * (len(shape) - 2) + chunk_size, dtype=dtype, overwrite=True, + zarr_array_kwargs=get_zarr_array_kwargs(fmt), ) if _zarr_v3() else group.create_dataset( @@ -347,6 +355,7 @@ def _init_callback(init_params: dict[str, Any]) -> dict[str, Any]: chunks=(1,) * (len(shape) - 2) + chunk_size, dtype=dtype, overwrite=True, + zarr_array_kwargs=get_zarr_array_kwargs(fmt), ) ) @@ -1175,6 +1184,7 @@ def _itk_transform_image_zarr( image_name.replace("/", "-"), overwrite=True ) 
chunks = (1,) * len(transform_dims) + (chunksize or (1024, 1024)) + fmt = default_zarr_format() data = ( group.create_array( @@ -1183,6 +1193,7 @@ def _itk_transform_image_zarr( chunks=chunks, dtype=image.dtype, overwrite=True, + zarr_array_kwargs=get_zarr_array_kwargs(fmt), ) if _zarr_v3() else group.create_dataset( @@ -1191,6 +1202,7 @@ def _itk_transform_image_zarr( chunks=chunks, dtype=image.dtype, overwrite=True, + zarr_array_kwargs=get_zarr_array_kwargs(fmt), ) ) diff --git a/scallops/zarr_io.py b/scallops/zarr_io.py index e7e7600..cc6e59b 100644 --- a/scallops/zarr_io.py +++ b/scallops/zarr_io.py @@ -25,7 +25,7 @@ from dask.delayed import Delayed from dask.graph_manipulation import bind from ome_zarr.axes import KNOWN_AXES -from ome_zarr.format import CurrentFormat, FormatV04 +from ome_zarr.format import FormatV04 from ome_zarr.io import parse_url from ome_zarr.scale import Scaler from ome_zarr.types import JSONDict @@ -40,6 +40,23 @@ logger = logging.getLogger("scallops") +def default_zarr_format(): + return FormatV04() + + +@lru_cache +def _zarr_v3() -> bool: + return Version(zarr.__version__).major >= 3 + + +def get_zarr_array_kwargs(fmt): + return ( + {"dimension_separator": "/"} + if fmt.version == 2 + else {"chunk_key_encoding": fmt.chunk_key_encoding} + ) + + def is_anndata_zarr(store: StoreLike) -> bool: """Determines whether store is an AnnData Zarr . 
@@ -320,11 +337,6 @@ def _write_zarr_image( ) -@lru_cache -def _zarr_v3() -> bool: - return Version(zarr.__version__).major >= 3 - - def write_zarr( grp: zarr.Group, data: np.ndarray | da.Array | xr.DataArray, @@ -405,14 +417,9 @@ def write_zarr( ) dask_delayed = [] + fmt = default_zarr_format() if zarr_format == "zarr": # No axis validation - zarr_version = 3 if _zarr_v3() else 2 - fmt = CurrentFormat() if zarr_version else FormatV04() - zarr_array_kwargs = dict() - if zarr_version == 2: - zarr_array_kwargs["chunk_key_encoding"] = {"name": "v2", "separator": "/"} - else: - zarr_array_kwargs["chunk_key_encoding"] = fmt.chunk_key_encoding + zarr_array_kwargs = get_zarr_array_kwargs(fmt) if isinstance(data, da.Array): d = da.to_zarr( arr=data, @@ -424,7 +431,7 @@ def write_zarr( if not compute: dask_delayed.append(d) elif not isinstance(data, zarr.Array): - if zarr_version == 2: + if not _zarr_v3(): grp.create_dataset("0", data=data, overwrite=True, **zarr_array_kwargs) else: grp.create_array("0", data=data, overwrite=True, **zarr_array_kwargs) @@ -441,7 +448,7 @@ def write_zarr( multiscales = [dict(version=fmt.version, datasets=datasets, name=grp.name)] zarr_attrs = ( {"multiscales": multiscales} - if zarr_version == 2 + if fmt.zarr_format == 2 else {"ome": {"multiscales": multiscales}} ) @@ -449,7 +456,7 @@ def write_zarr( multiscales[0]["axes"] = axes if image_attrs is not None: if "omero" in image_attrs: - if zarr_version == 2: + if fmt.zarr_format == 2: omero = zarr_attrs.get("omero", {}) omero.update(image_attrs.pop("omero")) zarr_attrs["omero"] = omero @@ -478,6 +485,7 @@ def _write_metadata_delayed(grp, d): group=grp, scaler=scaler, axes=axes, + fmt=fmt, compute=compute, metadata=image_attrs if image_attrs is not None else {}, coordinate_transformations=( @@ -578,11 +586,13 @@ def _write_zarr_labels( isinstance(labels, xr.DataArray) and isinstance(labels.data, da.Array) ): labels = rechunk(labels) + fmt = default_zarr_format() return write_image( labels, grp, 
scaler=scaler, axes=label_axes, + fmt=fmt, metadata=metadata, compute=compute, coordinate_transformations=None, @@ -718,10 +728,11 @@ def open_ome_zarr(url: Path | str, mode: str = "a") -> zarr.Group | None: """ try: - loc = parse_url(url, mode=mode) + fmt = default_zarr_format() + loc = parse_url(url, mode=mode, fmt=fmt) if loc is None: return None - return zarr.open(loc.store, mode=mode) + return zarr.open(loc.store, mode=mode, zarr_format=fmt.zarr_format) except Exception as e: logger.error(f"Failed to open OME-Zarr store: {url}") raise e From 1640298dfac3ecfeeadfa9af22e45e03d3d51944 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 10 Feb 2026 13:52:05 -0500 Subject: [PATCH 21/32] write to zarr 2 format using zarr 3 library --- scallops/stitch/_stitch.py | 68 +++++++++----------------------------- scallops/stitch/fuse.py | 26 +++++++++------ scallops/stitch/utils.py | 4 ++- 3 files changed, 33 insertions(+), 65 deletions(-) diff --git a/scallops/stitch/_stitch.py b/scallops/stitch/_stitch.py index 5d66f48..c294997 100644 --- a/scallops/stitch/_stitch.py +++ b/scallops/stitch/_stitch.py @@ -6,7 +6,6 @@ from collections.abc import Sequence from typing import Literal -import dask.array as da import fsspec import numpy as np import pandas as pd @@ -31,7 +30,7 @@ tile_source_labels, ) from scallops.utils import _dask_from_array_no_copy -from scallops.zarr_io import _zarr_v3, is_ome_zarr_array +from scallops.zarr_io import is_ome_zarr_array, write_zarr logger = _get_cli_logger() @@ -357,8 +356,6 @@ def _single_stitch( blend, image_output_root, image_key, - fused_y_size, - fused_x_size, fused_tile_shape, chunk_size, image_spacing, @@ -383,8 +380,6 @@ def _write_arrays( blend, image_output_root, image_key, - fused_y_size, - fused_x_size, fused_tile_shape, chunk_size, image_spacing, @@ -404,28 +399,8 @@ def _write_arrays( labels_group = image_output_root.require_group("labels") group = labels_group.create_group(image_key + "-mask", overwrite=True) - array = ( - 
group.create_array( - name="0", - shape=(fused_y_size, fused_x_size), - chunks=chunk_size, - dtype=np.uint8, - chunk_key_encoding={"name": "default", "separator": "/"}, - overwrite=True, - ) - if _zarr_v3() - else group.create_dataset( - name="0", - shape=(fused_y_size, fused_x_size), - chunks=chunk_size, - dtype=np.uint8, - dimension_separator="/", - overwrite=True, - ) - ) - - da.to_zarr( - arr=_dask_from_array_no_copy( + write_zarr( + data=_dask_from_array_no_copy( tile_overlap_mask( stitch_positions_df, fill=blend != "none", @@ -433,41 +408,28 @@ def _write_arrays( ), chunks=chunk_size, ), - url=array, + grp=group, + image_attrs=None, + coords=None, + dims=None, + scaler=None, compute=True, - dimension_separator="/", ) group.attrs.update( _create_label_ome_metadata(image_spacing, image_key + "-mask") ) if blend == "none": group = labels_group.create_group(image_key + "-tile", overwrite=True) - array = ( - group.create_array( - name="0", - shape=(fused_y_size, fused_x_size), - chunks=chunk_size, - dtype=np.uint16, - chunk_key_encoding={"name": "default", "separator": "/"}, - overwrite=True, - ) - if _zarr_v3() - else group.create_dataset( - name="0", - shape=(fused_y_size, fused_x_size), - chunks=chunk_size, - dtype=np.uint16, - dimension_separator="/", - overwrite=True, - ) - ) - - da.to_zarr( - arr=_dask_from_array_no_copy( + write_zarr( + data=_dask_from_array_no_copy( tile_source_labels(stitch_positions_df, fused_tile_shape), chunks=chunk_size, ), - url=array, + grp=group, + image_attrs=None, + coords=None, + dims=None, + scaler=None, compute=True, ) label_metadata = _create_label_ome_metadata( diff --git a/scallops/stitch/fuse.py b/scallops/stitch/fuse.py index 9863071..935a6ba 100644 --- a/scallops/stitch/fuse.py +++ b/scallops/stitch/fuse.py @@ -22,12 +22,13 @@ from scallops.stitch._radial import radial_correct from scallops.stitch.utils import dtype_convert from scallops.utils import _cpu_count, _dask_from_array_no_copy -from scallops.zarr_io import 
_zarr_v3 +from scallops.zarr_io import _zarr_v3, default_zarr_format, get_zarr_array_kwargs logger = logging.getLogger("scallops") def _create_label_ome_metadata(image_spacing: tuple[float, float], label_name: str): + fmt = default_zarr_format() d = { "multiscales": [ { @@ -50,13 +51,14 @@ def _create_label_ome_metadata(image_spacing: tuple[float, float], label_name: s } ], "name": f"/labels/{label_name}", - "version": "0.5" if _zarr_v3() else "0.4", + "version": fmt.version, } ] } - if _zarr_v3(): - return {"ome": d} - return d + if fmt.version in ("0.1", "0.2", "0.3", "0.4"): + return d + + return {"ome": d} def _create_ome_metadata( @@ -68,6 +70,7 @@ def _create_ome_metadata( metadata = {} metadata.update(**kwargs) metadata["stitch_coords"] = dict() + fmt = default_zarr_format() for c in stitch_coords: # convert to dict metadata["stitch_coords"][c] = stitch_coords[c].to_list() d = { @@ -95,13 +98,13 @@ def _create_ome_metadata( } ], "name": f"/images/{image_key}", - "version": "0.5" if _zarr_v3() else "0.4", + "version": fmt.version, } ] } - if _zarr_v3(): - return {"ome": d} - return d + if fmt.version in ("0.1", "0.2", "0.3", "0.4"): + return d + return {"ome": d} def _fuse( @@ -230,6 +233,7 @@ def _fuse( locks = np.array(locks) partition_tree = shapely.STRtree(partition_boxes) output_shape = (len(output_channels), fused_y_size, fused_x_size) + fmt = default_zarr_format() result = ( group.create_array( @@ -237,8 +241,8 @@ def _fuse( dtype=target_dtype, chunks=(1,) + chunk_size, name="0", - chunk_key_encoding={"name": "default", "separator": "/"}, overwrite=True, + zarr_array_kwargs=get_zarr_array_kwargs(fmt), ) if _zarr_v3() else group.create_dataset( @@ -246,8 +250,8 @@ def _fuse( dtype=target_dtype, chunks=(1,) + chunk_size, name="0", - dimension_separator="/", overwrite=True, + zarr_array_kwargs=get_zarr_array_kwargs(fmt), ) ) diff --git a/scallops/stitch/utils.py b/scallops/stitch/utils.py index d2448b4..12e5200 100644 --- a/scallops/stitch/utils.py +++ 
b/scallops/stitch/utils.py @@ -340,12 +340,14 @@ def get_tile_position(image: bioio.BioImage, image_index: int = 0): ) else: image_metadata = _get_image_metadata(image) + if "ome" in image_metadata or "multiscales" in image_metadata: metadata = ( image_metadata["ome"]["multiscales"][0]["metadata"] if "ome" in image_metadata - else image.metadata["multiscales"][0]["metadata"] + else image_metadata["multiscales"][0]["metadata"] ) + values = [metadata["position_y"], metadata["position_x"]] physical_size_y_unit = metadata["position_y_unit"] physical_size_x_unit = metadata["position_x_unit"] From a6819533ab62032d52f53fcb9a5e4d63b650ef14 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 10 Feb 2026 13:54:46 -0500 Subject: [PATCH 22/32] write to zarr 2 format using zarr 3 library --- scallops/registration/itk.py | 8 ++++---- scallops/stitch/fuse.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scallops/registration/itk.py b/scallops/registration/itk.py index 263b782..7ba58c5 100644 --- a/scallops/registration/itk.py +++ b/scallops/registration/itk.py @@ -346,7 +346,7 @@ def _init_callback(init_params: dict[str, Any]) -> dict[str, Any]: chunks=(1,) * (len(shape) - 2) + chunk_size, dtype=dtype, overwrite=True, - zarr_array_kwargs=get_zarr_array_kwargs(fmt), + **get_zarr_array_kwargs(fmt), ) if _zarr_v3() else group.create_dataset( @@ -355,7 +355,7 @@ def _init_callback(init_params: dict[str, Any]) -> dict[str, Any]: chunks=(1,) * (len(shape) - 2) + chunk_size, dtype=dtype, overwrite=True, - zarr_array_kwargs=get_zarr_array_kwargs(fmt), + **get_zarr_array_kwargs(fmt), ) ) @@ -1193,7 +1193,7 @@ def _itk_transform_image_zarr( chunks=chunks, dtype=image.dtype, overwrite=True, - zarr_array_kwargs=get_zarr_array_kwargs(fmt), + **get_zarr_array_kwargs(fmt), ) if _zarr_v3() else group.create_dataset( @@ -1202,7 +1202,7 @@ def _itk_transform_image_zarr( chunks=chunks, dtype=image.dtype, overwrite=True, - zarr_array_kwargs=get_zarr_array_kwargs(fmt), + 
**get_zarr_array_kwargs(fmt), ) ) diff --git a/scallops/stitch/fuse.py b/scallops/stitch/fuse.py index 935a6ba..cdd8d02 100644 --- a/scallops/stitch/fuse.py +++ b/scallops/stitch/fuse.py @@ -242,7 +242,7 @@ def _fuse( chunks=(1,) + chunk_size, name="0", overwrite=True, - zarr_array_kwargs=get_zarr_array_kwargs(fmt), + **get_zarr_array_kwargs(fmt), ) if _zarr_v3() else group.create_dataset( @@ -251,7 +251,7 @@ def _fuse( chunks=(1,) + chunk_size, name="0", overwrite=True, - zarr_array_kwargs=get_zarr_array_kwargs(fmt), + **get_zarr_array_kwargs(fmt), ) ) From 03821d0b414e106ac0dae0dc07ee96b8cf9e2552 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 10 Feb 2026 14:57:04 -0500 Subject: [PATCH 23/32] write to zarr 2 format using zarr 3 library --- scallops/registration/itk.py | 51 ++++++++++-------------------------- scallops/stitch/fuse.py | 27 ++++++------------- scallops/zarr_io.py | 12 +-------- 3 files changed, 23 insertions(+), 67 deletions(-) diff --git a/scallops/registration/itk.py b/scallops/registration/itk.py index 7ba58c5..5dbbef1 100644 --- a/scallops/registration/itk.py +++ b/scallops/registration/itk.py @@ -34,7 +34,6 @@ from scallops.utils import _dask_from_array_no_copy from scallops.xr import _get_dims from scallops.zarr_io import ( - _zarr_v3, default_zarr_format, get_zarr_array_kwargs, open_ome_zarr, @@ -339,24 +338,13 @@ def _init_callback(init_params: dict[str, Any]) -> dict[str, Any]: image_name.replace("/", "-"), overwrite=True ) - zarr_dataset = ( - group.create_array( - "0", - shape=shape, - chunks=(1,) * (len(shape) - 2) + chunk_size, - dtype=dtype, - overwrite=True, - **get_zarr_array_kwargs(fmt), - ) - if _zarr_v3() - else group.create_dataset( - "0", - shape=shape, - chunks=(1,) * (len(shape) - 2) + chunk_size, - dtype=dtype, - overwrite=True, - **get_zarr_array_kwargs(fmt), - ) + zarr_dataset = group.create_array( + "0", + shape=shape, + chunks=(1,) * (len(shape) - 2) + chunk_size, + dtype=dtype, + overwrite=True, + 
**get_zarr_array_kwargs(fmt), ) return { @@ -1186,24 +1174,13 @@ def _itk_transform_image_zarr( chunks = (1,) * len(transform_dims) + (chunksize or (1024, 1024)) fmt = default_zarr_format() - data = ( - group.create_array( - "0", - shape=dim_sizes + output_size, - chunks=chunks, - dtype=image.dtype, - overwrite=True, - **get_zarr_array_kwargs(fmt), - ) - if _zarr_v3() - else group.create_dataset( - "0", - shape=dim_sizes + output_size, - chunks=chunks, - dtype=image.dtype, - overwrite=True, - **get_zarr_array_kwargs(fmt), - ) + data = group.create_array( + "0", + shape=dim_sizes + output_size, + chunks=chunks, + dtype=image.dtype, + overwrite=True, + **get_zarr_array_kwargs(fmt), ) _itk_transform_image( diff --git a/scallops/stitch/fuse.py b/scallops/stitch/fuse.py index cdd8d02..3b1d5de 100644 --- a/scallops/stitch/fuse.py +++ b/scallops/stitch/fuse.py @@ -22,7 +22,7 @@ from scallops.stitch._radial import radial_correct from scallops.stitch.utils import dtype_convert from scallops.utils import _cpu_count, _dask_from_array_no_copy -from scallops.zarr_io import _zarr_v3, default_zarr_format, get_zarr_array_kwargs +from scallops.zarr_io import default_zarr_format, get_zarr_array_kwargs logger = logging.getLogger("scallops") @@ -235,24 +235,13 @@ def _fuse( output_shape = (len(output_channels), fused_y_size, fused_x_size) fmt = default_zarr_format() - result = ( - group.create_array( - shape=output_shape, - dtype=target_dtype, - chunks=(1,) + chunk_size, - name="0", - overwrite=True, - **get_zarr_array_kwargs(fmt), - ) - if _zarr_v3() - else group.create_dataset( - shape=output_shape, - dtype=target_dtype, - chunks=(1,) + chunk_size, - name="0", - overwrite=True, - **get_zarr_array_kwargs(fmt), - ) + result = group.create_array( + shape=output_shape, + dtype=target_dtype, + chunks=(1,) + chunk_size, + name="0", + overwrite=True, + **get_zarr_array_kwargs(fmt), ) _fuse_image_delayed = delayed(_fuse_image) diff --git a/scallops/zarr_io.py b/scallops/zarr_io.py index 
cc6e59b..780eb6c 100644 --- a/scallops/zarr_io.py +++ b/scallops/zarr_io.py @@ -10,7 +10,6 @@ import logging from collections.abc import Callable, Hashable -from functools import lru_cache from pathlib import Path from typing import Any, Literal @@ -30,7 +29,6 @@ from ome_zarr.scale import Scaler from ome_zarr.types import JSONDict from ome_zarr.writer import write_image -from packaging.version import Version from xarray.core.coordinates import DataArrayCoordinates from zarr.storage import StoreLike @@ -44,11 +42,6 @@ def default_zarr_format(): return FormatV04() -@lru_cache -def _zarr_v3() -> bool: - return Version(zarr.__version__).major >= 3 - - def get_zarr_array_kwargs(fmt): return ( {"dimension_separator": "/"} @@ -431,10 +424,7 @@ def write_zarr( if not compute: dask_delayed.append(d) elif not isinstance(data, zarr.Array): - if not _zarr_v3(): - grp.create_dataset("0", data=data, overwrite=True, **zarr_array_kwargs) - else: - grp.create_array("0", data=data, overwrite=True, **zarr_array_kwargs) + grp.create_array("0", data=data, overwrite=True, **zarr_array_kwargs) # v3 # ome/omero for channel metadata # ome/multiscales[0]/metadata for other metadata From 020b1eef6c79b957c59694730b7a78a488d7c153 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Tue, 10 Feb 2026 20:05:03 -0500 Subject: [PATCH 24/32] write to zarr 2 format using zarr 3 library --- scallops/stitch/fuse.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scallops/stitch/fuse.py b/scallops/stitch/fuse.py index 3b1d5de..ec16174 100644 --- a/scallops/stitch/fuse.py +++ b/scallops/stitch/fuse.py @@ -381,7 +381,6 @@ def _fuse( url=result, region=(slice(channel_batch, channel_batch + channels_per_batch),), compute=True, - dimension_separator="/", ) From 58372b4da33571d27b09deeda53020b259ac00e6 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Wed, 11 Feb 2026 08:04:09 -0500 Subject: [PATCH 25/32] use bio ome zarr reader --- pyproject.toml | 2 +- requirements.txt | 4 +- scallops/_bioio_zarr_reader.py | 
564 --------------------------------- scallops/io.py | 4 +- 4 files changed, 5 insertions(+), 569 deletions(-) delete mode 100644 scallops/_bioio_zarr_reader.py diff --git a/pyproject.toml b/pyproject.toml index 6b0dd86..9faa417 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "bioio-nd2", "bioio-tifffile", "bioio-ome-tiff", - "bioio-ome-zarr", + "bioio-ome-zarr>=3.2.2", "bioio", "centrosome", "cp-measure", diff --git a/requirements.txt b/requirements.txt index 049f2e2..79bdf55 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ adjustText==1.3.0 bioio==3.2.0 bioio-nd2==1.6.2 bioio-ome-tiff==1.4.0 -bioio-ome-zarr==3.2.1 +bioio-ome-zarr==3.2.2 bioio-tifffile==1.3.0 centrosome==1.3.3 cp-measure==0.1.13 @@ -15,7 +15,7 @@ filelock==3.20.3 flox==0.11.1 fsspec==2026.2.0 igraph==1.0.0 -itk-elastix==0.23.0 +itk-elastix==0.24.0 itk==5.4.5 joblib==1.5.3 kneed==0.8.5 diff --git a/scallops/_bioio_zarr_reader.py b/scallops/_bioio_zarr_reader.py deleted file mode 100644 index 584115d..0000000 --- a/scallops/_bioio_zarr_reader.py +++ /dev/null @@ -1,564 +0,0 @@ -import logging -import warnings -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -import bioio_ome_zarr.utils as metadata_utils -import dask.array as da -import xarray as xr -import zarr -from bioio_base import constants, dimensions, exceptions, io, reader, types -from fsspec.spec import AbstractFileSystem -from ome_types import OME -from ome_types.model import Channel, Image, Pixels, PixelType -from zarr.core.group import GroupMetadata - -############################################################################### - -log = logging.getLogger(__name__) - - -############################################################################### - - -class ScallopsZarrReader(reader.Reader): - """ - The main class of the `bioio_ome_zarr` plugin. This class is a subclass - of the abstract class `reader` (`BaseReader`) in `bioio-base`. 
- - Parameters - ---------- - image : types.PathLike - String or Path to the Zarr top directory (v2 or v3 store). - fs_kwargs : Dict[str, Any] - Passed to fsspec when constructing the filesystem - (e.g. {"anon": True} for public S3). - """ - - _channel_names: Optional[List[str]] = None - _scenes: Optional[Tuple[str, ...]] = None - _zarr: zarr.Group - _ome_metadata: OME - - _fs: AbstractFileSystem - _path: str - - _current_scene_index: int = 0 - - def __init__( - self, - image: types.PathLike, - fs_kwargs: Dict[str, Any] = {}, - ): - # Expand details of provided image. - self._fs, self._path = io.pathlike_to_fs( - image, - enforce_exists=False, - fs_kwargs=fs_kwargs, - ) - self._path = self._fs.unstrip_protocol(self._path) - - # Validate the store – this will raise if unsupported - self._is_supported_image(fs=self._fs, path=self._path, fs_kwargs=fs_kwargs) - - store = self._fs.get_mapper(self._path) # type: ignore[attr-defined] - self._zarr = zarr.open_group(store=store, mode="r") - - self._multiscales_metadata = self._zarr.attrs.get("ome", {}).get( - "multiscales" - ) or self._zarr.attrs.get("multiscales", []) - - self._channel_metadata = ( - self._zarr.attrs.get("ome", {}).get("omero", {}).get("channels") - or self._zarr.attrs.get("omero", {}).get("channels") - or [] - ) - - @staticmethod - def _is_supported_image( - fs: AbstractFileSystem, path: str, fs_kwargs: Dict[str, Any], **kwargs: Any - ) -> bool: - try: - store = fs.get_mapper(path) - group = zarr.open_group(store=store, mode="r") - attrs = group.attrs.asdict() - - # Check for transitional metadata key - if ("bioformats2raw.layout" in attrs) or ( - isinstance(attrs.get("ome"), dict) - and "bioformats2raw.layout" in attrs["ome"] - ): - raise exceptions.UnsupportedFileFormatError( - reader.Reader.__name__, - path, - ( - "Detected transitional layout metadata key " - "'bioformats2raw.layout'. This layout describes multiple " - "image series, not a single image. 
BioIO does *not* support " - "reading stores using this format." - ), - ) - - return True - - except Exception as e: - raise exceptions.UnsupportedFileFormatError( - reader.Reader.__name__, - path, - f"Could not parse a Zarr store at the provided path: {e}", - ) from e - - @classmethod - def is_supported_image( - cls, image: types.PathLike, fs_kwargs: Dict[str, Any] = {}, **kwargs: Any - ) -> bool: - if isinstance(image, (str, Path)): - fs, path = io.pathlike_to_fs( - image, - enforce_exists=False, - fs_kwargs=fs_kwargs, - ) - path = fs.unstrip_protocol(path) - - return cls._is_supported_image( - fs=fs, path=path, fs_kwargs=fs_kwargs, **kwargs - ) - - return reader.Reader.is_supported_image( - cls, image, fs_kwargs=fs_kwargs, **kwargs - ) - - @property - def scenes(self) -> Tuple[str, ...]: - """ - Returns - ------- - scenes: Tuple[str, ...] - A tuple of valid scene ids in the file. - """ - if self._scenes is None: - scenes = [scene.get("name") for scene in self._multiscales_metadata] - - # Check that every name exists and that they're all unique - if all(scenes) and len(set(scenes)) == len(scenes): - self._scenes = tuple(scenes) - else: - # Otherwise generate default IDs by index - self._scenes = tuple( - metadata_utils.generate_ome_image_id(i) - for i in range(len(self._multiscales_metadata)) - ) - - return self._scenes - - def _get_ome_dims(self) -> Tuple[str, ...]: - multiscales = self._multiscales_metadata[self._current_scene_index] - axes = multiscales.get("axes", []) - if axes: - return tuple(ax["name"].upper() for ax in axes) - datasets = multiscales.get("datasets", []) - arr = self._zarr[datasets[0]["path"]] - return tuple(reader.Reader._guess_dim_order(arr.shape)) - - @property - def resolution_levels(self) -> Tuple[int, ...]: - """ - Returns - ------- - resolution_levels: Tuple[str, ...] - Return the available resolution levels for the current scene. - By default these are ordered from highest resolution to lowest - resolution. 
- """ - multiscales = self._multiscales_metadata[self._current_scene_index] - return tuple(range(len(multiscales.get("datasets", [])))) - - def _read_delayed(self) -> xr.DataArray: - return self._xarr_format(delayed=True) - - def _read_immediate(self) -> xr.DataArray: - return self._xarr_format(delayed=False) - - def _xarr_format(self, delayed: bool) -> xr.DataArray: - """ - Build an xarray.DataArray for the current scene and resolution level. - - Parameters - ---------- - delayed : bool - If True, wrap the Zarr array in a Dask array (lazy loading). If False, - load the entire dataset into memory as a NumPy array. - - Returns - ------- - xr.DataArray - The image data with proper dims, coords, and raw metadata attr. - - Notes - ----- - * Chooses the dataset path according to `self._current_resolution_level`. - * Attaches the original Zarr attributes under - `constants.METADATA_UNPROCESSED`. - """ - multiscales = self._multiscales_metadata[self._current_scene_index] - datasets = multiscales.get("datasets", []) - data_path = datasets[self._current_resolution_level].get("path") - arr = self._zarr[data_path] - - if delayed: - data = da.from_array(arr, chunks=arr.chunks) - else: - data = arr[:] - - coords = self._get_coords( - list(self._get_ome_dims()), - data.shape, - scene=self.current_scene, - channel_names=self.channel_names, - ) - return xr.DataArray( - data, - dims=self._get_ome_dims(), - coords=coords, - attrs={constants.METADATA_UNPROCESSED: self.metadata}, - ) - - @staticmethod - def _get_coords( - dims: List[str], - shape: Tuple[int, ...], - scene: str, - channel_names: Optional[List[str]], - ) -> Dict[str, Any]: - """ - Construct coordinate mappings for each dimension, currently only Channel. - - Parameters - ---------- - dims : list of str - The dimension names in order (e.g. ["T","C","Z","Y","X"]). - shape : tuple of int - The lengths of each dimension in `dims`. - scene : str - Identifier for the current scene, used in default channel IDs. 
- channel_names : list of str or None - If provided, use these names for the Channel coordinate; otherwise - generate default OME channel IDs. - - Returns - ------- - coords : dict - A mapping from dimension name to coordinate values. Only includes - entries for Channel if present in `dims`. - """ - coords = {} - if dimensions.DimensionNames.Channel in dims: - # Generate channel names if no existing channel names - if channel_names is None: - coords[dimensions.DimensionNames.Channel] = [ - metadata_utils.generate_ome_channel_id(scene, i) - for i in range(shape[dims.index(dimensions.DimensionNames.Channel)]) - ] - else: - coords[dimensions.DimensionNames.Channel] = channel_names - return coords - - def _get_scale_array(self, dims: Tuple[str, ...]) -> List[float]: - """ - Compute combined scale factors for each dimension by merging the - overall and per-dataset coordinate transformations. - - Parameters - ---------- - dims : tuple of str - The dimension names in order (e.g. ("T","C","Z","Y","X")). - - Returns - ------- - scale : list of float - The elementwise product of the global and dataset-specific scales. - """ - multiscales = self._multiscales_metadata[self._current_scene_index] - overall_scale = multiscales.get( - "coordinateTransformations", [{"scale": [1.0] * len(dims)}] - )[0]["scale"] - dataset_scale = multiscales["datasets"][self._current_resolution_level][ - "coordinateTransformations" - ][0]["scale"] - return [o * d for o, d in zip(overall_scale, dataset_scale)] - - @property - def time_interval(self) -> Optional[types.TimeInterval]: - """ - Returns - ------- - sizes: Time Interval - Using available metadata, this float represents the time interval for - dimension T. 
- - """ - try: - if dimensions.DimensionNames.Time in self._get_ome_dims(): - return self._get_scale_array(self._get_ome_dims())[ - self._get_ome_dims().index(dimensions.DimensionNames.Time) - ] - except Exception as e: - warnings.warn(f"Could not parse time interval: {e}") - return None - - @property - def physical_pixel_sizes(self) -> Optional[types.PhysicalPixelSizes]: - """ - Returns - ------- - sizes: PhysicalPixelSizes or None - Physical pixel sizes for Z, Y, and X if any are available; - otherwise None. Warns and returns None on parse errors. - """ - try: - dims = self._get_ome_dims() - arr = self._get_scale_array(dims) - - Z = ( - arr[dims.index(dimensions.DimensionNames.SpatialZ)] - if dimensions.DimensionNames.SpatialZ in dims - else None - ) - Y = ( - arr[dims.index(dimensions.DimensionNames.SpatialY)] - if dimensions.DimensionNames.SpatialY in dims - else None - ) - X = ( - arr[dims.index(dimensions.DimensionNames.SpatialX)] - if dimensions.DimensionNames.SpatialX in dims - else None - ) - except Exception as e: - warnings.warn(f"Could not parse pixel sizes: {e}") - return None - - # If none of the spatial axes were found, return None - if X is None and Y is None and X is None: - return None - - return types.PhysicalPixelSizes(Z=Z, Y=Y, X=X) - - @property - def channel_names(self) -> Optional[List[str]]: - """ - Returns - ------- - channel_names: List[str] - Using available metadata, the list of strings representing channel names. - If no channel dimension present in the data, returns None. 
- """ - if self._channel_names is None: - channels_meta = self._zarr.attrs.get("ome", {}).get("omero", {}).get( - "channels" - ) or self._zarr.attrs.get("omero", {}).get("channels") - if channels_meta: - self._channel_names = [str(ch.get("label", "")) for ch in channels_meta] - return self._channel_names - - @property - def metadata(self) -> GroupMetadata: - return self._zarr.metadata - - @property - def scale(self) -> types.Scale: - """ - Returns - ------- - scale: Scale - A Scale object constructed from the Reader's time_interval and - physical_pixel_sizes. - - Notes - ----- - * Combines temporal and spatial scaling information into a single object. - """ - # build a mapping from each dim → its scale value - dims = self._get_ome_dims() - arr = self._get_scale_array(dims) - scale_map = dict(zip(dims, arr)) - - return types.Scale( - T=self.time_interval, - C=scale_map.get(dimensions.DimensionNames.Channel), - Z=scale_map.get(dimensions.DimensionNames.SpatialZ), - Y=scale_map.get(dimensions.DimensionNames.SpatialY), - X=scale_map.get(dimensions.DimensionNames.SpatialX), - ) - - @property - def dimension_properties(self) -> types.DimensionProperties: - """ - Returns - ------- - dimension_properties: DimensionProperties - Per-dimension properties for T, C, Z, Y, X. - - This augments the base Reader's DimensionProperties with NGFF - multiscales axis metadata, if present: - - * axis["type"] → DimensionProperty.type (e.g. "space", "time", "channel") - * axis["unit"] → DimensionProperty.unit (parsed via types.ureg) - - Only dimensions with an explicit NGFF axis definition are populated. - Dimensions without an axis entry are cleared to (type=None, unit=None). - - Additionally, if a channel axis is present with no unit, this reader - assigns a default dimensionless unit for C. 
- """ - base_dp = super().dimension_properties - - multiscales = self._multiscales_metadata[self._current_scene_index] - axes = multiscales.get("axes") or [] - - axis_by_name: Dict[str, Dict[str, Any]] = {} - for ax in axes: - name = ax.get("name") - if name is not None: - axis_by_name[name.upper()] = ax - - def _from_axis( - dim_letter: str, base_prop: types.DimensionProperty - ) -> types.DimensionProperty: - """ - Build a DimensionProperty for a single dim based on its NGFF axis. - """ - ax = axis_by_name.get(dim_letter) - if ax is None: - # No axis defined for this dim → treat as absent - return types.DimensionProperty(type=None, unit=None) - - axis_type = ax.get("type", base_prop.type) - - unit = base_prop.unit - unit_str = ax.get("unit") - if unit_str is not None: - try: - unit = types.ureg.Unit(unit_str) - except Exception as e: - warnings.warn( - f"Could not parse unit {unit_str!r} for axis " - f"{ax.get('name')!r}: {e}. Leaving unit unset.", - UserWarning, - ) - - return types.DimensionProperty( - type=axis_type, - unit=unit, - ) - - dp = types.DimensionProperties( - T=_from_axis("T", base_dp.T), - C=_from_axis("C", base_dp.C), - Z=_from_axis("Z", base_dp.Z), - Y=_from_axis("Y", base_dp.Y), - X=_from_axis("X", base_dp.X), - ) - - # If a channel axis exists and still has no unit → default to dimensionless. - # We check axis_by_name to ensure "C" is actually defined for this store. - if "C" in axis_by_name and dp.C.type == "channel": - if dp.C.unit is None: - dp = types.DimensionProperties( - T=dp.T, - C=types.DimensionProperty( - type=dp.C.type, - unit=types.ureg.dimensionless, - ), - Z=dp.Z, - Y=dp.Y, - X=dp.X, - ) - elif dp.C.unit != types.ureg.dimensionless: - # Warn if C is ever assigned a non-dimensionless unit - warnings.warn( - f"Channel axis has a non-dimensionless unit {dp.C.unit!r}. 
" - "Channel dimensions should be unitless; this is likely a " - "metadata error in the NGFF store.", - UserWarning, - ) - - return dp - - @property - def ome_metadata(self) -> OME: - """ - Build multi-scene OME object with one Image per scene. - Raises ValueError on unsupported dtype, because Pixels is required. - """ - if hasattr(self, "_ome_metadata") and self._ome_metadata is not None: - return self._ome_metadata - - # Dynamic PixelType lookup - dtype_key = str(self.dtype).upper() - try: - pixel_type_enum = PixelType[dtype_key] - except KeyError: - raise ValueError( - f"Unsupported dtype '{self.dtype}' for Pixels.type. " - "Cannot build OME metadata without a supported pixel type." - ) - - original_scene = self.current_scene_index - images = [] - - for idx, scene_id in enumerate(self.scenes): - self.set_scene(idx) - - ch_meta = self._channel_metadata - - size_x = getattr(self.dims, "X", 1) - size_y = getattr(self.dims, "Y", 1) - size_z = getattr(self.dims, "Z", 1) - size_c = getattr(self.dims, "C", 1) - size_t = getattr(self.dims, "T", 1) - - channels = [] - for i in range(size_c): - meta = ch_meta[i] if i < len(ch_meta) else {} - contrast = [] - if not meta.get("active", True): - contrast.append("Off") - if meta.get("inverted"): - contrast.append("inverted") - color = meta.get("color") - channel_args = dict() - if contrast: - channel_args["contrast_method"] = contrast - if color is not None: - channel_args["color"] = color - - channels.append( - Channel( - id=f"Channel:{i}", - name=meta.get("label", f"Channel {i}"), - **channel_args, - ) - ) - - pixels = Pixels( - id=f"Pixels:{idx}", - size_x=size_x, - size_y=size_y, - size_z=size_z, - size_c=size_c, - size_t=size_t, - type=pixel_type_enum, - dimension_order="XYZCT", - channels=channels, - physical_size_x=self.scale.X, - physical_size_y=self.scale.Y, - physical_size_z=self.scale.Z, - time_increment=None, - ) - - images.append(Image(id=f"Image:{idx}", name=scene_id, pixels=pixels)) - - 
self.set_scene(original_scene) - self._ome_metadata = OME(images=images) - return self._ome_metadata diff --git a/scallops/io.py b/scallops/io.py index bfccc1a..8767edc 100644 --- a/scallops/io.py +++ b/scallops/io.py @@ -29,6 +29,7 @@ import anndata import bioio +import bioio_ome_zarr import bioio_tifffile import dask import dask.array as da @@ -54,7 +55,6 @@ from xarray.core.utils import equivalent from zarr.storage import StoreLike -from scallops._bioio_zarr_reader import ScallopsZarrReader from scallops.experiment.elements import Experiment, _LazyLoadData from scallops.externals.tifffile2014 import imsave from scallops.utils import forceTCZYX, mlcs @@ -234,7 +234,7 @@ def _create_image(path: str, **kwargs) -> bioio.BioImage: base_path_lc, ext = os.path.splitext(path_lc) if "reader" not in img_args: if ext in ["", ".zarr", "/", ".zarr/"]: - img_args["reader"] = ScallopsZarrReader + img_args["reader"] = bioio_ome_zarr.Reader elif ext in [".tiff", ".tif"] and os.path.splitext(base_path_lc)[1] != ".ome": img_args["reader"] = bioio_tifffile.Reader return bioio.BioImage(path, **img_args) From feec2d9b84504a2e802c6b074ca6669be267045e Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Wed, 11 Feb 2026 08:15:12 -0500 Subject: [PATCH 26/32] remove numpy<2 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9faa417..f710acb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ dependencies = [ "matplotlib", "natsort", "numcodecs", - "numpy<2", + "numpy", "ome-zarr", "pandas", "pint", @@ -72,7 +72,7 @@ dependencies = [ "tensorflow", "tifffile", "xarray", - "zarr" + "zarr>=3" ] [project.optional-dependencies] From 1da21eb9f65bb10a6b6ef149c47d1c526a6030dd Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Wed, 11 Feb 2026 08:16:36 -0500 Subject: [PATCH 27/32] remove numpy<2 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt 
b/requirements.txt index 79bdf55..fdc03a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ mahotas==1.4.18 matplotlib==3.10.8 natsort==8.4.0 numcodecs==0.16.5 -numpy==1.26.4 +numpy==2.3.5 ome-zarr==0.13.0 pandas==2.3.3 pint==0.25.2 From b60f7a78e0aea5db5d8464d8157f45f45cf1c0b0 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Wed, 11 Feb 2026 08:26:16 -0500 Subject: [PATCH 28/32] update ipython --- pyproject.toml | 4 ++-- requirements.doc.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f710acb..f83efb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,11 +36,11 @@ dependencies = [ "bioio-nd2", "bioio-tifffile", "bioio-ome-tiff", - "bioio-ome-zarr>=3.2.2", + "bioio-ome-zarr>=3.2.2", # https://github.com/bioio-devs/bioio-ome-zarr/pull/130, https://github.com/bioio-devs/bioio-ome-zarr/issues/128 "bioio", "centrosome", "cp-measure", - "dask!=2026.1.2",# https://github.com/dask/dask/issues/12265 + "dask!=2026.1.2", # https://github.com/dask/dask/issues/12265 "dask-image", "decorator", "filelock", diff --git a/requirements.doc.txt b/requirements.doc.txt index 6c56034..b75a0dd 100644 --- a/requirements.doc.txt +++ b/requirements.doc.txt @@ -1,4 +1,4 @@ -ipython==9.9.0 +ipython==9.10.0 nbsphinx==0.9.8 sphinx-copybutton==0.5.2 sphinx==9.1.0 From b5d383ad34548982b38a59e8353d961d0cccce46 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Wed, 11 Feb 2026 08:32:30 -0500 Subject: [PATCH 29/32] update deps --- pyproject.toml | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f83efb9..049a1c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,8 +94,7 @@ pysam = [ ] test = [ "miniwdl", - "pytest", - "pytest-xdist" + "pytest" ] dev = [ @@ -109,22 +108,7 @@ doc = [ "sphinx_argparse", "sphinx_rtd_theme", ] -all = [ - "napari", - "napari_ome_zarr", - "cellpose", - "dask-ml", - "ufish", - "pytest", - "pre-commit", - 
"ipython", - "myst_parser", - "nbsphinx", - "sphinx-copybutton", - "sphinx", - "sphinx_argparse", - "sphinx_rtd_theme" -] + [project.entry-points."miniwdl.plugin.container_backend"] miniwdl_test_local = "scallops.tests.miniwdl_local.local_runner:LocalRunner" From aab02d42c5cf38afd481eadfcce3b19c006fdc87 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Wed, 11 Feb 2026 08:59:10 -0500 Subject: [PATCH 30/32] update deps --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 049a1c1..6a15145 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ build-backend = "setuptools.build_meta" requires = [ "cython", - "numpy<2", + "numpy", "setuptools>=42", "setuptools_scm[toml]>=3.4", "wheel" From 1c25fd386379ffbb1dd7dab9e4bbadd1dcb8bd20 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Wed, 11 Feb 2026 09:54:42 -0500 Subject: [PATCH 31/32] rps temp fix --- scallops/features/image_quality.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/scallops/features/image_quality.py b/scallops/features/image_quality.py index 162cd4f..5548d9f 100644 --- a/scallops/features/image_quality.py +++ b/scallops/features/image_quality.py @@ -1,6 +1,32 @@ -import centrosome.radial_power_spectrum import numpy as np import scipy +from scipy.fftpack import fft2 +from scipy.ndimage import sum as nd_sum + + +# copied from centrosome.radial_power_spectrum but use np.ptp instead of img.ptp for numpy 2 +def rps(img): + assert img.ndim == 2 + radii2 = (np.arange(img.shape[0]).reshape((img.shape[0], 1)) ** 2) + ( + np.arange(img.shape[1]) ** 2 + ) + radii2 = np.minimum(radii2, np.flipud(radii2)) + radii2 = np.minimum(radii2, np.fliplr(radii2)) + maxwidth = ( + min(img.shape[0], img.shape[1]) / 8.0 + ) # truncate early to avoid edge effects + if np.ptp(img) > 0: + img = img / np.median(abs(img - img.mean())) # intensity invariant + mag = abs(fft2(img - np.mean(img))) + power = 
mag**2 + radii = np.floor(np.sqrt(radii2)).astype(int) + 1 + labels = np.arange(2, np.floor(maxwidth)).astype(int).tolist() # skip DC component + if len(labels) > 0: + magsum = nd_sum(mag, radii, labels) + powersum = nd_sum(power, radii, labels) + return np.array(labels), np.array(magsum), np.array(powersum) + + return [2], [0], [0] def power_spectrum(image: np.ndarray) -> float: @@ -9,7 +35,7 @@ def power_spectrum(image: np.ndarray) -> float: gives a measure of image blur. A higher slope indicates more lower frequency components, and hence more blur. See https://cellprofiler-manual.s3.amazonaws.com/CellProfiler-4.0.5/modules/measurement.html#measureimagequality """ - radii, magnitude, power = centrosome.radial_power_spectrum.rps(image) + radii, magnitude, power = rps(image) if sum(magnitude) > 0 and len(np.unique(image)) > 1: valid = magnitude > 0 radii = radii[valid].reshape((-1, 1)) From 4c0de2222262a68539daa60be149add7547f9752 Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Fri, 13 Feb 2026 08:47:14 -0500 Subject: [PATCH 32/32] try zarr inputs again --- scallops/cli/features.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scallops/cli/features.py b/scallops/cli/features.py index aeae8b3..a4e8330 100644 --- a/scallops/cli/features.py +++ b/scallops/cli/features.py @@ -196,12 +196,12 @@ def single_feature( output_fs, _ = fsspec.core.url_to_fs(output_dir) join_df = False features_output_suffix = "" if join_df else "-features" - zarr_inputs = False # using zarr as input fails with zarr3 with credentials errors + zarr_inputs = True - # for f in file_list: - # if not isinstance(f, (zarr.Group, zarr.Array)): - # zarr_inputs = False - # break + for f in file_list: + if not isinstance(f, (zarr.Group, zarr.Array)): + zarr_inputs = False + break if zarr_inputs and stacked_image_tuple is not None: for f in stacked_file_list: