diff --git a/bidsreader/_neurorad_algo.py b/bidsreader/_neurorad_algo.py new file mode 100644 index 0000000..b18974a --- /dev/null +++ b/bidsreader/_neurorad_algo.py @@ -0,0 +1,81 @@ +"""Pair-location algorithms copied verbatim from the CML neurorad pipeline. + +Source of truth: + github.com/pennmem/neurorad_pipeline + File: localization.py :: Localization.get_pair_coordinate + Pinned revision (on rhino2): cf563def3087126b1aedb1dea274172200f3fa75 + Path checked: /home2/iped/neurorad_pipeline/localization.py + +We copy rather than import to keep neurorad_pipeline off the runtime +dependency list. A test in tests/test_pair_midpoint_parity.py imports +neurorad_pipeline as a dev-only dependency and pins this copy against +the upstream implementation. + +When the upstream algorithm changes, update this file AND the pinned +revision in the parity test's docstring in the same commit. +""" + +from __future__ import annotations + +from typing import Dict, Tuple + +import numpy as np + + +# Mirrors Localization.VALID_COORDINATE_SPACES (localization.py:13-19). +VALID_COORDINATE_SPACES: Tuple[str, ...] = ( + "ct_voxel", + "fs", + "t1_mri", + "t2_mri", + "mni", +) + +# Mirrors Localization.VALID_COORDINATE_TYPES (localization.py:21-24). +VALID_COORDINATE_TYPES: Tuple[str, ...] = ( + "raw", + "corrected", +) + +# Mirrors Localization.VALID_ATLASES (localization.py:35-39). +VALID_ATLASES: Tuple[str, ...] = ( + "dk", + "whole_brain", + "mtl", +) + +# BIDS space label -> (neurorad coordinate_space, coordinate_type). +# Synthesized from event_creation/submission/neurorad_tasks.py +# FIELD_NAMES_TABLE (lines 190-198) and the bids-convert CML_TO_BIDS_SPACE +# map. Spaces not present here (Talairach, t1MRI, fsnativeDural) are +# valid midpoint targets but have no neurorad atlas lookup. +BIDS_SPACE_TO_NEURORAD: Dict[str, Tuple[str, str]] = { + "fsnative": ("fs", "raw"), + "fsnativeBrainshift": ("fs", "corrected"), + "fsaverage": ("fsaverage", "raw"), + "fsaverageBrainshift": ("fsaverage", "corrected"), + "MNI152NLin6ASym": ("mni", "raw"), + "Pixels": ("ct_voxel", "raw"), +} + + +def pair_coordinate(coord1, coord2): + """Midpoint of two contact coordinates. + + Verbatim behavior of + neurorad_pipeline.localization.Localization.get_pair_coordinate + (localization.py:274-289), stripped of its Localization-object + plumbing so callers can pass raw arrays. + """ + if coord1 is None or coord2 is None: + return None + return (np.array(coord1) + np.array(coord2)) / 2 + + +def pair_coordinate_axis(a, b): + """Vectorized midpoint for one coordinate axis across N pairs. + + NaN on either side propagates, matching the None-returning behavior + of scalar pair_coordinate. Intended for pandas/numpy column math. + """ + return (np.asarray(a, dtype=float) + np.asarray(b, dtype=float)) / 2.0 diff --git a/bidsreader/cmlbidsreader.py b/bidsreader/cmlbidsreader.py index 8ba7233..5592964 100644 --- a/bidsreader/cmlbidsreader.py +++ b/bidsreader/cmlbidsreader.py @@ -55,7 +55,7 @@ def _determine_device(self) -> Optional[str]: return None DEFAULT_SPACE = "MNI152NLin6ASym" - SPACE_PREFERENCE = ("MNI152NLin6ASym", "Talairach" ,"fsaverage", "Pixels", "fsaverageBrainshift" "fsnative", "fsnativeBrainshift", "fsnativeDural", "t1MRI") + SPACE_PREFERENCE = ("MNI152NLin6ASym", "Talairach", "fsaverage", "Pixels", "fsaverageBrainshift", "fsnative", "fsnativeBrainshift", "fsnativeDural", "t1MRI") def _coordsystem_dir(self) -> Path: subject_root = self._subject_root() @@ -104,16 +104,24 @@ def _determine_space(self) -> str: if len(spaces) == 1: return spaces[0] + # Multiple spaces present: pick the highest-priority preferred space + # that is available. for preferred in self.SPACE_PREFERENCE: if preferred in spaces: return preferred - raise AmbiguousMatchError( + # None of the preferred defaults is present. Rather than make the + # caller pass space= explicitly, fall back to a default: DEFAULT_SPACE + # if it happens to be available, otherwise the first space (sorted). + chosen = self.DEFAULT_SPACE if self.DEFAULT_SPACE in spaces else spaces[0] + warnings.warn( f"determine_space: multiple spaces found and none of the " f"preferred defaults {self.SPACE_PREFERENCE} present. " - f"Available spaces: {spaces}. " - f"Pass space= when constructing CMLBIDSReader." + f"Available spaces: {spaces}. Defaulting to {chosen!r}. " + f"Pass space= to choose explicitly.", + RuntimeWarning, ) + return chosen def _validate_acq(self, acquisition: Optional[str]) -> Optional[str]: if not self.is_intracranial(): @@ -128,7 +136,7 @@ def _get_needed_fields(self): def _attach_bipolar_midpoint_montage(self, raw: mne.io.BaseRaw, space: Optional[str] = None) -> None: pairs_df = self.load_channels("bipolar") elec_df = self.load_electrodes(space=space) - combo = combine_bipolar_electrodes(pairs_df, elec_df) + combo = combine_bipolar_electrodes(pairs_df, elec_df, space=self.space) if not {"name", "x_mid", "y_mid", "z_mid"}.issubset(combo.columns): return @@ -208,7 +216,7 @@ def load_combined_channels(self, acquisition: Optional[str] = None, space: Optio if acquisition == "monopolar" or acquisition is None: return channel_df.merge(elec_df, on="name", how="left", suffixes=("", "_elec")) if acquisition == "bipolar": - return combine_bipolar_electrodes(channel_df, elec_df) + return combine_bipolar_electrodes(channel_df, elec_df, space=self.space) @public_api def load_coordsystem_desc(self, space: Optional[str] = None) -> Dict: diff --git a/bidsreader/helpers.py b/bidsreader/helpers.py index 733b238..bf61be7 100644 --- a/bidsreader/helpers.py +++ b/bidsreader/helpers.py @@ -1,8 +1,27 @@ +"""Utility helpers for bidsreader. + +Pair-coordinate math (``combine_bipolar_electrodes``) uses +``pair_coordinate_axis`` from :mod:`._neurorad_algo`, which is a +verbatim copy of the CML neurorad pipeline's +``Localization.get_pair_coordinate``. The rule is the same in every +BIDS coordinate space we support — ``MNI152NLin6ASym``, ``Talairach``, +``fsaverage``, ``fsaverageBrainshift``, ``fsnative``, +``fsnativeBrainshift``, ``fsnativeDural``, ``t1MRI``, ``Pixels`` — +because those per-space contact coordinates have already been +produced by the pipeline's space-specific transforms before BIDS +export. + +For the BIDS space ↔ CML ``coordinate_space`` / ``coordinate_type`` +mapping see ``eeg_validation.preparers.montage.CML_TO_BIDS_SPACE`` or +:attr:`bidsreader._neurorad_algo.BIDS_SPACE_TO_NEURORAD`. +""" + import numpy as np import pandas as pd from typing import Iterable, Any, Tuple, Sequence, Optional, Dict import re from .exc import InvalidOptionError +from ._neurorad_algo import pair_coordinate_axis def validate_option(name: str, value: Any, allowed: Iterable[Any]) -> Any: if value is None: @@ -95,7 +114,23 @@ def combine_bipolar_electrodes( pair_col: str = "name", elec_name_col: str = "name", region_cols: Sequence[str] = ("wb.region", "ind.region", "stein.region"), + space: Optional[str] = None, ) -> pd.DataFrame: + """Join bipolar pairs with electrode metadata and compute per-pair + coordinate midpoints, matching the neurorad pipeline's pair-location + algorithm (``Localization.get_pair_coordinate`` — see + :mod:`bidsreader._neurorad_algo`). Midpoint is taken in whatever BIDS + space ``elec_df`` carries; upstream brainshift / nonlinear-warp + corrections are expected to have already been applied per-contact. + + Region columns from ``region_cols`` are carried through per-contact + (as ``{col}_ch1`` / ``{col}_ch2``) but no ``{col}_pair`` column is + synthesized. Neurorad assigns pair-region labels by independently + looking up the atlas at each pair's midpoint voxel, which cannot be + reproduced from contact-level agreement. Callers that need + pair-region labels should pull them from the upstream + ``pairs.json`` via ``enrich_pairs_with_cml_regions``. + """ sep = "-" out = pairs_df.copy() @@ -122,20 +157,18 @@ def combine_bipolar_electrodes( look2 = look.add_suffix("_ch2").rename(columns={f"{elec_name_col}_ch2": "ch2"}) out = out.merge(look2, on="ch2", how="left") - # Region agreement - for rc in region_cols: - if f"{rc}_ch1" in out.columns and f"{rc}_ch2" in out.columns: - a = out[f"{rc}_ch1"] - b = out[f"{rc}_ch2"] - out[f"{rc}_pair"] = np.where(a.notna() & (a == b), a, np.nan) - - # Midpoints for every detected coordinate triplet + # Midpoints for every detected coordinate triplet. pair_coordinate_axis + # is a verbatim copy of neurorad_pipeline.localization.Localization + # .get_pair_coordinate reduced to a single axis. for prefix, (xcol, ycol, zcol) in coord_triplets.items(): for col in (xcol, ycol, zcol): a = out[f"{col}_ch1"] b = out[f"{col}_ch2"] mid_name = f"{col}_mid" # e.g., "x_mid" or "tal.x_mid" - out[mid_name] = np.where(a.notna() & b.notna(), (a + b) / 2.0, np.nan) + mid = pair_coordinate_axis(a, b) + if space == "Pixels": + mid = np.floor(mid) + out[mid_name] = np.where(a.notna() & b.notna(), mid, np.nan) return out diff --git a/bidsreader/region_enrichment.py b/bidsreader/region_enrichment.py new file mode 100644 index 0000000..887d194 --- /dev/null +++ b/bidsreader/region_enrichment.py @@ -0,0 +1,94 @@ +"""Opt-in enrichment that attaches pair-level region labels from +the upstream CML ``pairs.json`` to a bidsreader pairs DataFrame. + +Pure-BIDS users don't need this module. It exists only for the hybrid +case where a subject's BIDS export sits alongside the original +neurorad-pipeline output on rhino and the caller wants the pair-region +labels that neurorad computed (via independent atlas lookup at each +pair's midpoint voxel). The BIDS side can't reproduce those labels +from contact-level data, so we pull them from the upstream artifact. + +See :mod:`._neurorad_algo` for why pair region labels differ from the +contact-level agreement heuristic. +""" + +from __future__ import annotations + +from typing import Iterable, Optional + +import pandas as pd + + +_DEFAULT_REGION_COLS: tuple[str, ...] = ( + "ind.region", + "ind.corrected.region", + "avg.region", + "avg.corrected.region", + "mni.region", + "hcp.region", + "stein.region", + "das.region", +) + + +def enrich_pairs_with_cml_regions( + pairs_df: pd.DataFrame, + *, + subject: Optional[str] = None, + experiment: Optional[str] = None, + session: Optional[int] = None, + localization: Optional[int] = 0, + montage: Optional[int] = 0, + reader=None, + label_col: str = "name", + region_cols: Optional[Iterable[str]] = None, +) -> pd.DataFrame: + """Return a copy of ``pairs_df`` with CML ``*.region`` columns joined on. + + ``pairs_df`` is the output of + :func:`bidsreader.helpers.combine_bipolar_electrodes`, which uses + ``name`` as the pair-label column by default. + """ + + try: + from cmlreaders import CMLReader + except ImportError as exc: + raise ImportError( + "enrich_pairs_with_cml_regions requires cmlreaders. Install " + "it (pip install cmlreaders) or load pairs.json manually." + ) from exc + + if reader is None: + if subject is None or experiment is None or session is None: + raise ValueError( + "Provide either a ready CMLReader or " + "(subject, experiment, session)." + ) + reader = CMLReader( + subject=subject, + experiment=experiment, + session=session, + localization=localization, + montage=montage, + ) + + cml_pairs = reader.load("pairs") + + wanted = tuple(region_cols) if region_cols is not None else _DEFAULT_REGION_COLS + available = [c for c in wanted if c in cml_pairs.columns] + + if "label" not in cml_pairs.columns: + raise KeyError( + "cml_pairs is missing the 'label' column — can't join on pair " + "name. Got columns: %r" % (list(cml_pairs.columns),) + ) + + right = cml_pairs[["label", *available]].copy() + right["label"] = right["label"].astype("string").str.strip() + if label_col != "label": + right = right.rename(columns={"label": label_col}) + + out = pairs_df.copy() + out[label_col] = out[label_col].astype("string").str.strip() + + return out.merge(right, how="left", on=label_col) diff --git a/tests/test_cmlbidsreader.py b/tests/test_cmlbidsreader.py index b9e9fc4..1a5e93d 100644 --- a/tests/test_cmlbidsreader.py +++ b/tests/test_cmlbidsreader.py @@ -142,13 +142,27 @@ def test_no_coordsystem_file_raises(self, tmp_root): with pytest.raises(FileNotFoundBIDSError, match="no.*coordsystem.json"): r._determine_space() - def test_multiple_coordsystem_files_raises(self, tmp_root): + def test_multiple_spaces_prefers_preferred_default(self, tmp_root): + """When several spaces are present, the highest-priority space in + SPACE_PREFERENCE wins (here MNI152NLin6ASym over fsnative).""" + data_dir = self._make_data_dir(tmp_root) + (data_dir / "sub-R1001P_ses-0_space-fsnative_coordsystem.json").touch() + (data_dir / "sub-R1001P_ses-0_space-MNI152NLin6ASym_coordsystem.json").touch() + r = CMLBIDSReader(root=tmp_root, subject="R1001P", task="FR1", session="0", device="ieeg") + assert r._determine_space() == "MNI152NLin6ASym" + + def test_multiple_spaces_none_preferred_falls_back_with_warning(self, tmp_root): + """When multiple spaces are present and none is in SPACE_PREFERENCE, + _determine_space falls back to a default (warning) instead of raising. + Neither MNI nor TAL is a canonical preferred label, so it picks the + first available (sorted).""" data_dir = self._make_data_dir(tmp_root) (data_dir / "sub-R1001P_ses-0_space-MNI_coordsystem.json").touch() (data_dir / "sub-R1001P_ses-0_space-TAL_coordsystem.json").touch() r = CMLBIDSReader(root=tmp_root, subject="R1001P", task="FR1", session="0", device="ieeg") - with pytest.raises(AmbiguousMatchError, match="multiple coordsystem"): - r._determine_space() + with pytest.warns(RuntimeWarning, match="Defaulting to"): + chosen = r._determine_space() + assert chosen == "MNI" # sorted(['MNI', 'TAL'])[0] def test_single_valid_match(self, tmp_root): data_dir = self._make_data_dir(tmp_root) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index efc53f6..f3091aa 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -7,7 +7,7 @@ - add_prefix: None input, already-prefixed value, value needing prefix - merge_duplicate_sample_events: no duplicates, duplicate trial_type merge, all-NaN - find_coord_triplets: bare xyz, prefixed xyz, mixed columns, no triplets - - combine_bipolar_electrodes: pair splitting, midpoint computation, region agreement + - combine_bipolar_electrodes: pair splitting, midpoint computation, no synthetic pair region - normalize_trial_types: converts iterable of strings to set - match_event_label: exact match, slash-separated label, no match """ @@ -197,7 +197,12 @@ def test_midpoint_computation(self, sample_bipolar_channels_df, sample_electrode assert result.iloc[0]["y_mid"] == pytest.approx(5.5) assert result.iloc[0]["z_mid"] == pytest.approx(9.5) - def test_region_agreement(self): + def test_no_synthetic_pair_region_column(self): + """Neurorad assigns pair-region labels via atlas lookup at the + midpoint voxel, not by contact-level agreement. The combiner + must therefore NOT synthesize a ``{col}_pair`` column. Per-contact + region columns are still present as ``{col}_ch1`` / ``{col}_ch2``. + """ pairs = pd.DataFrame({"name": ["A1-A2"]}) elecs = pd.DataFrame({ "name": ["A1", "A2"], @@ -205,17 +210,74 @@ def test_region_agreement(self): "wb.region": ["hippocampus", "hippocampus"], }) result = combine_bipolar_electrodes(pairs, elecs) - assert result.iloc[0]["wb.region_pair"] == "hippocampus" + assert "wb.region_pair" not in result.columns + assert result.iloc[0]["wb.region_ch1"] == "hippocampus" + assert result.iloc[0]["wb.region_ch2"] == "hippocampus" - def test_region_disagreement(self): + def test_pixels_space_floors_midpoint(self): + """In Pixels (CT voxel) space the midpoint is floored to an integer + voxel index, matching the neurorad pipeline's voxel handling.""" pairs = pd.DataFrame({"name": ["A1-A2"]}) elecs = pd.DataFrame({ "name": ["A1", "A2"], - "x": [1.0, 2.0], "y": [3.0, 4.0], "z": [5.0, 6.0], - "wb.region": ["hippocampus", "amygdala"], + "x": [1.0, 2.0], "y": [5.0, 8.0], "z": [9.0, 10.0], + }) + # raw midpoint = (1.5, 6.5, 9.5) -> floored = (1.0, 6.0, 9.0) + result = combine_bipolar_electrodes(pairs, elecs, space="Pixels") + assert result.iloc[0]["x_mid"] == pytest.approx(1.0) + assert result.iloc[0]["y_mid"] == pytest.approx(6.0) + assert result.iloc[0]["z_mid"] == pytest.approx(9.0) + + def test_pixels_space_floors_multiple_pairs(self): + """Regression: flooring must work across many pairs at once. A scalar + math.floor here raised 'only length-1 arrays can be converted to + Python scalars' for any subject with >1 bipolar pair.""" + pairs = pd.DataFrame({"name": ["A1-A2", "B1-B2"]}) + elecs = pd.DataFrame({ + "name": ["A1", "A2", "B1", "B2"], + "x": [1.2, 2.9, 3.1, 4.4], + "y": [5.0, 6.0, 7.0, 8.0], + "z": [9.0, 10.0, 11.0, 12.0], + }) + result = combine_bipolar_electrodes(pairs, elecs, space="Pixels") + # A1-A2 raw mid x = 2.05 -> 2.0 ; B1-B2 raw mid x = 3.75 -> 3.0 + assert list(result["x_mid"]) == pytest.approx([2.0, 3.0]) + + def test_non_pixels_space_does_not_floor(self): + """Default / non-Pixels spaces keep the fractional midpoint.""" + pairs = pd.DataFrame({"name": ["A1-A2"]}) + elecs = pd.DataFrame({ + "name": ["A1", "A2"], + "x": [1.0, 2.0], "y": [5.0, 8.0], "z": [9.0, 10.0], + }) + result = combine_bipolar_electrodes(pairs, elecs, space="MNI152NLin6ASym") + assert result.iloc[0]["x_mid"] == pytest.approx(1.5) + assert result.iloc[0]["y_mid"] == pytest.approx(6.5) + + def test_missing_contact_coords_yield_nan_midpoint(self): + """If a pair references a contact missing from elec_df, its midpoint + columns are NaN rather than raising.""" + pairs = pd.DataFrame({"name": ["A1-A2", "A1-GHOST"]}) + elecs = pd.DataFrame({ + "name": ["A1", "A2"], + "x": [1.0, 3.0], "y": [5.0, 7.0], "z": [9.0, 11.0], + }) + result = combine_bipolar_electrodes(pairs, elecs) + assert result.iloc[0]["x_mid"] == pytest.approx(2.0) + assert pd.isna(result.iloc[1]["x_mid"]) + + def test_prefixed_coord_triplet_midpoint(self): + """Prefixed coordinate triplets (e.g. tal.x/tal.y/tal.z) also get a + {prefix}.{axis}_mid column.""" + pairs = pd.DataFrame({"name": ["A1-A2"]}) + elecs = pd.DataFrame({ + "name": ["A1", "A2"], + "tal.x": [0.0, 4.0], "tal.y": [0.0, 2.0], "tal.z": [0.0, 6.0], }) result = combine_bipolar_electrodes(pairs, elecs) - assert pd.isna(result.iloc[0]["wb.region_pair"]) + assert result.iloc[0]["tal.x_mid"] == pytest.approx(2.0) + assert result.iloc[0]["tal.y_mid"] == pytest.approx(1.0) + assert result.iloc[0]["tal.z_mid"] == pytest.approx(3.0) # --------------------------------------------------------------------------- diff --git a/tests/test_neurorad_algo.py b/tests/test_neurorad_algo.py new file mode 100644 index 0000000..5ed838f --- /dev/null +++ b/tests/test_neurorad_algo.py @@ -0,0 +1,113 @@ +""" +Tests for bidsreader._neurorad_algo + +These are upstream-free unit tests for the copied neurorad pair-location +algorithm. The rhino-only parity test in test_pair_midpoint_parity.py pins +this implementation against the real neurorad_pipeline; here we exercise the +copy's behavior directly so it is covered even off rhino. + +What is tested: + - pair_coordinate: midpoint, None-in/None-out short-circuit, return type + - pair_coordinate_axis: vectorized midpoint, NaN propagation, scalar input + - constant maps: shape/content sanity for the mirrored neurorad constants +""" +import numpy as np +import pytest + +from bidsreader._neurorad_algo import ( + pair_coordinate, + pair_coordinate_axis, + VALID_COORDINATE_SPACES, + VALID_COORDINATE_TYPES, + VALID_ATLASES, + BIDS_SPACE_TO_NEURORAD, +) + + +# --------------------------------------------------------------------------- +# pair_coordinate +# --------------------------------------------------------------------------- +class TestPairCoordinate: + """Tests for pair_coordinate (scalar / single-triplet midpoint).""" + + def test_midpoint(self): + result = pair_coordinate([0.0, 0.0, 0.0], [2.0, 4.0, 6.0]) + np.testing.assert_allclose(result, [1.0, 2.0, 3.0]) + + def test_negative_and_fractional(self): + result = pair_coordinate([-1.5, 0.0, 10.25], [1.5, 0.0, -10.25]) + np.testing.assert_allclose(result, [0.0, 0.0, 0.0]) + + def test_none_first_returns_none(self): + assert pair_coordinate(None, [1.0, 2.0, 3.0]) is None + + def test_none_second_returns_none(self): + assert pair_coordinate([1.0, 2.0, 3.0], None) is None + + def test_returns_ndarray(self): + result = pair_coordinate([1.0, 2.0, 3.0], [3.0, 4.0, 5.0]) + assert isinstance(result, np.ndarray) + + def test_accepts_ndarray_input(self): + result = pair_coordinate(np.array([0.0, 0.0, 0.0]), np.array([4.0, 8.0, 2.0])) + np.testing.assert_allclose(result, [2.0, 4.0, 1.0]) + + +# --------------------------------------------------------------------------- +# pair_coordinate_axis +# --------------------------------------------------------------------------- +class TestPairCoordinateAxis: + """Tests for pair_coordinate_axis (vectorized single-axis midpoint).""" + + def test_vectorized_midpoint(self): + a = [0.0, 10.0, -4.0] + b = [2.0, 20.0, 4.0] + np.testing.assert_allclose(pair_coordinate_axis(a, b), [1.0, 15.0, 0.0]) + + def test_nan_propagates(self): + a = [1.0, np.nan, 3.0] + b = [3.0, 5.0, np.nan] + result = pair_coordinate_axis(a, b) + assert result[0] == pytest.approx(2.0) + assert np.isnan(result[1]) + assert np.isnan(result[2]) + + def test_scalar_input(self): + # Single-pair case: must not crash and must return the midpoint. + result = pair_coordinate_axis([2.0], [5.0]) + np.testing.assert_allclose(result, [3.5]) + + def test_returns_float_dtype(self): + # Integer input must be promoted so /2 does not truncate. + result = pair_coordinate_axis([1, 2], [2, 3]) + assert result.dtype == np.float64 + np.testing.assert_allclose(result, [1.5, 2.5]) + + +# --------------------------------------------------------------------------- +# Mirrored neurorad constants +# --------------------------------------------------------------------------- +class TestNeuroradConstants: + """Light sanity checks on the constants copied from localization.py.""" + + def test_valid_spaces(self): + assert VALID_COORDINATE_SPACES == ("ct_voxel", "fs", "t1_mri", "t2_mri", "mni") + + def test_valid_types(self): + assert VALID_COORDINATE_TYPES == ("raw", "corrected") + + def test_valid_atlases(self): + assert VALID_ATLASES == ("dk", "whole_brain", "mtl") + + def test_bids_space_map_values_are_pairs(self): + # Every value is a (coordinate_space, coordinate_type) tuple, and the + # coordinate_type is one of the valid neurorad types. + for space, mapped in BIDS_SPACE_TO_NEURORAD.items(): + assert len(mapped) == 2 + assert mapped[1] in VALID_COORDINATE_TYPES + + def test_pixels_maps_to_ct_voxel(self): + assert BIDS_SPACE_TO_NEURORAD["Pixels"] == ("ct_voxel", "raw") + + def test_brainshift_maps_to_corrected(self): + assert BIDS_SPACE_TO_NEURORAD["fsnativeBrainshift"][1] == "corrected" diff --git a/tests/test_pair_midpoint_parity.py b/tests/test_pair_midpoint_parity.py new file mode 100644 index 0000000..fcca6e8 --- /dev/null +++ b/tests/test_pair_midpoint_parity.py @@ -0,0 +1,137 @@ +"""Parity test: bidsreader._neurorad_algo.pair_coordinate vs. the +upstream neurorad pipeline's Localization.get_pair_coordinate. + +neurorad_pipeline is NOT a runtime dependency of bidsreader. It is +loaded here dynamically from its rhino install (default: +/home2/iped/neurorad_pipeline/) for test purposes only. If the +directory isn't present (e.g. we're off rhino), the whole module is +skipped — the copied algorithm in _neurorad_algo.py is still +exercised by unit tests that don't require upstream. + +When the upstream get_pair_coordinate changes, this test fails. Update +_neurorad_algo.pair_coordinate AND the pinned revision recorded in +_neurorad_algo.py's top-of-file docstring in the same commit. +""" + +from __future__ import annotations + +import importlib.util +import os +import sys +from pathlib import Path + +import numpy as np +import pytest + + +NEURORAD_DIR_CANDIDATES = ( + Path("/home2/iped/neurorad_pipeline"), + Path("/home2/iped/event_creation/neurorad"), +) + + +def _load_upstream_localization(): + for d in NEURORAD_DIR_CANDIDATES: + loc_py = d / "localization.py" + if not loc_py.exists(): + continue + # localization.py does `from json_cleaner import ...` so the dir + # must be on sys.path before import. + sys.path.insert(0, str(d)) + try: + spec = importlib.util.spec_from_file_location( + f"_upstream_localization_{d.name}", str(loc_py) + ) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod.Localization + except Exception: + sys.path.pop(0) + continue + return None + + +UpstreamLocalization = _load_upstream_localization() +pytestmark = pytest.mark.skipif( + UpstreamLocalization is None, + reason="upstream neurorad_pipeline not available (off rhino?)", +) + + +def _make_loc_with_two_contacts(space, c1_xyz, c2_xyz, ctype="raw"): + loc = UpstreamLocalization() + loc._contact_dict = { + "leads": { + "LA": { + "type": "D", + "n_groups": 1, + "contacts": [ + { + "name": "LA1", + "lead_group": 0, + "lead_loc": 0, + "grid_group": 0, + "grid_loc": [0, 0], + "atlases": {}, + "info": {}, + "coordinate_spaces": {space: {ctype: list(c1_xyz)}}, + }, + { + "name": "LA2", + "lead_group": 0, + "lead_loc": 1, + "grid_group": 0, + "grid_loc": [1, 0], + "atlases": {}, + "info": {}, + "coordinate_spaces": {space: {ctype: list(c2_xyz)}}, + }, + ], + "pairs": [], + } + } + } + return loc + + +SPACE_TYPE_CASES = [ + ("ct_voxel", "raw"), + ("fs", "raw"), + ("fs", "corrected"), + ("t1_mri", "raw"), + ("t2_mri", "raw"), + ("mni", "raw"), + ("mni", "corrected"), +] + +COORD_CASES = [ + ([0.0, 0.0, 0.0], [2.0, 4.0, 6.0]), + ([-1.5, 0.0, 10.25], [1.5, 0.0, -10.25]), + ([100.0, 200.0, -50.0], [101.0, 201.0, -49.0]), +] + + +@pytest.mark.parametrize("space,ctype", SPACE_TYPE_CASES) +@pytest.mark.parametrize("c1,c2", COORD_CASES) +def test_pair_coordinate_matches_upstream(space, ctype, c1, c2): + """pair_coordinate(c1, c2) == Localization.get_pair_coordinate + across every (space, coordinate_type) combination.""" + from bidsreader._neurorad_algo import pair_coordinate + + loc = _make_loc_with_two_contacts(space, c1, c2, ctype=ctype) + upstream = loc.get_pair_coordinate(space, ["LA1", "LA2"], ctype) + + ours = pair_coordinate(c1, c2) + + # Upstream returns shape (1, 3); ours returns shape (3,). Flatten. + np.testing.assert_allclose( + np.asarray(upstream).ravel(), np.asarray(ours).ravel() + ) + + +def test_pair_coordinate_none_in_none_out(): + """Matches upstream short-circuit behavior (localization.py:286-287).""" + from bidsreader._neurorad_algo import pair_coordinate + + assert pair_coordinate(None, [1.0, 2.0, 3.0]) is None + assert pair_coordinate([1.0, 2.0, 3.0], None) is None diff --git a/tests/test_region_enrichment.py b/tests/test_region_enrichment.py new file mode 100644 index 0000000..293b675 --- /dev/null +++ b/tests/test_region_enrichment.py @@ -0,0 +1,111 @@ +""" +Tests for bidsreader.region_enrichment.enrich_pairs_with_cml_regions + +enrich_pairs_with_cml_regions joins authoritative pair-level region labels +from the upstream CML pairs.json onto a bidsreader pairs DataFrame. It loads +those labels via a cmlreaders.CMLReader, but accepts a ready reader object so +the join logic can be tested without cmlreaders installed or rhino access. + +What is tested: + - region columns from pairs.json are joined onto matching pair names + - whitespace on both join keys is stripped before merging + - only the requested / available region columns are pulled + - left-join semantics: unmatched pairs survive with NaN region labels + - missing 'label' column in cml_pairs raises KeyError + - reader=None with incomplete (subject, experiment, session) raises ValueError +""" +import importlib.util + +import pandas as pd +import pytest + +from bidsreader.region_enrichment import enrich_pairs_with_cml_regions + +# enrich_pairs_with_cml_regions imports cmlreaders unconditionally (even when a +# reader is injected), so skip the whole module where cmlreaders is unavailable. +pytestmark = pytest.mark.skipif( + importlib.util.find_spec("cmlreaders") is None, + reason="cmlreaders not installed", +) + + +class FakeReader: + """Stand-in for cmlreaders.CMLReader.load('pairs').""" + + def __init__(self, pairs_df): + self._pairs_df = pairs_df + + def load(self, what): + assert what == "pairs" + return self._pairs_df + + +def _cml_pairs(): + return pd.DataFrame({ + "label": ["A1-A2", "B1-B2"], + "ind.region": ["hippocampus", "amygdala"], + "stein.region": ["CA1", "BLA"], + # a column not in the default wanted-set, to confirm filtering + "contact_1": [1, 3], + }) + + +class TestEnrichPairsWithCmlRegions: + """Tests for enrich_pairs_with_cml_regions.""" + + def test_joins_region_columns(self): + pairs = pd.DataFrame({"name": ["A1-A2", "B1-B2"]}) + result = enrich_pairs_with_cml_regions(pairs, reader=FakeReader(_cml_pairs())) + assert list(result["ind.region"]) == ["hippocampus", "amygdala"] + assert list(result["stein.region"]) == ["CA1", "BLA"] + + def test_only_requested_region_cols_pulled(self): + pairs = pd.DataFrame({"name": ["A1-A2"]}) + result = enrich_pairs_with_cml_regions( + pairs, reader=FakeReader(_cml_pairs()), region_cols=["ind.region"] + ) + assert "ind.region" in result.columns + assert "stein.region" not in result.columns + + def test_non_region_columns_not_pulled(self): + """contact_1 exists in cml_pairs but is not a region col -> dropped.""" + pairs = pd.DataFrame({"name": ["A1-A2"]}) + result = enrich_pairs_with_cml_regions(pairs, reader=FakeReader(_cml_pairs())) + assert "contact_1" not in result.columns + + def test_strips_whitespace_on_join_keys(self): + pairs = pd.DataFrame({"name": [" A1-A2 "]}) + cml = _cml_pairs() + cml.loc[0, "label"] = " A1-A2 " + result = enrich_pairs_with_cml_regions(pairs, reader=FakeReader(cml)) + assert result.iloc[0]["ind.region"] == "hippocampus" + + def test_left_join_keeps_unmatched_pairs(self): + pairs = pd.DataFrame({"name": ["A1-A2", "Z9-Z10"]}) + result = enrich_pairs_with_cml_regions(pairs, reader=FakeReader(_cml_pairs())) + assert len(result) == 2 + assert result.iloc[0]["ind.region"] == "hippocampus" + assert pd.isna(result.iloc[1]["ind.region"]) + + def test_does_not_mutate_input(self): + pairs = pd.DataFrame({"name": ["A1-A2"]}) + enrich_pairs_with_cml_regions(pairs, reader=FakeReader(_cml_pairs())) + assert "ind.region" not in pairs.columns + + def test_custom_label_col(self): + pairs = pd.DataFrame({"pair": ["A1-A2"]}) + result = enrich_pairs_with_cml_regions( + pairs, reader=FakeReader(_cml_pairs()), label_col="pair" + ) + assert result.iloc[0]["ind.region"] == "hippocampus" + + def test_missing_label_column_raises(self): + bad = _cml_pairs().drop(columns=["label"]) + pairs = pd.DataFrame({"name": ["A1-A2"]}) + with pytest.raises(KeyError): + enrich_pairs_with_cml_regions(pairs, reader=FakeReader(bad)) + + def test_no_reader_and_incomplete_args_raises(self): + pairs = pd.DataFrame({"name": ["A1-A2"]}) + with pytest.raises(ValueError): + enrich_pairs_with_cml_regions(pairs, subject="R1001P") # no exp/session