From d2ad8218327669035d3b1dcd9393644ed43f8579 Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Thu, 7 May 2026 15:17:51 +0200 Subject: [PATCH] Replace pybids with ancpBIDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace pybids with ancpbids as the BIDS layout engine and drop the bids-validator dependency. A layout_to_df() shim in converter.py bridges the ancpBIDS native API to the pandas DataFrame expected by the rest of the pipeline, so main.py and utility.py are unchanged. ancpBIDS treats CTF MEG .ds directories as single artifacts rather than enumerating their internal files (BadChannels, ClassFile.cls, etc.), which is the correct BIDS interpretation. The ds000247 expected file count in the test suite is updated accordingly (202 → 120). --- bids2openminds/converter.py | 56 +++++++++++++++++++++++++++++++++++-- pyproject.toml | 3 +- test/test_bids_examples.py | 4 ++- test/test_task.py | 2 +- 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/bids2openminds/converter.py b/bids2openminds/converter.py index b2f63ca..64d18bd 100644 --- a/bids2openminds/converter.py +++ b/bids2openminds/converter.py @@ -1,12 +1,62 @@ import warnings -from bids import BIDSLayout, BIDSValidator -from openminds import Collection import os +import pandas as pd +from ancpbids import BIDSLayout +from ancpbids.query import Artifact +from ancpbids.model_base import DatatypeFolder +from openminds import Collection import click from . import main from . import utility from . 
import report +_ENTITY_RENAMES = {"sub": "subject", "ses": "session"} + +# Root-level BIDS files that ancpBIDS does not expose as Artifacts +_ROOT_BIDS_FILES = [ + ("dataset_description.json", "description", ".json"), + ("participants.tsv", "participants", ".tsv"), + ("participants.json", "participants", ".json"), + ("CHANGES", None, None), + ("README", None, None), + ("README.md", None, None), +] + + +def layout_to_df(layout): + dataset = layout.get_dataset() + rows = [] + + for obj in layout.get(return_type='object', scope='raw'): + if not isinstance(obj, Artifact): + continue + parent = obj.get_parent() + datatype = parent.name if isinstance(parent, DatatypeFolder) else None + row = { + "path": obj.get_absolute_path(), + "suffix": obj.suffix, + "datatype": datatype, + "extension": obj.extension, + } + for entity in obj.entities: + key = _ENTITY_RENAMES.get(entity.key, entity.key) + row[key] = entity.value + rows.append(row) + + base_dir = os.path.abspath(dataset.base_dir_) + for fname, suffix, extension in _ROOT_BIDS_FILES: + path = os.path.join(base_dir, fname) + if not os.path.exists(path): + continue + row = {"path": path, "datatype": None} + if suffix is not None: + row["suffix"] = suffix + if extension is not None: + row["extension"] = extension + rows.append(row) + + return pd.DataFrame(rows) + def convert(input_path, save_output=False, output_path=None, multiple_files=False, include_empty_properties=False, quiet=False): if not (os.path.isdir(input_path)): @@ -23,7 +73,7 @@ def convert(input_path, save_output=False, output_path=None, multiple_files=Fal collection = Collection() bids_layout = BIDSLayout(input_path) - layout_df = bids_layout.to_df() + layout_df = layout_to_df(bids_layout) subjects_id = bids_layout.get_subjects() diff --git a/pyproject.toml b/pyproject.toml index 11a419f..6a61853 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,8 +2,7 @@ name = "bids2openminds" version = "0.1.1" dependencies = [ - "bids-validator == 1.14.6" , - "bids", 
+ "ancpbids", "openminds >= 0.2.3", "click>=8.1", "pandas", diff --git a/test/test_bids_examples.py b/test/test_bids_examples.py index 064f1b8..bd068e9 100644 --- a/test/test_bids_examples.py +++ b/test/test_bids_examples.py @@ -6,7 +6,9 @@ # Dataset information in following order dataset_label, dataset_subject_number, dataset_subject_state_number, dataset_person_number, dataset_files_number, dataset_file_bundles_number, dataset_behavioral_protocol_number example_dataset = [("ds003", 13, 13, 2, 58, 39, 1), - ("ds000247", 6, 10, 5, 202, 41, 2), + # ancpBIDS treats CTF MEG .ds directories as single artifacts (not their internal files), + # so file count is lower than with pybids (which listed each file inside .ds separately) + ("ds000247", 6, 10, 5, 120, 41, 2), # The authors list in 'eeg_cbm' contains non person entities 2 is not correct name (issue raied #43) ("eeg_cbm", 20, 20, 2, 104, 40, 1), ("asl001", 1, 1, 2, 8, 3, 0), diff --git a/test/test_task.py b/test/test_task.py index 2876917..2b2dbe1 100644 --- a/test/test_task.py +++ b/test/test_task.py @@ -1,5 +1,5 @@ import pytest -from bids import BIDSLayout +from ancpbids import BIDSLayout from openminds import Collection import os from bids2openminds.main import create_behavioral_protocol