diff --git a/bids2openminds/converter.py b/bids2openminds/converter.py
index d8037dc..1a545f3 100644
--- a/bids2openminds/converter.py
+++ b/bids2openminds/converter.py
@@ -1,12 +1,94 @@
 import warnings
-from bids import BIDSLayout, BIDSValidator
-from openminds import Collection
 import os
+import pandas as pd
+from ancpbids import BIDSLayout
+from ancpbids.query import Artifact
+from ancpbids.model_base import DatatypeFolder
+from openminds import Collection
 import click
 from . import main
 from . import utility
 from . import report
 
+_ENTITY_RENAMES = {"sub": "subject", "ses": "session"}
+
+# Root-level BIDS files that ancpBIDS does not expose as Artifacts
+_ROOT_BIDS_FILES = [
+    ("dataset_description.json", "description", ".json"),
+    ("participants.tsv", "participants", ".tsv"),
+    ("participants.json", "participants", ".json"),
+    ("CHANGES", None, None),
+    ("README", None, None),
+    ("README.md", None, None),
+]
+
+# Columns every table returned by layout_to_df has, even if no row fills them
+_BASE_COLUMNS = ("path", "suffix", "datatype", "extension")
+
+
+def layout_to_df(layout):
+    """Return a table with one row per file found in an ancpBIDS layout.
+
+    Stands in for the ``to_df()`` method of the pybids ``BIDSLayout`` that
+    ``convert`` called before the switch to ancpBIDS.
+
+    Parameters
+    ----------
+    layout : ancpbids.BIDSLayout
+        Layout of the BIDS dataset to tabulate.
+
+    Returns
+    -------
+    pandas.DataFrame
+        One row per raw-scope ``Artifact`` plus one row per existing file
+        listed in ``_ROOT_BIDS_FILES``. The columns ``path``, ``suffix``,
+        ``datatype`` and ``extension`` are always present; each entity of
+        an artifact adds a column named after its key, with ``sub`` and
+        ``ses`` renamed to ``subject`` and ``session``.
+    """
+    dataset = layout.get_dataset()
+    rows = []
+
+    for obj in layout.get(return_type='object', scope='raw'):
+        if not isinstance(obj, Artifact):
+            continue
+        # Only files sitting directly in a datatype folder get a datatype
+        parent = obj.get_parent()
+        datatype = parent.name if isinstance(parent, DatatypeFolder) else None
+        row = {
+            "path": obj.get_absolute_path(),
+            "suffix": obj.suffix,
+            "datatype": datatype,
+            "extension": obj.extension,
+        }
+        for entity in obj.entities:
+            key = _ENTITY_RENAMES.get(entity.key, entity.key)
+            row[key] = entity.value
+        rows.append(row)
+
+    base_dir = os.path.abspath(dataset.base_dir_)
+    for fname, suffix, extension in _ROOT_BIDS_FILES:
+        path = os.path.join(base_dir, fname)
+        # isfile() rather than exists(): a directory with one of these names
+        # is not the BIDS file we are looking for
+        if not os.path.isfile(path):
+            continue
+        row = {"path": path, "datatype": None}
+        if suffix is not None:
+            row["suffix"] = suffix
+        if extension is not None:
+            row["extension"] = extension
+        rows.append(row)
+
+    layout_df = pd.DataFrame(rows)
+    # pandas only creates the columns that some row supplies, so with no rows
+    # (or only README/CHANGES rows) a lookup like layout_df["suffix"] would
+    # raise KeyError; add the missing base columns as empty ones
+    for column in _BASE_COLUMNS:
+        if column not in layout_df.columns:
+            layout_df[column] = None
+    return layout_df
+
 
 def convert(input_path, save_output=False, output_path=None, multiple_files=False, include_empty_properties=False, quiet=False):
     if not (os.path.isdir(input_path)):
@@ -23,7 +105,7 @@ def convert(input_path, save_output=False, output_path=None, multiple_files=Fal
     collection = Collection()
     bids_layout = BIDSLayout(input_path)
 
-    layout_df = bids_layout.to_df()
+    layout_df = layout_to_df(bids_layout)
 
     subjects_id = bids_layout.get_subjects()
 
diff --git a/pyproject.toml b/pyproject.toml
index 82bc440..07f75ec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,8 +2,7 @@
 name = "bids2openminds"
 version = "0.2.0"
 dependencies = [
-  "bids-validator == 1.14.6" ,
-  "bids",
+  "ancpbids",
   "openminds >= 0.2.3",
   "click>=8.1",
   "pandas",
diff --git a/test/test_bids_examples.py b/test/test_bids_examples.py
index 064f1b8..bd068e9 100644
--- a/test/test_bids_examples.py
+++ b/test/test_bids_examples.py
@@ -6,7 +6,9 @@
 
 # Dataset information in following order dataset_label, dataset_subject_number, dataset_subject_state_number, dataset_person_number, dataset_files_number, dataset_file_bundles_number, dataset_behavioral_protocol_number
 example_dataset = [("ds003", 13, 13, 2, 58, 39, 1),
-                   ("ds000247", 6, 10, 5, 202, 41, 2),
+                   # ancpBIDS treats CTF MEG .ds directories as single artifacts (not their internal files),
+                   # so file count is lower than with pybids (which listed each file inside .ds separately)
+                   ("ds000247", 6, 10, 5, 120, 41, 2),
                    # The authors list in 'eeg_cbm' contains non person entities 2 is not correct name (issue raied #43)
                    ("eeg_cbm", 20, 20, 2, 104, 40, 1),
                    ("asl001", 1, 1, 2, 8, 3, 0),
diff --git a/test/test_task.py b/test/test_task.py
index 2876917..2b2dbe1 100644
--- a/test/test_task.py
+++ b/test/test_task.py
@@ -1,5 +1,5 @@
 import pytest
-from bids import BIDSLayout
+from ancpbids import BIDSLayout
 from openminds import Collection
 import os
 from bids2openminds.main import create_behavioral_protocol