From ae1e6934899832a5300215cb2daf91406bac9a4c Mon Sep 17 00:00:00 2001 From: Dorota Jarecka Date: Wed, 13 May 2026 15:00:03 -0400 Subject: [PATCH 1/4] Add `common_paths` to input dataset config for non-subject files Replaces the hard-coded `dataset_description.json` in sparse-checkout, `datalad get`, and `datalad run -i` with a configurable `common_paths` list on each input dataset entry. Defaults to `["dataset_description.json"]` to preserve existing behaviour. An empty list disables all common-path inclusion. Closes #374 Co-Authored-By: Claude Sonnet 4.6 --- babs/input_dataset.py | 9 +++++++++ babs/templates/participant_job.sh.jinja2 | 9 ++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/babs/input_dataset.py b/babs/input_dataset.py index 45ec2ed6..ec138032 100644 --- a/babs/input_dataset.py +++ b/babs/input_dataset.py @@ -24,6 +24,7 @@ def __init__( is_zipped, unzipped_path_containing_subject_dirs=None, required_files=None, + common_paths=None, processing_level=None, babs_project_analysis_path=None, ): @@ -43,6 +44,11 @@ def __init__( when unzipped, this string precedes the subject directories required_files: list of str or None list of required files in the input dataset + common_paths: list of str or None + paths relative to the dataset root to include in the sparse-checkout for every job, + in addition to the per-subject (and per-session) path. + Defaults to ``["dataset_description.json"]`` when ``None``. + Pass an empty list to disable all common-path inclusion. 
processing_level: {'subject', 'session'} or None whether processing is done on a subject-wise or session-wise basis babs_project_analysis_path: str or None @@ -57,6 +63,7 @@ def __init__( else: self.is_zipped = bool(is_zipped) self.required_files = required_files + self.common_paths = ['dataset_description.json'] if common_paths is None else common_paths if processing_level not in ['subject', 'session']: raise ValueError('invalid `processing_level`!') self.processing_level = processing_level @@ -269,6 +276,7 @@ def as_dict(self): 'is_zipped': self.is_zipped, 'unzipped_path_containing_subject_dirs': unzipped_path, 'required_files': self.required_files, + 'common_paths': self.common_paths, 'processing_level': self.processing_level, 'babs_project_analysis_path': self.babs_project_analysis_path, } @@ -433,4 +441,5 @@ def __init__(self, input_dataset): input_dataset.unzipped_path_containing_subject_dirs ) self.required_files = input_dataset.required_files + self.common_paths = input_dataset.common_paths self.processing_level = input_dataset.processing_level diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index f3151cea..5a3683fa 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -87,7 +87,9 @@ echo "# Pull down the input session but don't retrieve data contents:" {% if not input_dataset['is_zipped'] %} datalad get -n "{{ input_dataset['path_in_babs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" -datalad get -n "{{ input_dataset['path_in_babs'] }}/dataset_description.json" +{% for common_path in input_dataset['common_paths'] %} +datalad get -n "{{ input_dataset['path_in_babs'] }}/{{ common_path }}" +{% endfor %} {% else %} datalad get -n "{{ input_dataset['path_in_babs'] }}" {% endif %} @@ -99,7 +101,7 @@ datalad get -n "{{ input_dataset['path_in_babs'] }}" if [ -d "{{ input_dataset['path_in_babs'] 
}}/.git" ]; then ( cd "{{ input_dataset['path_in_babs'] }}" && \ ( git sparse-checkout init --no-cone 2>/dev/null && \ - { echo "{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}"; echo 'dataset_description.json'; } | git sparse-checkout set --stdin 2>/dev/null ) ) || true + { echo "{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}"; {% for common_path in input_dataset['common_paths'] %}echo '{{ common_path }}'; {% endfor %}} | git sparse-checkout set --stdin 2>/dev/null ) ) || true fi {% endif %} {% endfor %} @@ -137,7 +139,8 @@ datalad run \ {% for input_dataset in input_datasets %} {% if not input_dataset['is_zipped'] %} -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" \ - -i "{{ input_dataset['path_in_babs'] }}/dataset_description.json" \ +{% for common_path in input_dataset['common_paths'] %} -i "{{ input_dataset['path_in_babs'] }}/{{ common_path }}" \ +{% endfor %} {% else %} -i "${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}" \ {% endif %} From fb21ddfa7f17a90925b691f9013a92d924efde2d Mon Sep 17 00:00:00 2001 From: Dorota Jarecka Date: Wed, 13 May 2026 16:00:21 -0400 Subject: [PATCH 2/4] adding echo --- babs/templates/participant_job.sh.jinja2 | 1 + 1 file changed, 1 insertion(+) diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index 5a3683fa..5f26e886 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -88,6 +88,7 @@ echo "# Pull down the input session but don't retrieve data contents:" datalad get -n "{{ input_dataset['path_in_babs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" {% for common_path in 
input_dataset['common_paths'] %} +echo "# Getting common path: {{ input_dataset['path_in_babs'] }}/{{ common_path }}" datalad get -n "{{ input_dataset['path_in_babs'] }}/{{ common_path }}" {% endfor %} {% else %} From e3dafc819cbb73e337139e6955dd717d0292c1d4 Mon Sep 17 00:00:00 2001 From: Dorota Jarecka Date: Wed, 13 May 2026 16:18:41 -0400 Subject: [PATCH 3/4] docs: document common_paths field in preparation_config_yaml_file.rst Add common_paths to the section overview list and optional sections list, add a stub required_files section, and add a full common_paths section with examples and usage notes. Co-Authored-By: Claude Sonnet 4.6 --- docs/preparation_config_yaml_file.rst | 64 ++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/docs/preparation_config_yaml_file.rst b/docs/preparation_config_yaml_file.rst index 767a95ac..e2593aea 100644 --- a/docs/preparation_config_yaml_file.rst +++ b/docs/preparation_config_yaml_file.rst @@ -29,6 +29,7 @@ Sections in the configuration YAML file * **all_results_in_one_zip**: whether to zip all results in one zip file; * **zip_foldernames**: the results foldername(s) to be zipped; * **required_files**: to only keep subjects (sessions) that have this list of required files in input dataset(s); +* **common_paths**: dataset-root paths to include in the sparse-checkout for every job, in addition to the per-subject path (e.g., a shared ``phenotype/participants.tsv`` file); * **alert_log_messages**: alert messages in the log files that may be helpful for debugging errors in failed jobs; Among these sections, these sections are optional: @@ -40,6 +41,7 @@ Among these sections, these sections are optional: * You must include this section if there are more one input dataset. 
* **required_files** +* **common_paths** * **alert_log_messages** * **imported_files** @@ -103,7 +105,7 @@ Example section **input_datasets** unzipped_path_containing_subject_dirs: "freesurfer" path_in_babs: inputs/data/freesurfer -This example shows two input datasets: +This example shows two input datasets: one is a raw BIDS dataset, and the other is a zipped FreeSurfer results from another BABS project. Previously, the commandline to use something like this would have required:: @@ -773,4 +775,64 @@ Notes: .. _required_files: +Section ``required_files`` +========================== + +.. note:: + + ``required_files`` is currently not fully implemented. + The field is accepted in the YAML file but filtering is not yet applied. + +.. _common-paths: + +Section ``common_paths`` +========================= + +The ``common_paths`` field lists paths (relative to an input dataset's root) +that every job should include in the sparse-checkout and retrieve with +``datalad get``, in addition to the per-subject (and per-session) path. +This is useful when BIDS Apps or processing scripts need dataset-level files +that live outside any individual subject directory. + +By default (when the field is omitted), BABS automatically includes +``dataset_description.json`` for every non-zipped input dataset. +Once you supply ``common_paths`` explicitly, the default is **replaced** — +so if you still want ``dataset_description.json`` you must list it yourself. + +``common_paths`` is optional. It is nested under the relevant input dataset +entry inside the ``input_datasets`` section. + +Example — keep the default ``dataset_description.json`` **and** add a shared +phenotype file: + +.. code-block:: yaml + + input_datasets: + BIDS: + is_zipped: false + origin_url: "/path/to/BIDS" + path_in_babs: inputs/data/BIDS + common_paths: + - "phenotype/participants.tsv" + - "dataset_description.json" + +Example — disable all common-path retrieval (pass an empty list): + +.. 
code-block:: yaml + + input_datasets: + BIDS: + is_zipped: false + origin_url: "/path/to/BIDS" + path_in_babs: inputs/data/BIDS + common_paths: [] + +Notes: + +* Paths are relative to the input dataset root (e.g., ``"phenotype/participants.tsv"`` + not ``"inputs/data/BIDS/phenotype/participants.tsv"``). +* Each path is requested individually with ``datalad get -n`` (which does not download file content) + and echoed to the job log; the content itself is fetched later by ``datalad run -i``. +* This field has no effect on zipped input datasets. + From 443dcf66a0d2a0b79ee8a99fddceced9186639ac Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Thu, 21 May 2026 08:59:56 -0500 Subject: [PATCH 4/4] Fixup tests Add common paths to the submit script test inputs. No need to add to the zipped ones, they are skipped. --- tests/test_generate_submit_script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_generate_submit_script.py b/tests/test_generate_submit_script.py index 8f4e832b..7ee9f610 100644 --- a/tests/test_generate_submit_script.py +++ b/tests/test_generate_submit_script.py @@ -14,6 +14,7 @@ 'path_in_babs': 'inputs/data/BIDS', 'unzipped_path_containing_subject_dirs': 'inputs/data/BIDS', 'is_zipped': False, + 'common_paths': ['dataset_description.json'], }, ] @@ -29,6 +30,7 @@ 'path_in_babs': 'inputs/data/BIDS', 'unzipped_path_containing_subject_dirs': 'inputs/data/BIDS', 'is_zipped': False, + 'common_paths': [], }, ]