Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions babs/input_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(
is_zipped,
unzipped_path_containing_subject_dirs=None,
required_files=None,
common_paths=None,
processing_level=None,
babs_project_analysis_path=None,
):
Expand All @@ -43,6 +44,11 @@ def __init__(
when unzipped, this string precedes the subject directories
required_files: list of str or None
list of required files in the input dataset
common_paths: list of str or None
paths relative to the dataset root to include in the sparse-checkout for every job,
in addition to the per-subject (and per-session) path.
Defaults to ``["dataset_description.json"]`` when ``None``.
Pass an empty list to disable all common-path inclusion.
processing_level: {'subject', 'session'} or None
whether processing is done on a subject-wise or session-wise basis
babs_project_analysis_path: str or None
Expand All @@ -57,6 +63,7 @@ def __init__(
else:
self.is_zipped = bool(is_zipped)
self.required_files = required_files
self.common_paths = ['dataset_description.json'] if common_paths is None else common_paths
if processing_level not in ['subject', 'session']:
raise ValueError('invalid `processing_level`!')
self.processing_level = processing_level
Expand Down Expand Up @@ -269,6 +276,7 @@ def as_dict(self):
'is_zipped': self.is_zipped,
'unzipped_path_containing_subject_dirs': unzipped_path,
'required_files': self.required_files,
'common_paths': self.common_paths,
'processing_level': self.processing_level,
'babs_project_analysis_path': self.babs_project_analysis_path,
}
Expand Down Expand Up @@ -433,4 +441,5 @@ def __init__(self, input_dataset):
input_dataset.unzipped_path_containing_subject_dirs
)
self.required_files = input_dataset.required_files
self.common_paths = input_dataset.common_paths
self.processing_level = input_dataset.processing_level
10 changes: 7 additions & 3 deletions babs/templates/participant_job.sh.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@ echo "# Pull down the input session but don't retrieve data contents:"
{% if not input_dataset['is_zipped'] %}
datalad get -n "{{ input_dataset['path_in_babs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}"

datalad get -n "{{ input_dataset['path_in_babs'] }}/dataset_description.json"
{% for common_path in input_dataset['common_paths'] %}
echo "# Getting common path: {{ input_dataset['path_in_babs'] }}/{{ common_path }}"
datalad get -n "{{ input_dataset['path_in_babs'] }}/{{ common_path }}"
{% endfor %}
{% else %}
datalad get -n "{{ input_dataset['path_in_babs'] }}"
{% endif %}
Expand All @@ -99,7 +102,7 @@ datalad get -n "{{ input_dataset['path_in_babs'] }}"
if [ -d "{{ input_dataset['path_in_babs'] }}/.git" ]; then
( cd "{{ input_dataset['path_in_babs'] }}" && \
( git sparse-checkout init --no-cone 2>/dev/null && \
{ echo "{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}"; echo 'dataset_description.json'; } | git sparse-checkout set --stdin 2>/dev/null ) ) || true
{ echo "{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}"; {% for common_path in input_dataset['common_paths'] %}echo '{{ common_path }}'; {% endfor %}} | git sparse-checkout set --stdin 2>/dev/null ) ) || true
fi
{% endif %}
{% endfor %}
Expand Down Expand Up @@ -137,7 +140,8 @@ datalad run \
{% for input_dataset in input_datasets %}
{% if not input_dataset['is_zipped'] %}
-i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" \
-i "{{ input_dataset['path_in_babs'] }}/dataset_description.json" \
{% for common_path in input_dataset['common_paths'] %} -i "{{ input_dataset['path_in_babs'] }}/{{ common_path }}" \
{% endfor %}
{% else %}
-i "${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}" \
{% endif %}
Expand Down
64 changes: 63 additions & 1 deletion docs/preparation_config_yaml_file.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Sections in the configuration YAML file
* **all_results_in_one_zip**: whether to zip all results in one zip file;
* **zip_foldernames**: the results foldername(s) to be zipped;
* **required_files**: to only keep subjects (sessions) that have this list of required files in input dataset(s);
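* **common_paths**: paths relative to an input dataset's root to include in the sparse-checkout for every job, in addition to the per-subject (and per-session) path (e.g., a shared ``phenotype/participants.tsv`` file); this key is set per input dataset, nested under its entry in **input_datasets**;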
* **alert_log_messages**: alert messages in the log files that may be helpful for debugging errors in failed jobs;

Of these sections, the following are optional:
Expand All @@ -40,6 +41,7 @@ Among these sections, these sections are optional:
* You must include this section if there is more than one input dataset.

* **required_files**
* **common_paths**
* **alert_log_messages**
* **imported_files**

Expand Down Expand Up @@ -103,7 +105,7 @@ Example section **input_datasets**
unzipped_path_containing_subject_dirs: "freesurfer"
path_in_babs: inputs/data/freesurfer

This example shows two input datasets:
This example shows two input datasets:
one is a raw BIDS dataset, and the other is a set of zipped FreeSurfer results from another BABS project.
Previously, the commandline to use something like this would have required::

Expand Down Expand Up @@ -773,4 +775,64 @@ Notes:

.. _required_files:

Section ``required_files``
==========================

.. note::

``required_files`` is currently not fully implemented.
The field is accepted in the YAML file but filtering is not yet applied.

.. _common-paths:

Section ``common_paths``
=========================

The ``common_paths`` field lists paths (relative to an input dataset's root)
that every job should include in the sparse-checkout and retrieve with
``datalad get``, in addition to the per-subject (and per-session) path.
This is useful when BIDS Apps or processing scripts need dataset-level files
that live outside any individual subject directory.

By default (when the field is omitted), BABS automatically includes
``dataset_description.json`` for every non-zipped input dataset.
Once you supply ``common_paths`` explicitly, the default is **replaced** —
so if you still want ``dataset_description.json`` you must list it yourself.

``common_paths`` is optional. It is nested under the relevant input dataset
entry inside the ``input_datasets`` section.

Example — keep the default ``dataset_description.json`` **and** add a shared
phenotype file:

.. code-block:: yaml

input_datasets:
BIDS:
is_zipped: false
origin_url: "/path/to/BIDS"
path_in_babs: inputs/data/BIDS
common_paths:
- "phenotype/participants.tsv"
- "dataset_description.json"

Example — disable all common-path retrieval (pass an empty list):

.. code-block:: yaml

input_datasets:
BIDS:
is_zipped: false
origin_url: "/path/to/BIDS"
path_in_babs: inputs/data/BIDS
common_paths: []

Notes:

* Paths are relative to the input dataset root (e.g., ``"phenotype/participants.tsv"``
not ``"inputs/data/BIDS/phenotype/participants.tsv"``).
* Each path is requested individually: first with ``datalad get -n`` (which does
  not download file contents), then as a ``datalad run -i`` input, so you can
  track exactly which paths are requested in the job log.
* This field has no effect on zipped input datasets.


2 changes: 2 additions & 0 deletions tests/test_generate_submit_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
'path_in_babs': 'inputs/data/BIDS',
'unzipped_path_containing_subject_dirs': 'inputs/data/BIDS',
'is_zipped': False,
'common_paths': ['dataset_description.json'],
},
]

Expand All @@ -29,6 +30,7 @@
'path_in_babs': 'inputs/data/BIDS',
'unzipped_path_containing_subject_dirs': 'inputs/data/BIDS',
'is_zipped': False,
'common_paths': [],
},
]

Expand Down
Loading