-
Notifications
You must be signed in to change notification settings - Fork 8
Adding an option to fit BIDS-study layout (updated) #369
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
9bb4256
3535cd8
24570d1
6295329
42a8cea
39b58f7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ | |
|
|
||
| import datalad.api as dlapi | ||
| import pandas as pd | ||
| import yaml | ||
|
|
||
| from babs.input_datasets import InputDatasets, OutputDatasets | ||
| from babs.scheduler import ( | ||
|
|
@@ -45,7 +46,7 @@ | |
| class BABS: | ||
| """The BABS base class holds common attributes and methods for all BABS classes.""" | ||
|
|
||
| def __init__(self, project_root): | ||
| def __init__(self, project_root, container_config=None): | ||
| """The BABS class is for babs projects of BIDS Apps. | ||
|
|
||
| The constructor only initializes the attributes. | ||
|
|
@@ -108,13 +109,28 @@ def __init__(self, project_root): | |
| # attributes: | ||
| self.project_root = str(project_root) | ||
|
|
||
| self.analysis_path = op.join(self.project_root, 'analysis') | ||
| if container_config is not None: | ||
| with open(container_config) as f: | ||
| cfg = yaml.safe_load(f) | ||
| else: | ||
| root_config_path = op.join(self.project_root, '.babs', 'babs_init_config.yaml') | ||
| cfg = {} | ||
| if op.exists(root_config_path): | ||
| with open(root_config_path) as f: | ||
| cfg = yaml.safe_load(f) or {} | ||
|
|
||
| analysis_dir = cfg.get('analysis_path', 'analysis') | ||
| self.analysis_path = op.normpath(op.join(self.project_root, analysis_dir)) | ||
| self._analysis_datalad_handle = None | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
if a config uses Suggested fix: centralize path resolution with |
||
|
|
||
| self.config_path = op.join(self.analysis_path, 'code/babs_proj_config.yaml') | ||
|
|
||
| self.input_ria_path = op.join(self.project_root, 'input_ria') | ||
| self.output_ria_path = op.join(self.project_root, 'output_ria') | ||
| self.input_ria_path = op.normpath( | ||
| op.join(self.project_root, cfg.get('input_ria_path', 'input_ria')) | ||
| ) | ||
| self.output_ria_path = op.normpath( | ||
| op.join(self.project_root, cfg.get('output_ria_path', 'output_ria')) | ||
| ) | ||
|
|
||
| self.input_ria_url = 'ria+file://' + self.input_ria_path | ||
| self.output_ria_url = 'ria+file://' + self.output_ria_path | ||
|
|
@@ -128,6 +144,7 @@ def __init__(self, project_root): | |
| self.job_status_path_rel = 'code/job_status.csv' | ||
| self.job_status_path_abs = op.join(self.analysis_path, self.job_status_path_rel) | ||
| self.job_submit_path_abs = op.join(self.analysis_path, 'code/job_submit.csv') | ||
| self.analysis_root = op.dirname(self.analysis_path) | ||
| self._shared_group_enabled_cache = None | ||
| self._apply_config() | ||
|
|
||
|
|
@@ -178,7 +195,7 @@ def _apply_config(self) -> None: | |
| self.wtf_key_info(flag_output_ria_only=True) | ||
|
|
||
| self.input_datasets = InputDatasets(self.processing_level, config_yaml['input_datasets']) | ||
| self.input_datasets.update_abs_paths(Path(self.project_root) / 'analysis') | ||
| self.input_datasets.update_abs_paths(Path(self.analysis_path)) | ||
| self.ensure_shared_group_git_safe_directories() | ||
|
|
||
| def _validate_pipeline_config(self) -> None: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| import csv | ||
| import os | ||
| import os.path as op | ||
| import shutil | ||
| import subprocess | ||
| import tempfile | ||
| from pathlib import Path | ||
|
|
@@ -25,6 +26,9 @@ | |
| class BABSBootstrap(BABS): | ||
| """A BABS subclass that implements the bootstrap process.""" | ||
|
|
||
| def __init__(self, project_root, container_config=None): | ||
| super().__init__(project_root, container_config=container_config) | ||
|
|
||
| def _apply_config(self): | ||
| pass | ||
|
|
||
|
|
@@ -114,14 +118,24 @@ def babs_bootstrap( | |
| self.queue = validate_queue(queue) | ||
| system = System(self.queue) | ||
|
|
||
| # Create `analysis` folder: ----------------------------- | ||
| # Create analysis folder: ----------------------------- | ||
| print('DataLad version: ' + get_datalad_version()) | ||
| print('\nCreating `analysis` folder (also a datalad dataset)...') | ||
| print(f'\nCreating `{self.analysis_path}` folder (also a datalad dataset)...') | ||
| create_kwargs = {'cfg_proc': 'yoda', 'annex': True} | ||
| if self.shared_group is not None: | ||
| create_kwargs['initopts'] = ['--shared=group'] | ||
| self._analysis_datalad_handle = dlapi.create(self.analysis_path, **create_kwargs) | ||
| self.input_datasets.update_abs_paths(Path(self.analysis_path)) | ||
|
|
||
| # Persist original config so other BABS commands can find it: | ||
| babs_dir = op.join(self.project_root, '.babs') | ||
| os.makedirs(babs_dir, exist_ok=True) | ||
| shutil.copy2(container_config, op.join(babs_dir, 'babs_init_config.yaml')) | ||
| if op.normpath(self.analysis_path) == op.normpath(self.project_root): | ||
| self.datalad_save( | ||
| path='.babs/babs_init_config.yaml', | ||
| message='Save babs init config', | ||
| ) | ||
| self.input_datasets.set_inclusion_dataframe(initial_inclusion_df, processing_level) | ||
|
|
||
| # Prepare `.gitignore` ------------------------------ | ||
|
|
@@ -132,6 +146,9 @@ def babs_bootstrap( | |
| os.remove(gitignore_path) | ||
| gitignore_file = open(gitignore_path, 'a') # open in append mode | ||
|
|
||
| # not to track input/output RIA stores: | ||
| gitignore_file.write('\n' + op.basename(self.input_ria_path)) | ||
| gitignore_file.write('\n' + op.basename(self.output_ria_path)) | ||
| # not to track `logs` folder: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested fix: when RIA paths are inside analysis_path, write paths relative to analysis_path. |
||
| gitignore_file.write('\nlogs') | ||
| # not to track `.*_datalad_lock`: | ||
|
|
@@ -153,7 +170,7 @@ def babs_bootstrap( | |
|
|
||
| # Create `babs_proj_config.yaml` file: ---------------------- | ||
| print('Save BABS project configurations in a YAML file ...') | ||
| print("Path to this yaml file will be: 'analysis/code/babs_proj_config.yaml'") | ||
| print(f"Path to this yaml file will be: '{self.config_path}'") | ||
|
|
||
| env = Environment( | ||
| loader=PackageLoader('babs', 'templates'), | ||
|
|
@@ -165,6 +182,7 @@ def babs_bootstrap( | |
| with open(self.config_path, 'w') as f: | ||
| f.write( | ||
| template.render( | ||
| analysis_dir=op.basename(self.analysis_path), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| processing_level=self.processing_level, | ||
| queue=self.queue, | ||
| input_ds=self.input_datasets, | ||
|
|
@@ -443,6 +461,7 @@ def _bootstrap_single_app_scripts( | |
| self.processing_level, | ||
| system, | ||
| project_root=op.dirname(self.analysis_path), | ||
| analysis_dir=op.basename(self.analysis_path), | ||
| shared_group_mode=shared_group_mode, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nested but PROJECT_ROOT in the generated environment still means the real project root. This is fragile and confusing, and breaks easily for external callers or future script changes. Suggested fix: pass a single |
||
| ) | ||
|
|
||
|
|
@@ -518,6 +537,7 @@ def _bootstrap_pipeline_scripts(self, container_ds, container_config, system): | |
| container_images=container_images, | ||
| datalad_run_message='pipeline', | ||
| project_root=op.dirname(self.analysis_path), | ||
| analysis_dir=op.basename(self.analysis_path), | ||
| ) | ||
|
|
||
| with open(bash_path, 'w') as f: | ||
|
|
@@ -599,6 +619,8 @@ def clean_up(self): | |
| if op.exists(self.analysis_path): # analysis folder is created by datalad | ||
| print('Removing input dataset(s) if cloned...') | ||
| for in_ds in self.input_datasets: | ||
| if in_ds._babs_project_analysis_path is None: | ||
| continue | ||
| if op.exists(in_ds.babs_project_analysis_path): | ||
| # use `datalad remove` to remove: | ||
| _ = self.analysis_datalad_handle.remove( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -206,7 +206,7 @@ def babs_init_main( | |
|
|
||
| from babs import BABSBootstrap | ||
|
|
||
| babs_proj = BABSBootstrap(project_root) | ||
| babs_proj = BABSBootstrap(project_root, container_config=container_config) | ||
| try: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Direct API usage like silently ignores custom analysis_path. Suggested fix: either require |
||
| babs_proj.babs_bootstrap( | ||
| processing_level, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -157,6 +157,7 @@ def generate_bash_participant_job( | |
| processing_level, | ||
| system, | ||
| project_root=None, | ||
| analysis_dir='analysis', | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. new arg but docstring wasn't updated |
||
| shared_group_mode=False, | ||
| ): | ||
| """Generate bash script for participant job. | ||
|
|
@@ -188,6 +189,7 @@ def generate_bash_participant_job( | |
| container_name=self.container_name, | ||
| zip_foldernames=self.config['zip_foldernames'], | ||
| project_root=project_root, | ||
| analysis_dir=analysis_dir, | ||
| ) | ||
|
|
||
| with open(bash_path, 'w') as f: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,5 +2,5 @@ | |
| # '${max_array}' is a placeholder. | ||
| {% endif %} | ||
|
|
||
| cmd_template: '{{ submit_head }} {{ env_flags }} {{ name_flag_str }}{{ job_name }} {{ eo_args }} {{ array_args }} {% if test %}{{ babs.analysis_path }}/code/check_setup/call_test_job.sh{% else %}{{ babs.analysis_path }}/code/participant_job.sh {{ dssource }} {{ pushgitremote }} {{ babs.job_submit_path_abs }} {{ babs.project_root }}{% endif %}' | ||
| cmd_template: '{{ submit_head }} {{ env_flags }} {{ name_flag_str }}{{ job_name }} {{ eo_args }} {{ array_args }} {% if test %}{{ babs.analysis_path }}/code/check_setup/call_test_job.sh{% else %}{{ babs.analysis_path }}/code/participant_job.sh {{ dssource }} {{ pushgitremote }} {{ babs.job_submit_path_abs }} {{ babs.analysis_root }}{% endif %}' | ||
| job_name_template: '{{ job_name }}' | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. generated command strings are split with plain .split(). spaces in project paths, RIA paths, or analysis paths will break job submission. Suggested fix: build argv as structured data, or render shell-quoted strings and parse with shlex.split(). |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| # This is an example config yaml file for: | ||
| # BIDS App: freesurfer-post | ||
| # BIDS App version: 0.1.2 | ||
| # Task: Creating tabular outputs from FreeSurfer results | ||
| # Which layout: BIDS study layout | ||
| # Which system: Slurm | ||
|
|
||
| # WARNING!!! | ||
| # This is only an example, which may not necessarily fit your purpose, | ||
| # or be an optimized solution for your case, | ||
| # or be compatible to the BIDS App version you're using. | ||
| # Therefore, please change and tailor it for your case before use it!!! | ||
|
|
||
| # In BIDS study layout, initialize the BABS project inside the study's derivatives/ folder, for example: | ||
| # babs init ... /path/to/BIDS_STUDY/derivatives/freesurferpost-0-1-2-babs | ||
| # | ||
| # With `analysis_path: "."`, the derivative project directory itself is the BABS analysis dataset. | ||
| # Internal BABS RIA stores and the init config live under `.babs/`. | ||
| analysis_path: "." | ||
| input_ria_path: ".babs/input_ria" | ||
| output_ria_path: ".babs/output_ria" | ||
|
|
||
| # Define the input datasets | ||
| input_datasets: | ||
| fmriprep_anat: | ||
| required_files: | ||
| - "*fmriprep_anat*.zip" | ||
| is_zipped: true | ||
| # [FIX ME] path to zipped fMRIPrep anatomical outputs, usually from a BABS fMRIPrep --anat-only project. | ||
| # If that upstream project used the original BABS layout, use: | ||
| # ria+file:///path/to/BIDS_STUDY/derivatives/fmriprep_anat-24-1-1-babs/output_ria#~data | ||
| origin_url: "ria+file:///path/to/BIDS_STUDY/derivatives/fmriprep_anat-24-1-1-babs/.babs/output_ria#~data" | ||
| unzipped_path_containing_subject_dirs: sourcedata/fmriprep_anat/fmriprep_anat | ||
| path_in_babs: sourcedata/fmriprep_anat | ||
|
|
||
| # Arguments in `singularity run`: | ||
| bids_app_args: | ||
| $SUBJECT_SELECTION_FLAG: "--subject-id" | ||
| $SESSION_SELECTION_FLAG: "--session-id" | ||
| -w: "$BABS_TMPDIR" | ||
| --fs-license-file: "/path/to/FreeSurfer/license.txt" # [FIX ME] path to FreeSurfer license file | ||
| --subjects-dir: '"${PWD}"/sourcedata/fmriprep_anat/fmriprep_anat/sourcedata/freesurfer' | ||
|
|
||
| # Arguments that are passed directly to singularity/apptainer: | ||
| singularity_args: | ||
| - --cleanenv | ||
| - --containall | ||
| - --writable-tmpfs | ||
|
|
||
| # Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): | ||
| all_results_in_one_zip: true | ||
| zip_foldernames: | ||
| freesurfer-post: "0-1-2" # folder 'freesurfer-post' will be zipped into 'sub-xx_(ses-yy_)freesurfer-post-0-1-2.zip' | ||
|
|
||
| # How much cluster resources it needs: | ||
| cluster_resources: | ||
| interpreting_shell: "/bin/bash" | ||
| hard_runtime_limit: "01:00:00" | ||
| temporary_disk_space: 5G | ||
| customized_text: | | ||
| #SBATCH -p all | ||
| #SBATCH --nodes=1 | ||
| #SBATCH --ntasks=1 | ||
| #SBATCH --cpus-per-task=1 | ||
| #SBATCH --mem=5G | ||
| #SBATCH --propagate=NONE | ||
|
|
||
| # Necessary commands to be run first: | ||
| # [FIX ME] change or add commands for setting up the virtual environment, for loading necessary modules, etc | ||
| script_preamble: | | ||
| source "${CONDA_PREFIX}"/bin/activate babs # you may need to change this to work with your environment manager. | ||
| source xxxx # [FIX ME or DELETE ME] source any necessary program | ||
| module load xxxx # [FIX ME or DELETE ME] source any necessary program | ||
|
|
||
| # Where to run the jobs: | ||
| job_compute_space: "path/to/temporary_compute_space" # [FIX ME] replace "/path/to/temporary_compute_space" with yours |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
empty YAML returns None, causing an opaque AttributeError.
Suggested fix: validate with something like: