diff --git a/babs/base.py b/babs/base.py index a5a2ea01..cc8b520f 100644 --- a/babs/base.py +++ b/babs/base.py @@ -10,6 +10,7 @@ import datalad.api as dlapi import pandas as pd +import yaml from babs.input_datasets import InputDatasets, OutputDatasets from babs.scheduler import ( @@ -45,7 +46,7 @@ class BABS: """The BABS base class holds common attributes and methods for all BABS classes.""" - def __init__(self, project_root): + def __init__(self, project_root, container_config=None): """The BABS class is for babs projects of BIDS Apps. The constructor only initializes the attributes. @@ -108,13 +109,28 @@ def __init__(self, project_root): # attributes: self.project_root = str(project_root) - self.analysis_path = op.join(self.project_root, 'analysis') + if container_config is not None: + with open(container_config) as f: + cfg = yaml.safe_load(f) + else: + root_config_path = op.join(self.project_root, '.babs', 'babs_init_config.yaml') + cfg = {} + if op.exists(root_config_path): + with open(root_config_path) as f: + cfg = yaml.safe_load(f) or {} + + analysis_dir = cfg.get('analysis_path', 'analysis') + self.analysis_path = op.normpath(op.join(self.project_root, analysis_dir)) self._analysis_datalad_handle = None self.config_path = op.join(self.analysis_path, 'code/babs_proj_config.yaml') - self.input_ria_path = op.join(self.project_root, 'input_ria') - self.output_ria_path = op.join(self.project_root, 'output_ria') + self.input_ria_path = op.normpath( + op.join(self.project_root, cfg.get('input_ria_path', 'input_ria')) + ) + self.output_ria_path = op.normpath( + op.join(self.project_root, cfg.get('output_ria_path', 'output_ria')) + ) self.input_ria_url = 'ria+file://' + self.input_ria_path self.output_ria_url = 'ria+file://' + self.output_ria_path @@ -128,6 +144,7 @@ def __init__(self, project_root): self.job_status_path_rel = 'code/job_status.csv' self.job_status_path_abs = op.join(self.analysis_path, self.job_status_path_rel) self.job_submit_path_abs = op.join(self.analysis_path, 'code/job_submit.csv') + self.analysis_root = op.dirname(self.analysis_path) self._shared_group_enabled_cache = None self._apply_config() @@ -178,7 +195,7 @@ def _apply_config(self) -> None: self.wtf_key_info(flag_output_ria_only=True) self.input_datasets = InputDatasets(self.processing_level, config_yaml['input_datasets']) - self.input_datasets.update_abs_paths(Path(self.project_root) / 'analysis') + self.input_datasets.update_abs_paths(Path(self.analysis_path)) self.ensure_shared_group_git_safe_directories() def _validate_pipeline_config(self) -> None: diff --git a/babs/bootstrap.py b/babs/bootstrap.py index 3827bff6..471948d1 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -3,6 +3,7 @@ import csv import os import os.path as op +import shutil import subprocess import tempfile from pathlib import Path @@ -25,6 +26,9 @@ class BABSBootstrap(BABS): """A BABS subclass that implements the bootstrap process.""" + def __init__(self, project_root, container_config=None): + super().__init__(project_root, container_config=container_config) + def _apply_config(self): pass @@ -114,14 +118,24 @@ def babs_bootstrap( self.queue = validate_queue(queue) system = System(self.queue) - # Create `analysis` folder: ----------------------------- + # Create analysis folder: ----------------------------- print('DataLad version: ' + get_datalad_version()) - print('\nCreating `analysis` folder (also a datalad dataset)...') + print(f'\nCreating `{self.analysis_path}` folder (also a datalad dataset)...') create_kwargs = {'cfg_proc': 'yoda', 'annex': True} if self.shared_group is not None: create_kwargs['initopts'] = ['--shared=group'] self._analysis_datalad_handle = dlapi.create(self.analysis_path, **create_kwargs) self.input_datasets.update_abs_paths(Path(self.analysis_path)) + + # Persist original config so other BABS commands can find it: + babs_dir = op.join(self.project_root, '.babs') + os.makedirs(babs_dir, exist_ok=True) + shutil.copy2(container_config, op.join(babs_dir, 'babs_init_config.yaml')) + if op.normpath(self.analysis_path) == op.normpath(self.project_root): + self.datalad_save( + path='.babs/babs_init_config.yaml', + message='Save babs init config', + ) self.input_datasets.set_inclusion_dataframe(initial_inclusion_df, processing_level) # Prepare `.gitignore` ------------------------------ @@ -132,6 +146,9 @@ def babs_bootstrap( os.remove(gitignore_path) gitignore_file = open(gitignore_path, 'a') # open in append mode + # not to track input/output RIA stores: + gitignore_file.write('\n' + op.basename(self.input_ria_path)) + gitignore_file.write('\n' + op.basename(self.output_ria_path)) # not to track `logs` folder: gitignore_file.write('\nlogs') # not to track `.*_datalad_lock`: @@ -153,7 +170,7 @@ def babs_bootstrap( # Create `babs_proj_config.yaml` file: ---------------------- print('Save BABS project configurations in a YAML file ...') - print("Path to this yaml file will be: 'analysis/code/babs_proj_config.yaml'") + print(f"Path to this yaml file will be: '{self.config_path}'") env = Environment( loader=PackageLoader('babs', 'templates'), @@ -165,6 +182,7 @@ def babs_bootstrap( with open(self.config_path, 'w') as f: f.write( template.render( + analysis_dir=op.basename(self.analysis_path), processing_level=self.processing_level, queue=self.queue, input_ds=self.input_datasets, @@ -443,6 +461,7 @@ def _bootstrap_single_app_scripts( self.processing_level, system, project_root=op.dirname(self.analysis_path), + analysis_dir=op.basename(self.analysis_path), shared_group_mode=shared_group_mode, ) @@ -518,6 +537,7 @@ def _bootstrap_pipeline_scripts(self, container_ds, container_config, system): container_images=container_images, datalad_run_message='pipeline', project_root=op.dirname(self.analysis_path), + analysis_dir=op.basename(self.analysis_path), ) with open(bash_path, 'w') as f: @@ -599,6 +619,8 @@ def clean_up(self): if op.exists(self.analysis_path): # analysis folder is created by datalad print('Removing input dataset(s) if cloned...') for in_ds in self.input_datasets: + if in_ds._babs_project_analysis_path is None: + continue if op.exists(in_ds.babs_project_analysis_path): # use `datalad remove` to remove: _ = self.analysis_datalad_handle.remove( diff --git a/babs/check_setup.py b/babs/check_setup.py index 86bf9dbd..0249fb78 100644 --- a/babs/check_setup.py +++ b/babs/check_setup.py @@ -40,9 +40,10 @@ def babs_check_setup(self, submit_a_test_job): print('Did not request `--job-test`; will not submit a test job.') # Print out the saved configuration info: ---------------- + analysis_dir = op.basename(self.analysis_path) print( 'Below is the configuration information saved during `babs init`' - " in file 'analysis/code/babs_proj_config.yaml':\n" + f" in file '{analysis_dir}/code/babs_proj_config.yaml':\n" ) with open(op.join(self.analysis_path, 'code/babs_proj_config.yaml')) as f: file_contents = f.read() @@ -52,13 +53,13 @@ def babs_check_setup(self, submit_a_test_job): print('Checking the BABS project itself...') if not op.exists(self.analysis_path): raise FileNotFoundError( - "Folder 'analysis' does not exist in this BABS project!" + f"Folder '{analysis_dir}' does not exist in this BABS project!" ' Current path to analysis folder: ' + self.analysis_path ) print(CHECK_MARK + ' All good!') - # Check `analysis` datalad dataset: ---------------------- - print("\nCheck status of 'analysis' DataLad dataset...") + # Check analysis datalad dataset: ---------------------- + print(f"\nCheck status of '{analysis_dir}' DataLad dataset...") # Are there anything unsaved? ref: CuBIDS function analysis_statuses = { status['state'] diff --git a/babs/cli.py b/babs/cli.py index f7dee487..85c47329 100644 --- a/babs/cli.py +++ b/babs/cli.py @@ -206,7 +206,7 @@ def babs_init_main( from babs import BABSBootstrap - babs_proj = BABSBootstrap(project_root) + babs_proj = BABSBootstrap(project_root, container_config=container_config) try: babs_proj.babs_bootstrap( processing_level, diff --git a/babs/container.py b/babs/container.py index 4424b833..ab445098 100644 --- a/babs/container.py +++ b/babs/container.py @@ -157,6 +157,7 @@ def generate_bash_participant_job( processing_level, system, project_root=None, + analysis_dir='analysis', shared_group_mode=False, ): """Generate bash script for participant job. @@ -188,6 +189,7 @@ def generate_bash_participant_job( container_name=self.container_name, zip_foldernames=self.config['zip_foldernames'], project_root=project_root, + analysis_dir=analysis_dir, ) with open(bash_path, 'w') as f: diff --git a/babs/generate_submit_script.py b/babs/generate_submit_script.py index 8242a4e2..fb745aec 100644 --- a/babs/generate_submit_script.py +++ b/babs/generate_submit_script.py @@ -30,6 +30,7 @@ def generate_submit_script( container_images=None, datalad_run_message=None, project_root=None, + analysis_dir='analysis', ): """ Generate a bash script that runs the BIDS App singularity image. @@ -129,6 +130,7 @@ def generate_submit_script( container_image_paths=container_image_paths, datalad_run_message=datalad_run_message, project_root=project_root, + analysis_dir=analysis_dir, ) diff --git a/babs/templates/job_submit.yaml.jinja2 b/babs/templates/job_submit.yaml.jinja2 index 9c6df77e..29ee0100 100644 --- a/babs/templates/job_submit.yaml.jinja2 +++ b/babs/templates/job_submit.yaml.jinja2 @@ -2,5 +2,5 @@ # '${max_array}' is a placeholder. {% endif %} -cmd_template: '{{ submit_head }} {{ env_flags }} {{ name_flag_str }}{{ job_name }} {{ eo_args }} {{ array_args }} {% if test %}{{ babs.analysis_path }}/code/check_setup/call_test_job.sh{% else %}{{ babs.analysis_path }}/code/participant_job.sh {{ dssource }} {{ pushgitremote }} {{ babs.job_submit_path_abs }} {{ babs.project_root }}{% endif %}' +cmd_template: '{{ submit_head }} {{ env_flags }} {{ name_flag_str }}{{ job_name }} {{ eo_args }} {{ array_args }} {% if test %}{{ babs.analysis_path }}/code/check_setup/call_test_job.sh{% else %}{{ babs.analysis_path }}/code/participant_job.sh {{ dssource }} {{ pushgitremote }} {{ babs.job_submit_path_abs }} {{ babs.analysis_root }}{% endif %}' job_name_template: '{{ job_name }}' \ No newline at end of file diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index f3151cea..25eae8ee 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -114,7 +114,7 @@ CONTAINER_IMAGE_PATHS=( ) for CONTAINER_JOB in "${CONTAINER_IMAGE_PATHS[@]}"; do - CONTAINER_SHARED="${PROJECT_ROOT}/analysis/${CONTAINER_JOB}" + CONTAINER_SHARED="${PROJECT_ROOT}/{{ analysis_dir }}/${CONTAINER_JOB}" if [ ! -e "${CONTAINER_SHARED}" ] && [ ! -L "${CONTAINER_SHARED}" ]; then echo "ERROR: shared container image not found at ${CONTAINER_SHARED}" >&2 diff --git a/notebooks/README.md b/notebooks/README.md index 296295b8..70e0bc22 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -26,6 +26,7 @@ In addition, please check if the function of the YAML files (especially the `bid | [link](eg_fmriprep-24-1-1_regular.yaml) | fMRIPrep | 24.1.1 | regular use of fMRIPrep | one raw BIDS dataset | | | [link](eg_fmriprep-24-1-1_anatonly.yaml) | fMRIPrep | 24.1.1 | fMRIPrep `--anat-only` | one raw BIDS dataset | | | [link](eg_fmriprep-24-1-1_ingressed-fs.yaml) | fMRIPrep | 24.1.1 | fMRIPrep with FreeSurfer results ingressed | one raw BIDS dataset + one zipped BIDS derivatives dataset of FreeSurfer results | For 2nd input dataset, you may use results from fMRIPrep `--anat-only` | +| [link](eg_freesurferpost-0-1-2_bids-study-layout.yaml) | freesurfer-post | 0.1.2 | creating tabular outputs from FreeSurfer results using BIDS study layout | one zipped BIDS derivatives dataset of fMRIPrep anatomical outputs | Uses `analysis_path: "."` and `.babs/` RIA stores for BIDS study layout | | [link](eg_xcpd-0-10-6_linc.yaml) | XCP-D | 0.10.6 | XCP-D with LINC mode | one zipped BIDS derivatives dataset of fMRIPrep results | Includes multiple atlases for connectivity analysis | | [link](eg_mriqc-24-0-2.yaml) | MRIQC | 24.0.2 | regular use of MRIQC | one raw BIDS dataset | For quality control metrics | | [link](eg_aslprep-0-7-5.yaml) | ASLPrep | 0.7.5 | regular use of ASLPrep | one raw BIDS dataset | For processing arterial spin labeling (ASL) data | diff --git a/notebooks/eg_freesurferpost-0-1-2_bids-study-layout.yaml b/notebooks/eg_freesurferpost-0-1-2_bids-study-layout.yaml new file mode 100644 index 00000000..ac048d77 --- /dev/null +++ b/notebooks/eg_freesurferpost-0-1-2_bids-study-layout.yaml @@ -0,0 +1,76 @@ +# This is an example config yaml file for: +# BIDS App: freesurfer-post +# BIDS App version: 0.1.2 +# Task: Creating tabular outputs from FreeSurfer results +# Which layout: BIDS study layout +# Which system: Slurm + +# WARNING!!! +# This is only an example, which may not necessarily fit your purpose, +# or be an optimized solution for your case, +# or be compatible to the BIDS App version you're using. +# Therefore, please change and tailor it for your case before use it!!! + +# In BIDS study layout, initialize the BABS project inside the study's derivatives/ folder, for example: +# babs init ... /path/to/BIDS_STUDY/derivatives/freesurferpost-0-1-2-babs +# +# With `analysis_path: "."`, the derivative project directory itself is the BABS analysis dataset. +# Internal BABS RIA stores and the init config live under `.babs/`. +analysis_path: "." +input_ria_path: ".babs/input_ria" +output_ria_path: ".babs/output_ria" + +# Define the input datasets +input_datasets: + fmriprep_anat: + required_files: + - "*fmriprep_anat*.zip" + is_zipped: true + # [FIX ME] path to zipped fMRIPrep anatomical outputs, usually from a BABS fMRIPrep --anat-only project. + # If that upstream project used the original BABS layout, use: + # ria+file:///path/to/BIDS_STUDY/derivatives/fmriprep_anat-24-1-1-babs/output_ria#~data + origin_url: "ria+file:///path/to/BIDS_STUDY/derivatives/fmriprep_anat-24-1-1-babs/.babs/output_ria#~data" + unzipped_path_containing_subject_dirs: sourcedata/fmriprep_anat/fmriprep_anat + path_in_babs: sourcedata/fmriprep_anat + +# Arguments in `singularity run`: +bids_app_args: + $SUBJECT_SELECTION_FLAG: "--subject-id" + $SESSION_SELECTION_FLAG: "--session-id" + -w: "$BABS_TMPDIR" + --fs-license-file: "/path/to/FreeSurfer/license.txt" # [FIX ME] path to FreeSurfer license file + --subjects-dir: '"${PWD}"/sourcedata/fmriprep_anat/fmriprep_anat/sourcedata/freesurfer' + +# Arguments that are passed directly to singularity/apptainer: +singularity_args: + - --cleanenv + - --containall + - --writable-tmpfs + +# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): +all_results_in_one_zip: true +zip_foldernames: + freesurfer-post: "0-1-2" # folder 'freesurfer-post' will be zipped into 'sub-xx_(ses-yy_)freesurfer-post-0-1-2.zip' + +# How much cluster resources it needs: +cluster_resources: + interpreting_shell: "/bin/bash" + hard_runtime_limit: "01:00:00" + temporary_disk_space: 5G + customized_text: | + #SBATCH -p all + #SBATCH --nodes=1 + #SBATCH --ntasks=1 + #SBATCH --cpus-per-task=1 + #SBATCH --mem=5G + #SBATCH --propagate=NONE + +# Necessary commands to be run first: +# [FIX ME] change or add commands for setting up the virtual environment, for loading necessary modules, etc +script_preamble: | + source "${CONDA_PREFIX}"/bin/activate babs # you may need to change this to work with your environment manager. + source xxxx # [FIX ME or DELETE ME] source any necessary program + module load xxxx # [FIX ME or DELETE ME] source any necessary program + +# Where to run the jobs: +job_compute_space: "path/to/temporary_compute_space" # [FIX ME] replace "/path/to/temporary_compute_space" with yours diff --git a/tests/test_babs_workflow.py b/tests/test_babs_workflow.py index 934c57ef..97c33df7 100644 --- a/tests/test_babs_workflow.py +++ b/tests/test_babs_workflow.py @@ -235,7 +235,10 @@ def test_init_forwards_shared_group(tmp_path): with mock.patch('babs.BABSBootstrap') as mock_bootstrap_cls: _enter_init() - mock_bootstrap_cls.assert_called_once_with(options.project_root) + mock_bootstrap_cls.assert_called_once_with( + options.project_root, + container_config=options.container_config, + ) mock_bootstrap_cls.return_value.babs_bootstrap.assert_called_once_with( options.processing_level, options.queue,