Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bin/chunked_ms2rescore.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def argparse_setup():
parser.add_argument("-spectra", help="Corresponding mzML file or .d path for PSMs file", required=True, type=str)

parser.add_argument("-model", help="Model for MS2PIP", default="HCD", type=str)
parser.add_argument("-model_dir", help="Directory to store/find MS2PIP model", default="/mnt/data/ms2pip-model", type=str)
parser.add_argument("-model_dir", help="Directory to store/find MS2PIP model", default="./ms2pip-model", type=str)
parser.add_argument("-ms2_tolerance", help="The MS2/fragment tolerance", default=0.02, type=float)
parser.add_argument("-spectrum_id_pattern", help="The spectrum ID pattern to correspond PSMs to spectra", default="(.*)", type=str)
parser.add_argument("-processes", help="Number of processes / threads to use", default=8, type=int)
Expand Down
33 changes: 33 additions & 0 deletions bin/ms2rescore_check_or_download_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python

import argparse
import logging

from ms2pip.constants import MODELS
from ms2pip._utils.xgb_models import validate_requested_xgb_model


def argparse_setup(argv=None):
    """Parse the command-line options of the model check/download script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv[1:]``,
            which is the behaviour existing callers rely on.

    Returns:
        argparse.Namespace with two string attributes:
        ``ms2pip_model`` (default ``"HCD"``) and ``model_dir``
        (default ``"./ms2pip-model"``).
    """
    parser = argparse.ArgumentParser(
        description="Check that an MS2PIP model is available in a directory.",
    )
    parser.add_argument(
        "-ms2pip_model",
        help="Model for MS2PIP",
        default="HCD",
        type=str,
    )
    parser.add_argument(
        "-model_dir",
        help="Directory to store/find MS2PIP model",
        default="./ms2pip-model",
        type=str,
    )

    # Passing argv through keeps the CLI behaviour (None -> sys.argv[1:])
    # while allowing the parser to be driven programmatically.
    return parser.parse_args(argv)

if __name__ == "__main__":
    # Script entry point: look up the requested MS2PIP model and, when it
    # lists XGBoost model files, hand them to ms2pip's validator together
    # with the expected hash and the target directory.
    args = argparse_setup()
    logging.basicConfig(level=logging.INFO)

    ms2pip_model = args.ms2pip_model
    model_dir = args.model_dir

    # Validate / download requested model
    if ms2pip_model in MODELS:
        print(f"Checking {ms2pip_model} model")
        model_config = MODELS[ms2pip_model]
        if "xgboost_model_files" in model_config:
            # NOTE(review): presumably this fetches missing/corrupt files into
            # model_dir -- confirm against the ms2pip implementation.
            validate_requested_xgb_model(
                model_config["xgboost_model_files"],
                model_config["model_hash"],
                model_dir,
            )
        else:
            logging.info(
                "Model %s has no 'xgboost_model_files' entry; nothing to check",
                ms2pip_model,
            )
    else:
        # Previously an unknown name was ignored without any output, which
        # made typos in the model name look like a successful check.
        logging.warning(
            "Unknown MS2PIP model %r; nothing was checked. Known models: %s",
            ms2pip_model,
            list(MODELS),
        )

37 changes: 0 additions & 37 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,6 @@
// Nextflow pipeline for peptide identification with multiple search engines and post-processing tools
//

// default python image
params.python_image = 'ghcr.io/medbioinf/pipeline-of-identification:latest'
params.oktoberfest_image = 'medbioinf/oktoberfest'

// parameters set by the command line
params.raw_files = ''
params.mzml_files = '' // may contain globs
params.fasta = ''
params.fasta_target_decoy = ''
params.precursor_tol_ppm = 10
params.fragment_tol_da = 0.02
params.is_timstof = false
params.entrapment_fold = 0
params.use_only_rank1_psms = true

// keep the (converted) mzML files
params.keep_mzmls = true

// should the search engines be executed?
params.execute_comet = true
params.execute_maxquant = true
params.execute_msamanda = true
params.execute_msfragger = true
params.execute_msgfplus = true
params.execute_sage = true
params.execute_xtandem = true

// default parameter files
params.outdir = './'
params.comet_params_file = "${baseDir}/config/comet.params"
params.maxquant_params_file = "${baseDir}/config/mqpar.xml"
params.msamanda_config_file = "${baseDir}/config/msamanda_settings.xml"
params.msfragger_config_file = "${baseDir}/config/closed_fragger.params"
params.msgfplus_params_file = "${baseDir}/config/MSGFPlus_Params.txt"
params.sage_config_file = "${baseDir}/config/sage_config.json"
params.xtandem_config_file = "${baseDir}/config/xtandem_input.xml"

// including modules
include {create_entrapment_database} from "./src/preprocess/create_entrapment_database.nf"
include {create_decoy_database} from "./src/preprocess/create_decoy_database.nf"
Expand Down
208 changes: 202 additions & 6 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -1,9 +1,205 @@
workflow.output.mode = "copy"
// Global default params, used in configs
params {
// Default values for the pipeline parameters, grouped by pipeline stage.
// NOTE(review): memory settings use two conventions in this block -- quoted
// '<n>.GB' strings for the `*_mem` / `*_memory` keys and bare integers for the
// `*_mem_gb` keys; confirm every consumer expects the form it is given.

// input / output parameters
outdir = './'
raw_files = './msms-files/*.raw'
mzml_files = ''
fasta = ''
fasta_target_decoy = ''
entrapment_fold = 0
keep_mzmls = true
tdf2mzml_threads = 8
decoy_database_threads = 4
fdrbench_mem_gb = 16

// search specific parameters
precursor_tol_ppm = 10
fragment_tol_da = 0.02
is_timstof = false
use_only_rank1_psms = true
convert_psm_tsv_mem = '32.GB'
enhance_psm_tsv_mem = '8.GB'

// Per search engine: an execute_* switch, the engine's parameter file,
// resource settings and three ID regexes (psm / spectrum / scan).
// Backslashes in the regexes are doubled so the resulting string value
// contains a single backslash (e.g. \d). The scan_id patterns use the
// (?P<scan_id>...) named-group syntax -- presumably consumed by the Python
// helper scripts; confirm against the callers.
execute_comet = true
comet_params_file = "${baseDir}/config/comet.params"
comet_threads = 16
comet_mem = '8.GB'
comet_psm_id_pattern = '(.*)'
comet_spectrum_id_pattern = '.*scan=(\\d+)$'
comet_scan_id_pattern = '^(?P<scan_id>\\d+)$'

execute_maxquant = true
maxquant_params_file = "${baseDir}/config/mqpar.xml"
maxquant_threads = 4
maxquant_mem = "32.GB"
// NOTE(review): all three MaxQuant ID patterns are empty strings, unlike the
// other engines -- confirm this is intentional.
maxquant_psm_id_pattern = ""
maxquant_spectrum_id_pattern = ""
maxquant_scan_id_pattern = ""

execute_msamanda = true
msamanda_config_file = "${baseDir}/config/msamanda_settings.xml"
msamanda_threads = 16
msamanda_mem = '64.GB'
msamanda_psm_id_pattern = '(.*)'
msamanda_spectrum_id_pattern = '(.*)'
msamanda_scan_id_pattern = '.*scan=(?P<scan_id>\\d+)$'

execute_msfragger = true
msfragger_config_file = "${baseDir}/config/closed_fragger.params"
msfragger_threads = 16
msfragger_mem_gb = 16
msfragger_db_split = 0
msfragger_calibrate = 2
msfragger_psm_id_pattern = '(.*)'
msfragger_spectrum_id_pattern = '(.*)'
msfragger_scan_id_pattern = '.*scan=(?P<scan_id>\\d+)$'

execute_msgfplus = true
msgfplus_params_file = "${baseDir}/config/MSGFPlus_Params.txt"
msgfplus_threads = 6
msgfplus_mem_gb = 16
msgfplus_tasks = 0
msgfplus_instrument = 1 // 0: Low-res LCQ/LTQ, 1: Orbitrap/FTICR/Lumos, 2: TOF, 3: Q-Exactive
msgfplus_split_input = 10000 // split input mzMLs into chunks of this size, 0 to disable
msgfplus_merge_mem_gb = 16 // memory for merging PSMs, used in merge_psms process
msgfplus_split_fasta = 0 // split the fasta into this many chunks, 0 to disable
msgfplus_psm_id_pattern = '(.*)'
msgfplus_spectrum_id_pattern = '(.*)'
msgfplus_scan_id_pattern = '.*scan=(?P<scan_id>\\d+)$'

execute_sage = true
sage_config_file = "${baseDir}/config/sage_config.json"
sage_threads = 16
sage_mem = '128.GB'
// NOTE(review): sage_prefilter is the string 'false', not a boolean like the
// other switches in this block (e.g. is_timstof = false); a non-empty string
// is truthy in Groovy, so confirm the consumer treats it as text.
sage_prefilter = 'false'
sage_prefilter_chunk_size = 0
sage_psm_id_pattern = '(.*)'
sage_spectrum_id_pattern = '(.*)'
sage_scan_id_pattern = '.*scan=(?P<scan_id>\\d+)$'

execute_xtandem = true
xtandem_config_file = "${baseDir}/config/xtandem_input.xml"
xtandem_threads = 16
xtandem_mem = '128.GB'
xtandem_psm_id_pattern = '(.*)'
xtandem_spectrum_id_pattern = '(.*)'
xtandem_scan_id_pattern = '.*scan=(?P<scan_id>\\d+)$'

// parameters for ms2rescore
ms2rescore_threads = 4
ms2rescore_mem = '64.GB'
ms2rescore_model = 'HCD'
ms2rescore_chunk_size = 100000
// directory holding the MS2PIP model files (relative path)
ms2pip_model_dir = './ms2pip-model'

// parameters for oktoberfest
oktoberfest_memory = '64.GB'
oktoberfest_to_pin_memory = '4.GB'
oktoberfest_intensity_model = 'Prosit_2020_intensity_HCD'
oktoberfest_irt_model = 'Prosit_2019_irt'
oktoberfest_forks = 1 // have some mercy with the koina servers

// parameters for percolator (thread count and memory)
percolator_threads = 4
percolator_mem = '4.GB'
}

// Pipeline metadata: name, contributors, entry script and version constraints.
manifest {
// NOTE(review): `name` refers to 'mpc-bioinformatics/McQuaC' while `homePage`
// below points at 'medbioinf/pipeline-of-identification' -- confirm which
// project this manifest is meant to describe.
name = 'mpc-bioinformatics/McQuaC'
contributors = [
[
name: 'Julian Uszkoreit',
affiliation: 'Ruhr University Bochum, Medical Bioinformatics',
github: '@julianu',
contribution: ['author', 'maintainer', 'contributor'],
orcid: '0000-0001-7522-4007',
],
[
name: 'Dirk Winkelhardt',
affiliation: 'Ruhr University Bochum, Medical Bioinformatics',
github: '@di-hardt',
contribution: ['author', 'maintainer', 'contributor'],
orcid: '0000-0001-8770-2221',
],
]
homePage = 'https://github.com/medbioinf/pipeline-of-identification'
description = """A pipeline for the identification of peptides from mass spectrometry data, integrating multiple search engines and post-processing tools."""
mainScript = 'main.nf'
defaultBranch = 'main'
// The leading '!' makes the version requirement strict: Nextflow stops
// instead of only warning when the running version does not satisfy it.
nextflowVersion = '!>=24.10.6'
version = '0.1.0'
}

// Execution profiles. The `docker` profile enables Docker and maps every
// process label used by the pipeline to its container image.
profiles {
    docker {
        // The profile is declared once: the settings and the process block
        // sit directly in the profile, not in a nested second `docker` scope.
        docker.enabled = true
        docker.runOptions = "--user=root"
        docker.fixOwnership = true

        // NOTE(review): 'medbioinf/msfragger' and 'medbioinf/oktoberfest'
        // carry no tag and the python image uses ':latest' -- consider
        // pinning versions for reproducible runs.
        process {
            withLabel: python_image {
                container = 'ghcr.io/medbioinf/mspepid:latest'
            }

            withLabel: comet_image {
                container = 'quay.io/medbioinf/comet-ms:v2024.01.0'
            }

            withLabel: maxquant_image {
                container = 'quay.io/medbioinf/maxquant:2.6.3.0'
            }

            withLabel: msamanda_image {
                container = 'quay.io/medbioinf/msamanda:3.0.22.071'
            }

            withLabel: msfragger_image {
                container = 'medbioinf/msfragger'
            }

            withLabel: msgfplus_image {
                container = 'quay.io/medbioinf/msgfplus:v2024.03.26'
            }

            withLabel: mzidmerger_image {
                container = 'quay.io/medbioinf/mzid-merger:1.4.26'
            }

            withLabel: sage_image {
                container = 'quay.io/medbioinf/sage:v0.15.0-beta.1'
            }

            withLabel: xtandem_image {
                container = 'quay.io/medbioinf/xtandem:2017.2.1.4'
            }

            withLabel: percolator_image {
                container = 'ghcr.io/percolator/percolator:branch-3-08'
            }

            withLabel: msconvert_image {
                container = 'proteowizard/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.25073-842baef'
            }

            withLabel: tdf2mzml_image {
                container = 'quay.io/medbioinf/tdf2mzml:0.4'
            }

            withLabel: oktoberfest_image {
                container = 'medbioinf/oktoberfest'
            }

            withLabel: openms_image {
                container = 'quay.io/medbioinf/openms:3.4.1'
            }

            withLabel: fdrbench_image {
                container = 'quay.io/medbioinf/fdrbench-nightly:146f77'
            }
        }
    }
}

// Nextflow plugins used by the pipeline.
plugins {
// nf-schema, pinned to version 2.5.0
id 'nf-schema@2.5.0'
}
Loading