diff --git a/conf/modules.config b/conf/modules.config
index baec60e..92102bf 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -36,6 +36,7 @@ process {
     }
     withName: OVERLAP_HEATMAP {
+        publishDir = [
         [
             path: { "${params.outdir}/TADA/QC/Plots/" },
@@ -65,18 +66,54 @@ process {
         ]
     }
-    withName: ILLUMINA_CUTADAPT {
-        publishDir = [
-            path: { "${params.outdir}/Intermediate/cutadapt" },
-            mode: params.publish_dir_mode
-        ]
-    }
-
-    withName: PACBIO_CUTADAPT {
-        publishDir = [
-            path: { "${params.outdir}/Intermediate/cutadapt" },
-            mode: params.publish_dir_mode
-        ]
+    withName: SHORT_READ_CUTADAPT {
+        ext.args = { [
+            // TODO: not sure why this one is being weird, but
+            // seems to trigger an error even though this is
+            // cutadapt 5.0
+            // "--json=${meta.id}.cutadapt.json",
+            params.cutadapt_strict_match ? "--pair-filter=any" : "--pair-filter=both",
+            "--report=minimal",
+            "--max-ee ${[params.maxEE_for,params.maxEE_rev].max()}",
+            params.trunc_for > 0 ? "-l ${params.trunc_for}" : "",
+            params.maxN >= 0 ? "--max-n ${params.maxN}" : "",
+            params.min_read_len ? "-m ${params.min_read_len}" : "-m 50",
+            params.max_read_len != "Inf" ? "-M ${params.max_read_len}" : "",
+            !meta.single_end && params.trunc_rev > 0 ? "-L ${params.trunc_rev}" : "",
+            params.illumina_twocolor ? "--nextseq-trim=2" : "",
+            "-g ${meta.for}",
+            params.cutadapt_dovetail ? "-a ${meta.rev_rc} -n 2" : "",
+            meta.single_end ? "" : "-G ${meta.rev}",
+            !meta.single_end && params.cutadapt_dovetail ? "-A ${meta.for_rc}" : "",
+        ].join(' ').trim() }
+        ext.prefix = { "${meta.id}" }
+        publishDir = [
+            path: { "${params.outdir}/Intermediate/cutadapt" },
+            mode: params.publish_dir_mode
+        ]
+    }
+
+    withName: LONG_READ_CUTADAPT {
+        ext.args = { [
+            // TODO: not sure why this one is being weird, but
+            // seems to trigger an error even though this is
+            // cutadapt 5.0
+            // "--json=${meta.id}.cutadapt.json",
+            "--rc",
+            "--report=minimal",
+            "--max-ee ${[params.maxEE_for,params.maxEE_rev].max()}",
+            // params.trunc_for > 0 ? "-l ${params.trunc_for}" : "",
+            params.cutadapt_strict_match ? "-g ${meta.for}...${meta.rev_rc}" : "-a ${meta.for}...${meta.rev_rc}",
+            params.maxN >= 0 ? "--max-n ${params.maxN}" : "",
+            params.min_read_len ? "-m ${params.min_read_len}" : "-m 50",
+            params.max_read_len != "Inf" ?
"-M ${params.max_read_len}" : "", + "--untrimmed-output ${meta.id}.untrimmed.fastq.gz", + ].join(' ').trim() } + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/Intermediate/cutadapt" }, + mode: params.publish_dir_mode + ] } withName: ILLUMINA_DADA2_FILTER_AND_TRIM { diff --git a/modules.json b/modules.json index ac022ef..23b5719 100644 --- a/modules.json +++ b/modules.json @@ -7,72 +7,72 @@ "nf-core": { "cutadapt": { "branch": "master", - "git_sha": "003920c7f9a8ae19b69a97171922880220bedf56", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "b1966f36ec9de31927b2603d8f499960b2a4c294", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, "fasttree": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, "mafft/align": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", "installed_by": ["modules"] }, "mmseqs/createdb": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "38697a933bef7041bb935c9b8374d9948ce6c794", "installed_by": ["modules"] }, "mmseqs/createindex": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "38697a933bef7041bb935c9b8374d9948ce6c794", "installed_by": ["modules"] }, "mmseqs/createtsv": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", "installed_by": ["modules"] }, "mmseqs/easysearch": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "38697a933bef7041bb935c9b8374d9948ce6c794", "installed_by": ["modules"] }, "mmseqs/search": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "38697a933bef7041bb935c9b8374d9948ce6c794", "installed_by": ["modules"] }, "mmseqs/taxonomy": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", "installed_by": ["modules"] }, "mmseqs/tsv2exprofiledb": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "7b50cb7be890e4b28cffb82e438cc6a8d7805d3f", + "git_sha": "82a79183037a403ad1b6714e5dbcff25500efaf6", "installed_by": ["modules"] }, "muscle": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", "installed_by": ["modules"] }, "muscle5/super5": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", "installed_by": ["modules"] } } diff --git a/modules/local/dadainfer.nf b/modules/local/dadainfer.nf index 2b39cb5..fc51f10 100644 --- a/modules/local/dadainfer.nf +++ b/modules/local/dadainfer.nf @@ -66,7 +66,7 @@ process DADA2_POOLED_INFER { tracking_dds <- as.data.frame(sapply(dds, getN)) colnames(tracking_dds) <- c("dada2.denoised.pooled.${readmode}") - nms <- gsub(".R[12].filtered.fastq.gz", "", rownames(tracking_dds)) + nms <- gsub("(.R[12])?.trim.fastq.gz", "", rownames(tracking_dds)) tracking_dds <- tracking_dds %>% as_tibble() %>% mutate(SampleID = nms, .before = 
1) diff --git a/modules/local/filterandtrim.nf b/modules/local/filterandtrim.nf deleted file mode 100644 index 941f6cf..0000000 --- a/modules/local/filterandtrim.nf +++ /dev/null @@ -1,213 +0,0 @@ -process ILLUMINA_DADA2_FILTER_AND_TRIM { - tag "$meta.id" - label 'process_medium' - - container "ghcr.io/h3abionet/tada:docker-DADA-1.36" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("${meta.id}.R1.filtered.fastq.gz"), optional: true, emit: trimmed_R1 - tuple val(meta), path("${meta.id}.R2.filtered.fastq.gz"), optional: true, emit: trimmed_R2 - tuple val(meta), path("${meta.id}.R[12].filtered.fastq.gz"), optional: true, emit: trimmed - path("*.trimmed.txt"), emit: trimmed_report - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - #!/usr/bin/env Rscript - suppressPackageStartupMessages(library(dada2)) - - out <- filterAndTrim(fwd = "${reads[0]}", - filt = "${meta.id}.R1.filtered.fastq.gz", - rev = if("${reads[1]}" == "null") NULL else "${reads[1]}", - filt.rev = if("${reads[1]}" == "null") NULL else "${meta.id}.R2.filtered.fastq.gz", - trimLeft = if("${reads[1]}" == "null") ${params.trim_for} else c(${params.trim_for}, ${params.trim_rev}), - truncLen = if("${reads[1]}" == "null") ${params.trunc_for} else c(${params.trunc_for}, ${params.trunc_rev}), - maxEE = if("${reads[1]}" == "null") ${params.maxEE_for} else c(${params.maxEE_for}, ${params.maxEE_rev}), - truncQ = ${params.truncQ}, - maxN = ${params.maxN}, - rm.phix = as.logical("${params.rmPhiX}"), - maxLen = ${params.max_read_len}, - minLen = ${params.min_read_len}, - compress = TRUE, - verbose = TRUE, - multithread = ${task.cpus} - ) - - colnames(out) <- c('input', 'filtered') - - write.csv(out, "${meta.id}.trimmed.txt") - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.R1.filtered.fastq.gz - touch ${prefix}.R2.filtered.fastq.gz - touch ${prefix}.trimmed.txt - """ -} - -process PACBIO_CUTADAPT_FILTER_AND_TRIM { - tag { "PacBioTrim_${meta.id}" } - - // container "ghcr.io/h3abionet/tada:docker-DADA-1.36" - - input: - // TODO: Note the channel name here should probably be changed - tuple val(meta), path(reads) - - output: - // tuple val(meta), file("${meta.id}.R1.filtered.fastq.gz") optional true into filteredReadsR1 - tuple val(meta), file("${meta.id}.noprimer.fastq.gz"), optional: true, emit: cutadapt_trimmed - file("*.cutadapt.out"), emit: cutadapt_report - file("${meta.id}.untrimmed.fastq.gz"), emit: cutadapt_untrimmed - - when: - !(params.precheck) - - script: - strictness = params.pacbio_strict_match ? '-g' : '-a' - """ - # Logic: we should trim out the HiFi reads and require *both* primers be present (-g). - # This should also reorient the sequence to match the primers (--rc). 
- # Keep anything longer than 50bp, and allow users to filter their data by length later - revprimer_rc=\$( echo -n ${params.rev_primer} | tr "[ATGCUNYRSWKMBDHV]" "[TACGANRYSWMKVHDB]" | rev ) - - cutadapt --rc \\ - ${strictness} "${params.fwd_primer}...\${revprimer_rc}" \\ - -m 50 \\ - -j ${task.cpus} \\ - --untrimmed-output "${meta.id}.untrimmed.fastq.gz" \\ - -o "${meta.id}.noprimer.fastq.gz" \\ - ${reads} > "${meta.id}.noprimer.cutadapt.out" - """ -} - -process PACBIO_DADA2_FILTER_AND_TRIM { - tag "$meta.id" - label 'process_medium' - - container "ghcr.io/h3abionet/tada:docker-DADA-1.36" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("${meta.id}.R1.filtered.fastq.gz"), optional: true, emit: trimmed - path("*.trimmed.txt"), emit: trimmed_report - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - #!/usr/bin/env Rscript - suppressPackageStartupMessages(library(dada2)) - suppressPackageStartupMessages(library(ShortRead)) - suppressPackageStartupMessages(library(Biostrings)) - - out2 <- filterAndTrim(fwd = "${reads}", - filt = "${meta.id}.R1.filtered.fastq.gz", - maxEE = ${params.maxEEFor}, - maxN = ${params.maxN}, - maxLen = ${params.maxLen}, - minLen = ${params.minLen}, - compress = TRUE, - verbose = TRUE, - multithread = ${task.cpus}) - - #Change input read counts to actual raw read counts - write.csv(out2, paste0("${meta.id}", ".trimmed.txt")) - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.R1.filtered.fastq.gz - touch ${prefix}.R2.filtered.fastq.gz - touch ${prefix}.trimmed.txt - """ -} - -// // this path is only needed when using variable length sequences -// process ITSFilterAndTrimStep1 { -// tag { "ITS_Step1_${meta.id}" } - -// input: -// tuple val(meta), file(reads) from dada2ReadPairs - -// output: -// tuple val(meta), file("${meta.id}.R[12].noN.fastq.gz") optional true into itsStep2 -// tuple val(meta), file("${meta.id}.out.RDS") into itsStep3Trimming // needed for join() later -// file('forward_rc') into forwardP -// // TODO make this optional if data are SE -// file('reverse_rc') into reverseP - -// when: -// !(params.precheck) - -// script: -// template "ITSFilterAndTrimStep1.R" -// } - -// process ITSFilterAndTrimStep2 { -// tag { "ITS_Step2_${meta.id}" } -// publishDir "${params.outdir}/dada2-FilterAndTrim", mode: "copy", overwrite: true - -// input: -// tuple(meta), file(reads) from itsStep2 -// file(forP) from forwardP -// file(revP) from reverseP - -// output: -// tuple val(meta), file("${meta.id}.R[12].cutadapt.fastq.gz") optional true into itsStep3 -// file("*.cutadapt.out") into cutadaptToMultiQC - -// when: -// !(params.precheck) - -// script: -// outr2 = meta.single_end ? '' : "-p ${meta.id}.R2.cutadapt.fastq.gz" -// p2 = meta.single_end ? 
'' : "-G ${params.revprimer} -A \$REV_PRIMER" -// """ -// FWD_PRIMER=\$( ${meta.id}.cutadapt.out -// """ -// } - -// process ITSFilterAndTrimStep3 { -// tag { "ITS_Step3_${meta.id}" } -// publishDir "${params.outdir}/dada2-FilterAndTrim", mode: "copy", overwrite: true - -// input: -// tuple val(meta), file(reads), file(trimming) from itsStep3.join(itsStep3Trimming) - -// output: -// tuple val(meta), file("${meta.id}.R1.filtered.fastq.gz") optional true into filteredReadsR1 -// tuple val(meta), file("${meta.id}.R2.filtered.fastq.gz") optional true into filteredReadsR2 -// tuple val(meta), file("${meta.id}.R[12].filtered.fastq.gz") optional true into readsToFastQC,readsToPerSample -// file "*.trimmed.txt" into trimTracking - -// when: -// !(params.precheck) - -// script: -// template "ITSFilterAndTrimStep3.R" -// } \ No newline at end of file diff --git a/modules/local/illumina_filterandtrim.nf b/modules/local/illumina_filterandtrim.nf index 28b89b4..e5832c5 100644 --- a/modules/local/illumina_filterandtrim.nf +++ b/modules/local/illumina_filterandtrim.nf @@ -8,7 +8,7 @@ process ILLUMINA_DADA2_FILTER_AND_TRIM { tuple val(meta), path(reads) output: - tuple val(meta), path("${meta.id}.R[12].filtered.fastq.gz"), optional: true, emit: trimmed + tuple val(meta), path("${meta.id}.R[12].trim.fastq.gz"), optional: true, emit: trimmed path("*.trimmed.txt"), emit: trimmed_report when: @@ -22,9 +22,9 @@ process ILLUMINA_DADA2_FILTER_AND_TRIM { suppressPackageStartupMessages(library(dada2)) out <- filterAndTrim(fwd = "${reads[0]}", - filt = "${meta.id}.R1.filtered.fastq.gz", + filt = "${meta.id}.R1.trim.fastq.gz", rev = if("${reads[1]}" == "null") NULL else "${reads[1]}", - filt.rev = if("${reads[1]}" == "null") NULL else "${meta.id}.R2.filtered.fastq.gz", + filt.rev = if("${reads[1]}" == "null") NULL else "${meta.id}.R2.trim.fastq.gz", trimLeft = if("${reads[1]}" == "null") ${params.trim_for} else c(${params.trim_for}, ${params.trim_rev}), truncLen = if("${reads[1]}" == "null") ${params.trunc_for} else c(${params.trunc_for}, ${params.trunc_rev}), maxEE = if("${reads[1]}" == "null") ${params.maxEE_for} else c(${params.maxEE_for}, ${params.maxEE_rev}), @@ -47,8 +47,8 @@ process ILLUMINA_DADA2_FILTER_AND_TRIM { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.R1.filtered.fastq.gz - touch ${prefix}.R2.filtered.fastq.gz + touch ${prefix}.R1.trim.fastq.gz + touch ${prefix}.R2.trim.fastq.gz touch ${prefix}.trimmed.txt """ } \ No newline at end of file diff --git a/modules/local/learnerrors/main.nf b/modules/local/learnerrors/main.nf index 3799fd6..139f13b 100644 --- a/modules/local/learnerrors/main.nf +++ b/modules/local/learnerrors/main.nf @@ -9,7 +9,6 @@ process DADA2_LEARN_ERRORS { output: tuple val(readmode), path("errors.${readmode}.RDS"), emit: error_models - // tuple val(readmode), path("dereps.${readmode}.RDS"), emit: dereps_full path("${readmode}*.err.pdf"), emit: pdf when: @@ -48,7 +47,7 @@ process DADA2_LEARN_ERRORS { } # File parsing - filts <- list.files('.', pattern=paste0("${readmode}",".filtered.fastq.gz"), full.names = TRUE) + filts <- list.files('.', pattern=".trim.fastq.gz", full.names = TRUE) set.seed(${params.random_seed}) diff --git a/modules/local/mergetrimtables.nf b/modules/local/mergetrimtables.nf index 38e7c59..3f3a4d8 100644 --- a/modules/local/mergetrimtables.nf +++ b/modules/local/mergetrimtables.nf @@ -42,11 +42,11 @@ process MERGE_TRIM_TABLES { } # gather files and load - cutadapt_files <- list.files(path = ".", pattern = 
"*.cutadapt.out") + cutadapt_files <- list.files(path = ".", pattern = "*.cutadapt.log") cutadapt_sample_data <- lapply(cutadapt_files, read_cutadapt) # fix sample names - nms <- gsub(".cutadapt.out", "", cutadapt_files) + nms <- gsub(".cutadapt.log", "", cutadapt_files) names(cutadapt_sample_data) <- nms # only keep some data diff --git a/modules/local/pacbio_cutadapt.nf b/modules/local/pacbio_cutadapt.nf index 43278f6..b362226 100644 --- a/modules/local/pacbio_cutadapt.nf +++ b/modules/local/pacbio_cutadapt.nf @@ -20,7 +20,7 @@ process PACBIO_CUTADAPT { script: maxN = params.maxN >=0 ? "--max-n ${params.maxN}" : "" maxEE = "--max-ee ${[params.maxEE_for,params.maxEE_rev].max()}" - strictness = params.pacbio_strict_match ? '-g' : '-a' + strictness = params.cutadapt_strict_match ? '-g' : '-a' min_len = params.min_read_len ? "-m ${params.min_read_len}" : "-m 50" max_len = params.max_read_len != "Inf" ? "-M ${params.max_read_len}" : "" """ diff --git a/modules/local/pacbio_filterandtrim.nf b/modules/local/pacbio_filterandtrim.nf index c8e0151..a0b6355 100644 --- a/modules/local/pacbio_filterandtrim.nf +++ b/modules/local/pacbio_filterandtrim.nf @@ -8,7 +8,7 @@ process PACBIO_DADA2_FILTER_AND_TRIM { tuple val(meta), path(reads) output: - tuple val(meta), path("${meta.id}.R1.filtered.fastq.gz"), optional: true, emit: trimmed + tuple val(meta), path("${meta.id}.R1.trim.fastq.gz"), optional: true, emit: trimmed path("*.trimmed.txt"), emit: trimmed_report when: @@ -24,7 +24,7 @@ process PACBIO_DADA2_FILTER_AND_TRIM { suppressPackageStartupMessages(library(Biostrings)) out2 <- filterAndTrim(fwd = "${reads}", - filt = "${meta.id}.R1.filtered.fastq.gz", + filt = "${meta.id}.R1.trim.fastq.gz", maxEE = ${params.maxEE_for}, maxN = ${params.maxN}, maxLen = ${params.max_read_len}, @@ -41,8 +41,8 @@ process PACBIO_DADA2_FILTER_AND_TRIM { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.R1.filtered.fastq.gz - touch ${prefix}.R2.filtered.fastq.gz + touch ${prefix}.R1.trim.fastq.gz + touch ${prefix}.R2.trim.fastq.gz touch ${prefix}.trimmed.txt """ } \ No newline at end of file diff --git a/modules/local/pooledseqtable.nf b/modules/local/pooledseqtable.nf index 39a11e2..58b633a 100644 --- a/modules/local/pooledseqtable.nf +++ b/modules/local/pooledseqtable.nf @@ -197,7 +197,7 @@ process DADA2_POOLED_SEQTABLE { mergers_summary <- as.data.frame(sapply(mergers, function(x) sum(getUniques(x %>% filter(accept))))) colnames(mergers_summary) <- c("dada2.pooled.merged") - nms <- gsub(".R1.filtered.fastq.gz", "", rownames(mergers_summary)) + nms <- gsub("(.R1)?.trim.fastq.gz", "", rownames(mergers_summary)) mergers_summary <- mergers_summary %>% as_tibble() %>% mutate(SampleID = nms, .before = 1) @@ -213,8 +213,8 @@ process DADA2_POOLED_SEQTABLE { saveRDS(seqtab, "seqtab.full.RDS") seqtab_stats <- rowSums(seqtab) - nms <- gsub(".R1.filtered.fastq.gz", "", names(seqtab_stats)) - seqtab_stats <- as_tibble_col(seqtab_stats, column_name = "dada.pooled.seqtab.raw") %>% + nms <- gsub("(.R1)?.trim.fastq.gz", "", names(seqtab_stats)) + seqtab_stats <- as_tibble_col(seqtab_stats, column_name = "dada2.pooled.seqtab.raw") %>% mutate(SampleID = nms, .before = 1) write_csv(seqtab_stats, "seqtab.original.pooled.${readmode}.csv") @@ -232,8 +232,8 @@ process DADA2_POOLED_SEQTABLE { if (${params.min_asv_len} > 0 | ${params.max_asv_len} > 0) { seqtab_stats <- rowSums(seqtab) - nms <- gsub(".R1.filtered.fastq.gz", "", names(seqtab_stats)) - seqtab_stats <- as_tibble_col(seqtab_stats, 
column_name = "dada.pooled.seqtab.lengthfiltered") %>% + nms <- gsub("(.R1)?.trim.fastq.gz", "", names(seqtab_stats)) + seqtab_stats <- as_tibble_col(seqtab_stats, column_name = "dada2.pooled.seqtab.lengthfiltered") %>% mutate(SampleID = nms, .before = 1) write_csv(seqtab_stats, "seqtab.${readmode}.lengthfiltered.csv") } diff --git a/modules/local/removechimeras.nf b/modules/local/removechimeras.nf index 9eda2a9..73dbef2 100644 --- a/modules/local/removechimeras.nf +++ b/modules/local/removechimeras.nf @@ -35,7 +35,7 @@ process DADA2_REMOVE_CHIMERAS { # read tracking seqtab.nonchim <- rowSums(seqtab) - nms <- gsub('.R1.filtered.fastq.gz', '',names(seqtab.nonchim)) + nms <- gsub('(.R1)?.trim.fastq.gz', '',names(seqtab.nonchim)) nms <- gsub(".dd\$", "", nms) seqtab.nonchim <- as_tibble_col(seqtab.nonchim, column_name = "dada2.nonchim") %>% mutate(SampleID = nms, .before = 1) diff --git a/modules/local/renameasvs.nf b/modules/local/renameasvs.nf index 24fb620..6cb245a 100644 --- a/modules/local/renameasvs.nf +++ b/modules/local/renameasvs.nf @@ -52,9 +52,9 @@ process RENAME_ASVS { writeFasta(seqs.dna.raw, file = 'asvs.${params.id_type}.raw.fna') # replace rownames - rownames(st) <- gsub(".R1.filtered.fastq.gz", "", rownames(st)) + rownames(st) <- gsub("(.R1)?.trim.fastq.gz", "", rownames(st)) rownames(st) <- gsub(".dd\$", "", rownames(st)) - rownames(st.raw) <- gsub(".R1.filtered.fastq.gz", "", rownames(st.raw)) + rownames(st.raw) <- gsub("(.R1)?.trim.fastq.gz", "", rownames(st.raw)) rownames(st.raw) <- gsub(".dd\$", "", rownames(st.raw)) # Write modified data (note we only keep the no-chimera reads for the next stage) diff --git a/modules/nf-core/cutadapt/environment.yml b/modules/nf-core/cutadapt/environment.yml index d32a8f9..b1ebb92 100644 --- a/modules/nf-core/cutadapt/environment.yml +++ b/modules/nf-core/cutadapt/environment.yml @@ -1,7 +1,7 @@ -name: cutadapt +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::cutadapt=3.4 + - bioconda::cutadapt=5.0 diff --git a/modules/nf-core/cutadapt/main.nf b/modules/nf-core/cutadapt/main.nf index 69a17ff..84d97d4 100644 --- a/modules/nf-core/cutadapt/main.nf +++ b/modules/nf-core/cutadapt/main.nf @@ -4,8 +4,8 @@ process CUTADAPT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cutadapt:3.4--py39h38f01e4_1' : - 'biocontainers/cutadapt:3.4--py39h38f01e4_1' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/17/1758869538eb8e658077cc14cd7a4e76fd9b6d73d3a68f85a70bf292e39e27c5/data' : + 'community.wave.seqera.io/library/cutadapt:5.0--991bbd2e184b7014' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/cutadapt/meta.yml b/modules/nf-core/cutadapt/meta.yml index 5ecfe27..86179a5 100644 --- a/modules/nf-core/cutadapt/meta.yml +++ b/modules/nf-core/cutadapt/meta.yml @@ -12,35 +12,50 @@ tools: documentation: https://cutadapt.readthedocs.io/en/stable/index.html doi: 10.14806/ej.17.1.200 licence: ["MIT"] + identifier: biotools:cutadapt input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. 
+ - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: [] output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: The trimmed/modified fastq reads - pattern: "*fastq.gz" - - log: - type: file - description: cuatadapt log file - pattern: "*cutadapt.log" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + reads: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.trim.fastq.gz": + type: file + description: The trimmed/modified fastq reads + pattern: "*fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: cuatadapt log file + pattern: "*cutadapt.log" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@drpatelh" - "@kevinmenden" diff --git a/modules/nf-core/cutadapt/tests/main.nf.test b/modules/nf-core/cutadapt/tests/main.nf.test index b7ea6ef..36927bd 100644 --- a/modules/nf-core/cutadapt/tests/main.nf.test +++ b/modules/nf-core/cutadapt/tests/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { input[0] = [ [ id: 'test', single_end:true ], [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] """ @@ -47,8 +47,8 @@ nextflow_process { input[0] = [ [ id: 'test', single_end:false ], [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] """ diff --git a/modules/nf-core/cutadapt/tests/main.nf.test.snap b/modules/nf-core/cutadapt/tests/main.nf.test.snap index 2a18d5e..6817ac2 100644 --- a/modules/nf-core/cutadapt/tests/main.nf.test.snap +++ b/modules/nf-core/cutadapt/tests/main.nf.test.snap @@ -7,7 +7,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-01T11:39:48.365966549" + "timestamp": "2024-03-06T10:27:15.235936866" }, "sarscov2 Illumina paired-end [fastq]": { "content": [ @@ -17,30 +17,30 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-01T11:40:00.412014817" + "timestamp": "2024-03-06T10:27:24.38468252" }, "versions_paired_end": { "content": [ [ - "versions.yml:md5,d37c5b9e465accf6d836972608795071" + "versions.yml:md5,8428231c6f665759beec10b0aba11075" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-02-01T11:40:00.418994067" + "timestamp": "2025-04-01T10:51:18.715277676" }, "versions_single_end": { "content": [ [ - 
"versions.yml:md5,d37c5b9e465accf6d836972608795071" + "versions.yml:md5,8428231c6f665759beec10b0aba11075" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-02-01T11:39:48.343259719" + "timestamp": "2025-04-01T10:51:14.492236529" } } \ No newline at end of file diff --git a/modules/nf-core/cutadapt/tests/tags.yml b/modules/nf-core/cutadapt/tests/tags.yml deleted file mode 100644 index f64f997..0000000 --- a/modules/nf-core/cutadapt/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -cutadapt: - - modules/nf-core/cutadapt/** diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 2b2e62b..c8d9d02 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -29,9 +29,10 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + ontologies: [] output: - - html: - - meta: + html: + - - meta: type: map description: | Groovy Map containing sample information @@ -40,8 +41,9 @@ output: type: file description: FastQC report pattern: "*_{fastqc.html}" - - zip: - - meta: + ontologies: [] + zip: + - - meta: type: map description: | Groovy Map containing sample information @@ -50,11 +52,14 @@ output: type: file description: FastQC report archive pattern: "*_{fastqc.zip}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/fasttree/environment.yml b/modules/nf-core/fasttree/environment.yml index 2650948..48f9ff8 100644 --- a/modules/nf-core/fasttree/environment.yml +++ b/modules/nf-core/fasttree/environment.yml @@ -1,7 +1,7 @@ -name: fasttree +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::fasttree=2.1.10 diff --git a/modules/nf-core/fasttree/meta.yml b/modules/nf-core/fasttree/meta.yml index 60fec06..6146189 100644 --- a/modules/nf-core/fasttree/meta.yml +++ b/modules/nf-core/fasttree/meta.yml @@ -1,28 +1,38 @@ name: fasttree -description: Produces a Newick format phylogeny from a multiple sequence alignment. Capable of bacterial genome size alignments. +description: Produces a Newick format phylogeny from a multiple sequence alignment. + Capable of bacterial genome size alignments. 
keywords: - phylogeny - newick + - alignment tools: - fasttree: - description: FastTree infers approximately-maximum-likelihood phylogenetic trees from alignments of nucleotide or protein sequences + description: FastTree infers approximately-maximum-likelihood phylogenetic trees + from alignments of nucleotide or protein sequences homepage: http://www.microbesonline.org/fasttree/ documentation: http://www.microbesonline.org/fasttree/#Usage licence: ["GPL v2"] + identifier: biotools:fasttree input: - alignment: type: file description: A FASTA format multiple sequence alignment file pattern: "*.{fasta,fas,fa,mfa}" + ontologies: [] output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - phylogeny: - type: file - description: A phylogeny in Newick format - pattern: "*.{tre}" + phylogeny: + - "*.tre": + type: file + description: A phylogeny in Newick format + pattern: "*.{tre}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@aunderwo" maintainers: diff --git a/modules/nf-core/fasttree/tests/main.nf.test b/modules/nf-core/fasttree/tests/main.nf.test new file mode 100644 index 0000000..1ae1eca --- /dev/null +++ b/modules/nf-core/fasttree/tests/main.nf.test @@ -0,0 +1,31 @@ + +nextflow_process { + + name "Test Process FASTTREE" + script "../main.nf" + process "FASTTREE" + + tag "modules" + tag "modules_nfcore" + tag "fasttree" + + test("test-fasttree") { + + when { + process { + """ + input[0] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/informative_sites.fas', checkIfExists: true) ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/fasttree/tests/main.nf.test.snap b/modules/nf-core/fasttree/tests/main.nf.test.snap new file mode 100644 index 0000000..70fcd64 --- /dev/null +++ b/modules/nf-core/fasttree/tests/main.nf.test.snap @@ -0,0 +1,25 @@ +{ + "test-fasttree": { + "content": [ + { + "0": [ + "fasttree_phylogeny.tre:md5,63a886117535847c1e66fa4487f3b7d2" + ], + "1": [ + "versions.yml:md5,191c8d707d56355a8ff3726902294b48" + ], + "phylogeny": [ + "fasttree_phylogeny.tre:md5,63a886117535847c1e66fa4487f3b7d2" + ], + "versions": [ + "versions.yml:md5,191c8d707d56355a8ff3726902294b48" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-28T13:39:53.138141" + } +} \ No newline at end of file diff --git a/modules/nf-core/mafft/align/tests/main.nf.test b/modules/nf-core/mafft/align/tests/main.nf.test index 660a897..62ed3c0 100644 --- a/modules/nf-core/mafft/align/tests/main.nf.test +++ b/modules/nf-core/mafft/align/tests/main.nf.test @@ -246,4 +246,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/nf-core/mmseqs/createdb/environment.yml b/modules/nf-core/mmseqs/createdb/environment.yml index 69afa60..072223f 100644 --- a/modules/nf-core/mmseqs/createdb/environment.yml +++ b/modules/nf-core/mmseqs/createdb/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::mmseqs2=17.b804f + - bioconda::mmseqs2=18.8cc5c diff --git a/modules/nf-core/mmseqs/createdb/main.nf b/modules/nf-core/mmseqs/createdb/main.nf index 6f8d5b1..2a0aa0b 100644 --- a/modules/nf-core/mmseqs/createdb/main.nf +++ b/modules/nf-core/mmseqs/createdb/main.nf @@ 
-1,18 +1,18 @@ process MMSEQS_CREATEDB { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mmseqs2:17.b804f--hd6d6fdc_1': - 'biocontainers/mmseqs2:17.b804f--hd6d6fdc_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe49c17754753d6cd9a31e5894117edaf1c81e3d6053a12bf6dc8f3af1dffe23/data' + : 'community.wave.seqera.io/library/mmseqs2:18.8cc5c--af05c9a98d9f6139'}" input: tuple val(meta), path(sequence) output: tuple val(meta), path("${prefix}/"), emit: db - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -33,7 +33,7 @@ process MMSEQS_CREATEDB { createdb \\ ${sequence_name} \\ ${prefix}/${prefix} \\ - $args + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -45,6 +45,7 @@ process MMSEQS_CREATEDB { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" """ + echo ${args} mkdir -p ${prefix} touch ${prefix}/${prefix} diff --git a/modules/nf-core/mmseqs/createdb/meta.yml b/modules/nf-core/mmseqs/createdb/meta.yml index c392a36..14b6f7d 100644 --- a/modules/nf-core/mmseqs/createdb/meta.yml +++ b/modules/nf-core/mmseqs/createdb/meta.yml @@ -29,9 +29,11 @@ input: description: Input sequences in FASTA/Q (zipped or unzipped) format to parse into an mmseqs database pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,fastq,fastq.gz,fq,fq.gz}" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ output: - - db: - - meta: + db: + - - meta: type: map description: | Groovy Map containing sample information @@ -39,11 +41,13 @@ output: - ${prefix}/: type: directory description: The created MMseqs2 database - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@Joon-Klaps" maintainers: diff --git a/modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap b/modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap index 9eee149..b066e33 100644 --- a/modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap +++ b/modules/nf-core/mmseqs/createdb/tests/main.nf.test.snap @@ -20,14 +20,14 @@ ] ], [ - "versions.yml:md5,c62b08152082097334109fe08ec6333a" + "versions.yml:md5,02a6806d10988baae0ab644d671f113c" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T14:11:57.883871" + "timestamp": "2025-11-01T16:18:26.983758708" }, "Should build an mmseqs db from a zipped amino acid sequence file": { "content": [ @@ -49,13 +49,13 @@ ] ], [ - "versions.yml:md5,c62b08152082097334109fe08ec6333a" + "versions.yml:md5,02a6806d10988baae0ab644d671f113c" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T14:12:10.986433" + "timestamp": "2025-11-01T16:18:31.492266142" } } \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createindex/environment.yml b/modules/nf-core/mmseqs/createindex/environment.yml index 69afa60..072223f 100644 --- a/modules/nf-core/mmseqs/createindex/environment.yml +++ 
b/modules/nf-core/mmseqs/createindex/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::mmseqs2=17.b804f + - bioconda::mmseqs2=18.8cc5c diff --git a/modules/nf-core/mmseqs/createindex/main.nf b/modules/nf-core/mmseqs/createindex/main.nf index 1da528d..fbff56f 100644 --- a/modules/nf-core/mmseqs/createindex/main.nf +++ b/modules/nf-core/mmseqs/createindex/main.nf @@ -1,11 +1,12 @@ process MMSEQS_CREATEINDEX { tag "${meta.id}" label 'process_high' + label 'process_high_memory' conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://depot.galaxyproject.org/singularity/mmseqs2:17.b804f--hd6d6fdc_1' - : 'biocontainers/mmseqs2:17.b804f--hd6d6fdc_1'}" + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe49c17754753d6cd9a31e5894117edaf1c81e3d6053a12bf6dc8f3af1dffe23/data' + : 'community.wave.seqera.io/library/mmseqs2:18.8cc5c--af05c9a98d9f6139'}" input: tuple val(meta), path(db) @@ -20,8 +21,6 @@ process MMSEQS_CREATEINDEX { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: "*.dbtype" - def prefix = task.ext.prefix ?: "${meta.id}" - """ DB_INPUT_PATH_NAME=\$(find -L "${db}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) @@ -30,7 +29,8 @@ process MMSEQS_CREATEINDEX { \${DB_INPUT_PATH_NAME} \\ tmp1 \\ ${args} \\ - --threads ${task.cpus} + --threads ${task.cpus} \\ + --split-memory-limit ${(task.memory.toGiga() * 0.8) as int}G cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -43,7 +43,7 @@ process MMSEQS_CREATEINDEX { """ DB_INPUT_PATH_NAME=\$(find -L "${db}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) - touch "\${DB_PATH_NAME}.idx" + touch "\${DB_INPUT_PATH_NAME}.idx" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/mmseqs/createindex/meta.yml b/modules/nf-core/mmseqs/createindex/meta.yml index c118ea3..58b65c7 100644 --- a/modules/nf-core/mmseqs/createindex/meta.yml +++ b/modules/nf-core/mmseqs/createindex/meta.yml @@ -28,8 +28,8 @@ input: Directory containing the DB to be indexed pattern: "*" output: - - db_indexed: - - meta: + db_indexed: + - - meta: type: directory description: | Directory containing the DB and the generated indexes @@ -39,12 +39,14 @@ output: description: | Directory containing the DB and the generated indexes pattern: "*" - - versions: - - versions.yml: - type: file - description: | - File containing software versions - pattern: "versions.yml" + versions: + - versions.yml: + type: file + description: | + File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@JoseEspinosa" maintainers: diff --git a/modules/nf-core/mmseqs/createindex/tests/main.nf.test.snap b/modules/nf-core/mmseqs/createindex/tests/main.nf.test.snap index f18d49d..499f8ab 100644 --- a/modules/nf-core/mmseqs/createindex/tests/main.nf.test.snap +++ b/modules/nf-core/mmseqs/createindex/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "versions": { "content": [ [ - "versions.yml:md5,9745ec52ea0356d97ef2f5c18c01d7e7" + "versions.yml:md5,35132b6bcef6fb9a674a5f346c57282d" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T17:15:56.23597" + "timestamp": "2025-11-01T16:18:37.261481775" }, "createindex_filtered_files": { "content": [ @@ -18,7 
+18,7 @@ "mmseqs.dbtype:md5,f1d3ff8443297732862df21dc4e57262", "mmseqs.fasta:md5,b40600ad3be77f076df716e6cf99c64c", "mmseqs.idx.dbtype:md5,9a198d4f48144e20661df7fd2dc41bf7", - "mmseqs.idx.index:md5,7c1dc5d8044ab7e0535d0ba4d7b7ab07", + "mmseqs.idx.index:md5,3451d87bd8a5a7182d36d53edd213c59", "mmseqs.index:md5,c012bdab1c61eeafcb99d1b26650f3d0", "mmseqs.lookup:md5,fa898551a6b303614ae6e29c237b7fc6", "mmseqs.source:md5,16bef02c30aadbfa8d035596502f0aa2", @@ -55,8 +55,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T17:15:56.186115" + "timestamp": "2025-11-01T16:18:37.253778342" } } \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createtsv/environment.yml b/modules/nf-core/mmseqs/createtsv/environment.yml index 69afa60..072223f 100644 --- a/modules/nf-core/mmseqs/createtsv/environment.yml +++ b/modules/nf-core/mmseqs/createtsv/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::mmseqs2=17.b804f + - bioconda::mmseqs2=18.8cc5c diff --git a/modules/nf-core/mmseqs/createtsv/main.nf b/modules/nf-core/mmseqs/createtsv/main.nf index 3ab0159..1162944 100644 --- a/modules/nf-core/mmseqs/createtsv/main.nf +++ b/modules/nf-core/mmseqs/createtsv/main.nf @@ -1,37 +1,39 @@ - process MMSEQS_CREATETSV { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mmseqs2:17.b804f--hd6d6fdc_1': - 'biocontainers/mmseqs2:17.b804f--hd6d6fdc_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe49c17754753d6cd9a31e5894117edaf1c81e3d6053a12bf6dc8f3af1dffe23/data' + : 'community.wave.seqera.io/library/mmseqs2:18.8cc5c--af05c9a98d9f6139'}" input: tuple val(meta), path(db_result) - tuple val(meta2), path(db_query) - tuple val(meta3), path(db_target) + tuple val(meta2), path(db_query, stageAs: "db_query") + tuple val(meta3), path(db_target, stageAs: "db_target") output: tuple val(meta), path("*.tsv"), emit: tsv - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: "*.dbtype" // database generated by mmyseqs cluster | search | taxonomy | ... - def args3 = task.ext.args3 ?: "*.dbtype" // database generated by mmyseqs/createdb - def args4 = task.ext.args4 ?: "*.dbtype" // database generated by mmyseqs/createdb + def args2 = task.ext.args2 ?: "*.dbtype" + // database generated by mmyseqs cluster | search | taxonomy | ... 
+ def args3 = task.ext.args3 ?: "*.dbtype" + // database generated by mmyseqs/createdb + def args4 = task.ext.args4 ?: "*.dbtype" + // database generated by mmyseqs/createdb def prefix = task.ext.prefix ?: "${meta.id}" """ # Extract files with specified args based suffix | remove suffix | isolate longest common substring of files - DB_RESULT_PATH_NAME=\$(find -L "$db_result/" -maxdepth 1 -name "$args2" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) - DB_QUERY_PATH_NAME=\$(find -L "$db_query/" -maxdepth 1 -name "$args3" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) - DB_TARGET_PATH_NAME=\$(find -L "$db_target/" -maxdepth 1 -name "$args4" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + DB_RESULT_PATH_NAME=\$(find -L "${db_result}/" -maxdepth 1 -name "${args2}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + DB_QUERY_PATH_NAME=\$(find -L "${db_query}/" -maxdepth 1 -name "${args3}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) + DB_TARGET_PATH_NAME=\$(find -L "${db_target}/" -maxdepth 1 -name "${args4}" | sed 's/\\.[^.]*\$//' | sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' ) mmseqs \\ createtsv \\ @@ -39,7 +41,7 @@ process MMSEQS_CREATETSV { \$DB_TARGET_PATH_NAME \\ \$DB_RESULT_PATH_NAME \\ ${prefix}.tsv \\ - $args \\ + ${args} \\ --threads ${task.cpus} cat <<-END_VERSIONS > versions.yml @@ -52,6 +54,7 @@ process MMSEQS_CREATETSV { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ + echo ${args} touch ${prefix}.tsv cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/mmseqs/createtsv/meta.yml b/modules/nf-core/mmseqs/createtsv/meta.yml index 5a50ff3..f6df598 100644 --- a/modules/nf-core/mmseqs/createtsv/meta.yml +++ b/modules/nf-core/mmseqs/createtsv/meta.yml @@ -48,8 +48,8 @@ input: description: an MMseqs2 database with target data output: #Only when we have meta - - tsv: - - meta: + tsv: + - - meta: type: map description: | Groovy Map containing sample information @@ -59,11 +59,15 @@ output: description: The resulting tsv file created using the query, target and result MMseqs databases pattern: "*.{tsv}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@Joon-Klaps" maintainers: diff --git a/modules/nf-core/mmseqs/createtsv/tests/main.nf.test b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test index 1aa7463..0e532d2 100644 --- a/modules/nf-core/mmseqs/createtsv/tests/main.nf.test +++ b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test @@ -244,4 +244,4 @@ nextflow_process { ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/mmseqs/createtsv/tests/main.nf.test.snap b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test.snap index a70f839..c88667a 100644 --- a/modules/nf-core/mmseqs/createtsv/tests/main.nf.test.snap +++ b/modules/nf-core/mmseqs/createtsv/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,ce808eb9a57e201a48afec56168f9e77" + "versions.yml:md5,7060e304518b4ebce491bdba11f7af56" ], "tsv": [ [ @@ -24,15 +24,15 @@ ] ], "versions": [ - "versions.yml:md5,ce808eb9a57e201a48afec56168f9e77" + 
"versions.yml:md5,7060e304518b4ebce491bdba11f7af56" ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T17:29:15.220926" + "timestamp": "2025-11-01T16:20:20.125440957" }, "mmseqs/createtsv - sarscov2 - cluster - stub": { "content": [ @@ -47,7 +47,7 @@ ] ], "1": [ - "versions.yml:md5,ce808eb9a57e201a48afec56168f9e77" + "versions.yml:md5,7060e304518b4ebce491bdba11f7af56" ], "tsv": [ [ @@ -59,15 +59,15 @@ ] ], "versions": [ - "versions.yml:md5,ce808eb9a57e201a48afec56168f9e77" + "versions.yml:md5,7060e304518b4ebce491bdba11f7af56" ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T17:29:32.089204" + "timestamp": "2025-11-01T16:20:24.652713814" }, "mmseqs/createtsv - bacteroides_fragilis - taxonomy": { "content": [ @@ -78,11 +78,11 @@ "id": "test_query", "single_end": false }, - "test_query.tsv:md5,9179f5c85b8b87a4dc998c9d17840161" + "test_query.tsv:md5,7e42aeb7daeeb54739e84fc6ecb0b5fc" ] ], "1": [ - "versions.yml:md5,ce808eb9a57e201a48afec56168f9e77" + "versions.yml:md5,7060e304518b4ebce491bdba11f7af56" ], "tsv": [ [ @@ -90,19 +90,19 @@ "id": "test_query", "single_end": false }, - "test_query.tsv:md5,9179f5c85b8b87a4dc998c9d17840161" + "test_query.tsv:md5,7e42aeb7daeeb54739e84fc6ecb0b5fc" ] ], "versions": [ - "versions.yml:md5,ce808eb9a57e201a48afec56168f9e77" + "versions.yml:md5,7060e304518b4ebce491bdba11f7af56" ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T17:28:41.472818" + "timestamp": "2025-11-01T16:20:11.522513714" }, "mmseqs/createtsv - sarscov2 - cluster": { "content": [ @@ -117,7 +117,7 @@ ] ], "1": [ - "versions.yml:md5,ce808eb9a57e201a48afec56168f9e77" + "versions.yml:md5,7060e304518b4ebce491bdba11f7af56" ], "tsv": [ [ @@ -129,14 +129,14 @@ ] ], "versions": [ - "versions.yml:md5,ce808eb9a57e201a48afec56168f9e77" + "versions.yml:md5,7060e304518b4ebce491bdba11f7af56" ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T17:28:58.633976" + "timestamp": "2025-11-01T16:20:16.048107949" } } \ No newline at end of file diff --git a/modules/nf-core/mmseqs/createtsv/tests/taxonomy.nextflow.config b/modules/nf-core/mmseqs/createtsv/tests/taxonomy.nextflow.config index f08205d..95d4c65 100644 --- a/modules/nf-core/mmseqs/createtsv/tests/taxonomy.nextflow.config +++ b/modules/nf-core/mmseqs/createtsv/tests/taxonomy.nextflow.config @@ -2,6 +2,7 @@ process { withName: MMSEQS_TAXONOMY { ext.args = '--search-type 2' + memory = 7.GB } } diff --git a/modules/nf-core/mmseqs/easysearch/environment.yml b/modules/nf-core/mmseqs/easysearch/environment.yml index 69afa60..072223f 100644 --- a/modules/nf-core/mmseqs/easysearch/environment.yml +++ b/modules/nf-core/mmseqs/easysearch/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::mmseqs2=17.b804f + - bioconda::mmseqs2=18.8cc5c diff --git a/modules/nf-core/mmseqs/easysearch/main.nf b/modules/nf-core/mmseqs/easysearch/main.nf index bfc5e45..a7618fa 100644 --- a/modules/nf-core/mmseqs/easysearch/main.nf +++ b/modules/nf-core/mmseqs/easysearch/main.nf @@ -4,8 +4,8 @@ process MMSEQS_EASYSEARCH { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://depot.galaxyproject.org/singularity/mmseqs2:17.b804f--hd6d6fdc_1' - : 'biocontainers/mmseqs2:17.b804f--hd6d6fdc_1'}" + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe49c17754753d6cd9a31e5894117edaf1c81e3d6053a12bf6dc8f3af1dffe23/data' + : 'community.wave.seqera.io/library/mmseqs2:18.8cc5c--af05c9a98d9f6139'}" input: tuple val(meta), path(fasta) @@ -49,6 +49,8 @@ process MMSEQS_EASYSEARCH { def args2 = task.ext.args2 ?: "*.dbtype" prefix = task.ext.prefix ?: "${meta.id}" """ + echo ${args} + echo ${args2} touch ${prefix}.tsv cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/mmseqs/easysearch/meta.yml b/modules/nf-core/mmseqs/easysearch/meta.yml index 972fbe3..e1ef1a0 100644 --- a/modules/nf-core/mmseqs/easysearch/meta.yml +++ b/modules/nf-core/mmseqs/easysearch/meta.yml @@ -25,6 +25,7 @@ input: type: file description: Fasta file pattern: "*.{fa,fasta}" + ontologies: [] - - meta2: type: map description: | @@ -34,8 +35,8 @@ input: type: directory description: an MMseqs2 database with target data, e.g. uniref90 output: - - tsv: - - meta: + tsv: + - - meta: type: map description: | Groovy Map containing input fasta file information @@ -44,11 +45,15 @@ output: type: file description: tsv file with the results of the search pattern: "*.{tsv}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@luisas" maintainers: diff --git a/modules/nf-core/mmseqs/easysearch/tests/main.nf.test.snap b/modules/nf-core/mmseqs/easysearch/tests/main.nf.test.snap index b56170f..3602dbe 100644 --- a/modules/nf-core/mmseqs/easysearch/tests/main.nf.test.snap +++ b/modules/nf-core/mmseqs/easysearch/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,0459b89b5e5e838b3d5ee718443e08da" + "versions.yml:md5,49e9163a8d6fe64452d79e4442d3cbb0" ], "tsv": [ [ @@ -24,15 +24,15 @@ ] ], "versions": [ - "versions.yml:md5,0459b89b5e5e838b3d5ee718443e08da" + "versions.yml:md5,49e9163a8d6fe64452d79e4442d3cbb0" ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T18:11:14.000659" + "timestamp": "2025-11-01T16:21:23.344757638" }, "sarscov2 - illumina - contigs - fasta - stub": { "content": [ diff --git a/modules/nf-core/mmseqs/search/environment.yml b/modules/nf-core/mmseqs/search/environment.yml index 69afa60..072223f 100644 --- a/modules/nf-core/mmseqs/search/environment.yml +++ b/modules/nf-core/mmseqs/search/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::mmseqs2=17.b804f + - bioconda::mmseqs2=18.8cc5c diff --git a/modules/nf-core/mmseqs/search/main.nf b/modules/nf-core/mmseqs/search/main.nf index 31892f5..2f43220 100644 --- a/modules/nf-core/mmseqs/search/main.nf +++ b/modules/nf-core/mmseqs/search/main.nf @@ -3,9 +3,9 @@ process MMSEQS_SEARCH { label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mmseqs2:17.b804f--hd6d6fdc_1': - 'biocontainers/mmseqs2:17.b804f--hd6d6fdc_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe49c17754753d6cd9a31e5894117edaf1c81e3d6053a12bf6dc8f3af1dffe23/data' + : 'community.wave.seqera.io/library/mmseqs2:18.8cc5c--af05c9a98d9f6139'}" input: tuple val(meta), path(db_query) @@ -13,7 +13,7 @@ process MMSEQS_SEARCH { output: tuple val(meta), path("${prefix}/"), emit: db_search - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -40,7 +40,7 @@ process MMSEQS_SEARCH { \$DB_TARGET_PATH_NAME \\ ${prefix}/${prefix} \\ tmp1 \\ - $args \\ + ${args} \\ --threads ${task.cpus} cat <<-END_VERSIONS > versions.yml @@ -52,8 +52,11 @@ process MMSEQS_SEARCH { stub: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - if ("$db_query" == "${prefix}" || "$db_target" == "${prefix}" ) error "Input and output names of databases are the same, set prefix in module configuration to disambiguate!" + if ("${db_query}" == "${prefix}" || "${db_target}" == "${prefix}") { + error("Input and output names of databases are the same, set prefix in module configuration to disambiguate!") + } """ + echo ${args} mkdir -p ${prefix} touch ${prefix}/${prefix}.{0..9} touch ${prefix}/${prefix}.dbtype diff --git a/modules/nf-core/mmseqs/search/meta.yml b/modules/nf-core/mmseqs/search/meta.yml index 8490c8b..1cbaf30 100644 --- a/modules/nf-core/mmseqs/search/meta.yml +++ b/modules/nf-core/mmseqs/search/meta.yml @@ -28,6 +28,7 @@ input: - db_query: type: file description: Query database + ontologies: [] - - meta2: type: map description: | @@ -36,9 +37,10 @@ input: - db_target: type: file description: Target database + ontologies: [] output: - - db_search: - - meta: + db_search: + - - meta: type: map description: | Groovy Map containing sample information @@ -46,11 +48,13 @@ output: - ${prefix}/: type: directory description: an MMseqs2 database with search results - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@Joon-Klaps" maintainers: diff --git a/modules/nf-core/mmseqs/search/tests/main.nf.test.snap b/modules/nf-core/mmseqs/search/tests/main.nf.test.snap index 3231487..958a897 100644 --- a/modules/nf-core/mmseqs/search/tests/main.nf.test.snap +++ b/modules/nf-core/mmseqs/search/tests/main.nf.test.snap @@ -9,15 +9,15 @@ ], { "MMSEQS_SEARCH": { - "mmseqs": "17.b804f" + "mmseqs": "18.8cc5c" } } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T18:14:57.259665" + "timestamp": "2025-11-01T16:21:39.315595228" }, "sarscov2 - test-search - stub": { "content": [ @@ -45,7 +45,7 @@ ] ], "1": [ - "versions.yml:md5,55da7c5f326efc78452de61273aaed5b" + "versions.yml:md5,c4c42b0dc1e8e97c966286c1a71628dd" ], "db_search": [ [ @@ -70,19 +70,19 @@ ] ], "versions": [ - "versions.yml:md5,55da7c5f326efc78452de61273aaed5b" + "versions.yml:md5,c4c42b0dc1e8e97c966286c1a71628dd" ] }, { "MMSEQS_SEARCH": { - "mmseqs": "17.b804f" + "mmseqs": "18.8cc5c" } } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.10.0" }, - "timestamp": "2025-01-20T18:15:12.804291" + "timestamp": "2025-11-01T16:21:44.354292164" } } \ No newline at end of file diff --git a/modules/nf-core/mmseqs/taxonomy/environment.yml 
b/modules/nf-core/mmseqs/taxonomy/environment.yml index 69afa60..072223f 100644 --- a/modules/nf-core/mmseqs/taxonomy/environment.yml +++ b/modules/nf-core/mmseqs/taxonomy/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::mmseqs2=17.b804f + - bioconda::mmseqs2=18.8cc5c diff --git a/modules/nf-core/mmseqs/taxonomy/main.nf b/modules/nf-core/mmseqs/taxonomy/main.nf index d73bf03..4682fb1 100644 --- a/modules/nf-core/mmseqs/taxonomy/main.nf +++ b/modules/nf-core/mmseqs/taxonomy/main.nf @@ -1,27 +1,29 @@ process MMSEQS_TAXONOMY { - tag "$meta.id" + tag "${meta.id}" label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mmseqs2:17.b804f--hd6d6fdc_1': - 'biocontainers/mmseqs2:17.b804f--hd6d6fdc_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe49c17754753d6cd9a31e5894117edaf1c81e3d6053a12bf6dc8f3af1dffe23/data' + : 'community.wave.seqera.io/library/mmseqs2:18.8cc5c--af05c9a98d9f6139'}" input: tuple val(meta), path(db_query) - path(db_target) + path db_target output: tuple val(meta), path("${prefix}_taxonomy"), emit: db_taxonomy - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: "*.dbtype" //represents the db_query - def args3 = task.ext.args3 ?: "*.dbtype" //represents the db_target + def args2 = task.ext.args2 ?: "*.dbtype" + //represents the db_query + def args3 = task.ext.args3 ?: "*.dbtype" + //represents the db_target prefix = task.ext.prefix ?: "${meta.id}" """ @@ -37,7 +39,7 @@ process MMSEQS_TAXONOMY { \$DB_TARGET_PATH_NAME \\ ${prefix}_taxonomy/${prefix} \\ tmp1 \\ - $args \\ + ${args} \\ --threads ${task.cpus} cat <<-END_VERSIONS > versions.yml @@ -49,8 +51,8 @@ process MMSEQS_TAXONOMY { stub: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - """ + echo ${args} mkdir -p ${prefix}_taxonomy touch ${prefix}_taxonomy/${prefix}.{0..25} touch ${prefix}_taxonomy/${prefix}.dbtype diff --git a/modules/nf-core/mmseqs/taxonomy/meta.yml b/modules/nf-core/mmseqs/taxonomy/meta.yml index 15756fe..f1c2bcf 100644 --- a/modules/nf-core/mmseqs/taxonomy/meta.yml +++ b/modules/nf-core/mmseqs/taxonomy/meta.yml @@ -28,12 +28,12 @@ input: - db_query: type: directory description: An MMseqs2 database with query data - - - db_target: - type: directory - description: an MMseqs2 database with target data including the taxonomy classification + - db_target: + type: directory + description: an MMseqs2 database with target data including the taxonomy classification output: - - db_taxonomy: - - meta: + db_taxonomy: + - - meta: type: map description: | Groovy Map containing sample information @@ -41,11 +41,13 @@ output: - ${prefix}_taxonomy: type: directory description: An MMseqs2 database with target data including the taxonomy classification - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@darcy220606" maintainers: diff --git 
diff --git a/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test
index 95f1bc2..87322cb 100644
--- a/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test
+++ b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test
@@ -78,4 +78,3 @@ nextflow_process {
        }
    }
}
-
diff --git a/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test.snap b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test.snap
index 4402c73..1f205ce 100644
--- a/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test.snap
+++ b/modules/nf-core/mmseqs/taxonomy/tests/main.nf.test.snap
@@ -8,14 +8,14 @@
                "test_query.index"
            ],
            [
-                "versions.yml:md5,d86f3223ff4a4d664228707b581dca8a"
+                "versions.yml:md5,4d20eea52ed63340786e9a41c347e25d"
            ]
        ],
        "meta": {
            "nf-test": "0.9.2",
-            "nextflow": "24.10.3"
+            "nextflow": "25.10.0"
        },
-        "timestamp": "2025-01-20T16:28:40.091017"
+        "timestamp": "2025-11-01T16:23:05.886445221"
    },
    "mmseqs/taxonomy - bacteroides_fragilis - genome_nt - stub": {
        "content": [
@@ -59,7 +59,7 @@
                ]
            ],
            "1": [
-                "versions.yml:md5,d86f3223ff4a4d664228707b581dca8a"
+                "versions.yml:md5,4d20eea52ed63340786e9a41c347e25d"
            ],
            "db_taxonomy": [
                [
@@ -100,14 +100,14 @@
                ]
            ],
            "versions": [
-                "versions.yml:md5,d86f3223ff4a4d664228707b581dca8a"
+                "versions.yml:md5,4d20eea52ed63340786e9a41c347e25d"
            ]
        }
    ],
    "meta": {
        "nf-test": "0.9.2",
-        "nextflow": "24.10.3"
+        "nextflow": "25.10.0"
    },
-    "timestamp": "2025-01-20T16:48:57.634552"
+    "timestamp": "2025-11-01T16:23:10.99421377"
    }
}
\ No newline at end of file
diff --git a/modules/nf-core/mmseqs/tsv2exprofiledb/environment.yml b/modules/nf-core/mmseqs/tsv2exprofiledb/environment.yml
index 69afa60..072223f 100644
--- a/modules/nf-core/mmseqs/tsv2exprofiledb/environment.yml
+++ b/modules/nf-core/mmseqs/tsv2exprofiledb/environment.yml
@@ -4,4 +4,4 @@ channels:
  - conda-forge
  - bioconda
dependencies:
-  - bioconda::mmseqs2=17.b804f
+  - bioconda::mmseqs2=18.8cc5c
diff --git a/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf b/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf
index 086898f..f17a800 100644
--- a/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf
+++ b/modules/nf-core/mmseqs/tsv2exprofiledb/main.nf
@@ -4,14 +4,14 @@ process MMSEQS_TSV2EXPROFILEDB {

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
-        ? 'https://depot.galaxyproject.org/singularity/mmseqs2:17.b804f--hd6d6fdc_1'
-        : 'biocontainers/mmseqs2:17.b804f--hd6d6fdc_1'}"
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fe/fe49c17754753d6cd9a31e5894117edaf1c81e3d6053a12bf6dc8f3af1dffe23/data'
+        : 'community.wave.seqera.io/library/mmseqs2:18.8cc5c--af05c9a98d9f6139'}"

    input:
    path database

    output:
-    path (database) , emit: db_exprofile
+    path (database), emit: db_exprofile
    path "versions.yml", emit: versions

    when:
diff --git a/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml b/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml
index 284e168..c428ad8 100644
--- a/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml
+++ b/modules/nf-core/mmseqs/tsv2exprofiledb/meta.yml
@@ -18,24 +18,26 @@ tools:
      licence: ["GPL v3"]
      identifier: biotools:mmseqs
input:
-  - - database:
+  - database:
+      type: directory
+      description: |
+        Directory containing the database to be indexed
+      pattern: "*"
+output:
+  db_exprofile:
+    - database:
        type: directory
        description: |
-          Directory containing the database to be indexed
+          Directory containing the expandable profile database
        pattern: "*"
-output:
-  - db_exprofile:
-      - database:
-          type: directory
-          description: |
-            Directory containing the expandable profile database
-          pattern: "*"
-  - versions:
-      - versions.yml:
-          type: file
-          description: |
-            File containing software versions
-          pattern: "versions.yml"
+  versions:
+    - versions.yml:
+        type: file
+        description: |
+          File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
authors:
  - "@JoseEspinosa"
maintainers:
diff --git a/modules/nf-core/mmseqs/tsv2exprofiledb/tests/main.nf.test b/modules/nf-core/mmseqs/tsv2exprofiledb/tests/main.nf.test
index 04994ca..9ddfbf8 100644
--- a/modules/nf-core/mmseqs/tsv2exprofiledb/tests/main.nf.test
+++ b/modules/nf-core/mmseqs/tsv2exprofiledb/tests/main.nf.test
@@ -72,4 +72,4 @@ nextflow_process {

    }

-}
\ No newline at end of file
+}
diff --git a/modules/nf-core/mmseqs/tsv2exprofiledb/tests/main.nf.test.snap b/modules/nf-core/mmseqs/tsv2exprofiledb/tests/main.nf.test.snap
index d91350e..6f54e08 100644
--- a/modules/nf-core/mmseqs/tsv2exprofiledb/tests/main.nf.test.snap
+++ b/modules/nf-core/mmseqs/tsv2exprofiledb/tests/main.nf.test.snap
@@ -55,14 +55,14 @@
                "tmp"
            ],
            [
-                "versions.yml:md5,8865a91acd412cbac3add1a236c78ee2"
+                "versions.yml:md5,2bc25ea7ff4b6aba3f30c9f62ce87559"
            ]
        ],
        "meta": {
            "nf-test": "0.9.2",
-            "nextflow": "24.10.3"
+            "nextflow": "25.10.0"
        },
-        "timestamp": "2025-01-20T18:17:10.037107"
+        "timestamp": "2025-11-01T16:23:20.815325767"
    },
    "sarscov2 - db": {
        "content": [
@@ -137,13 +137,13 @@
                "tmp"
            ],
            [
-                "versions.yml:md5,8865a91acd412cbac3add1a236c78ee2"
+                "versions.yml:md5,2bc25ea7ff4b6aba3f30c9f62ce87559"
            ]
        ],
        "meta": {
            "nf-test": "0.9.2",
-            "nextflow": "24.10.3"
+            "nextflow": "25.10.0"
        },
-        "timestamp": "2025-01-20T18:16:54.792553"
+        "timestamp": "2025-11-01T16:23:15.838462817"
    }
}
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml
index d430da5..d02016a 100644
--- a/modules/nf-core/multiqc/environment.yml
+++ b/modules/nf-core/multiqc/environment.yml
@@ -4,4 +4,4 @@ channels:
  - conda-forge
  - bioconda
dependencies:
-  - bioconda::multiqc=1.28
+  - bioconda::multiqc=1.32
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index f3b5704..5d0780a 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -3,8 +3,8 @@ process MULTIQC {

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.28--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.28--pyhdfd78af_0' }"
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c6c120d559d7ee04c7442b61ad7cf5a9e8970be5feefb37d68eeaa60c1034eb/data' :
+        'community.wave.seqera.io/library/multiqc:1.32--d58f60e4deb769bf' }"

    input:
    path multiqc_files, stageAs: "?/*"
@@ -52,6 +52,7 @@ process MULTIQC {
    stub:
    """
    mkdir multiqc_data
+    touch multiqc_data/.stub
    mkdir multiqc_plots
    touch multiqc_report.html
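The updated module also documents two optional inputs (`replace_names`, `sample_names`) in the meta.yml below. A sketch of a call site passing empty placeholders for them, with channel names that are illustrative rather than taken from this changeset:

    MULTIQC (
        ch_multiqc_files.collect(),
        ch_multiqc_config.toList(),
        ch_multiqc_custom_config.toList(),
        ch_multiqc_logo.toList(),
        [],   // replace_names: optional TSV passed to --replace-names
        []    // sample_names: optional TSV passed to --sample_names
    )
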
diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
index b16c187..ce30eb7 100644
--- a/modules/nf-core/multiqc/meta.yml
+++ b/modules/nf-core/multiqc/meta.yml
@@ -15,57 +15,71 @@ tools:
      licence: ["GPL-3.0-or-later"]
      identifier: biotools:multiqc
input:
-  - - multiqc_files:
-      type: file
-      description: |
-        List of reports / files recognised by MultiQC, for example the html and zip output of FastQC
-  - - multiqc_config:
-      type: file
-      description: Optional config yml for MultiQC
-      pattern: "*.{yml,yaml}"
-  - - extra_multiqc_config:
-      type: file
-      description: Second optional config yml for MultiQC. Will override common sections
-        in multiqc_config.
-      pattern: "*.{yml,yaml}"
-  - - multiqc_logo:
+  - multiqc_files:
+      type: file
+      description: |
+        List of reports / files recognised by MultiQC, for example the html and zip output of FastQC
+      ontologies: []
+  - multiqc_config:
+      type: file
+      description: Optional config yml for MultiQC
+      pattern: "*.{yml,yaml}"
+      ontologies:
+        - edam: http://edamontology.org/format_3750 # YAML
+  - extra_multiqc_config:
+      type: file
+      description: Second optional config yml for MultiQC. Will override common sections
+        in multiqc_config.
+      pattern: "*.{yml,yaml}"
+      ontologies:
+        - edam: http://edamontology.org/format_3750 # YAML
+  - multiqc_logo:
+      type: file
+      description: Optional logo file for MultiQC
+      pattern: "*.{png}"
+      ontologies: []
+  - replace_names:
+      type: file
+      description: |
+        Optional two-column sample renaming file. First column a set of
+        patterns, second column a set of corresponding replacements. Passed via
+        MultiQC's `--replace-names` option.
+      pattern: "*.{tsv}"
+      ontologies:
+        - edam: http://edamontology.org/format_3475 # TSV
+  - sample_names:
+      type: file
+      description: |
+        Optional TSV file with headers, passed to the MultiQC --sample_names
+        argument.
+      pattern: "*.{tsv}"
+      ontologies:
+        - edam: http://edamontology.org/format_3475 # TSV
+output:
+  report:
+    - "*multiqc_report.html":
        type: file
-      description: Optional logo file for MultiQC
-      pattern: "*.{png}"
-  - - replace_names:
+        description: MultiQC report file
+        pattern: "multiqc_report.html"
+        ontologies: []
+  data:
+    - "*_data":
+        type: directory
+        description: MultiQC data dir
+        pattern: "multiqc_data"
+  plots:
+    - "*_plots":
        type: file
-      description: |
-        Optional two-column sample renaming file. First column a set of
-        patterns, second column a set of corresponding replacements. Passed via
-        MultiQC's `--replace-names` option.
-      pattern: "*.{tsv}"
-  - - sample_names:
+        description: Plots created by MultiQC
+        pattern: "*_data"
+        ontologies: []
+  versions:
+    - versions.yml:
        type: file
-      description: |
-        Optional TSV file with headers, passed to the MultiQC --sample_names
-        argument.
-      pattern: "*.{tsv}"
-output:
-  - report:
-      - "*multiqc_report.html":
-          type: file
-          description: MultiQC report file
-          pattern: "multiqc_report.html"
-  - data:
-      - "*_data":
-          type: directory
-          description: MultiQC data dir
-          pattern: "multiqc_data"
-  - plots:
-      - "*_plots":
-          type: file
-          description: Plots created by MultiQC
-          pattern: "*_data"
-  - versions:
-      - versions.yml:
-          type: file
-          description: File containing software versions
-          pattern: "versions.yml"
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
authors:
  - "@abhi18av"
  - "@bunop"
diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap
index 26d7430..a88bafd 100644
--- a/modules/nf-core/multiqc/tests/main.nf.test.snap
+++ b/modules/nf-core/multiqc/tests/main.nf.test.snap
@@ -2,14 +2,14 @@
    "multiqc_versions_single": {
        "content": [
            [
-                "versions.yml:md5,b05075d2d2b4f485c0d627a5c8e475b2"
+                "versions.yml:md5,737bb2c7cad54ffc2ec020791dc48b8f"
            ]
        ],
        "meta": {
-            "nf-test": "0.9.0",
+            "nf-test": "0.9.3",
            "nextflow": "24.10.4"
        },
-        "timestamp": "2025-03-26T16:05:18.927925"
+        "timestamp": "2025-10-27T13:33:24.356715"
    },
    "multiqc_stub": {
        "content": [
@@ -17,25 +17,25 @@
            "multiqc_report.html",
            "multiqc_data",
            "multiqc_plots",
-            "versions.yml:md5,b05075d2d2b4f485c0d627a5c8e475b2"
+            "versions.yml:md5,737bb2c7cad54ffc2ec020791dc48b8f"
            ]
        ],
        "meta": {
-            "nf-test": "0.9.0",
+            "nf-test": "0.9.3",
            "nextflow": "24.10.4"
        },
-        "timestamp": "2025-03-26T16:05:55.639955"
+        "timestamp": "2025-10-27T13:34:11.103619"
    },
    "multiqc_versions_config": {
        "content": [
            [
-                "versions.yml:md5,b05075d2d2b4f485c0d627a5c8e475b2"
+                "versions.yml:md5,737bb2c7cad54ffc2ec020791dc48b8f"
            ]
        ],
        "meta": {
-            "nf-test": "0.9.0",
+            "nf-test": "0.9.3",
            "nextflow": "24.10.4"
        },
-        "timestamp": "2025-03-26T16:05:44.067369"
+        "timestamp": "2025-10-27T13:34:04.615233"
    }
}
\ No newline at end of file
diff --git a/modules/nf-core/muscle/tests/main.nf.test b/modules/nf-core/muscle/tests/main.nf.test
index c835c22..6f3a4b1 100644
--- a/modules/nf-core/muscle/tests/main.nf.test
+++ b/modules/nf-core/muscle/tests/main.nf.test
@@ -92,4 +92,4 @@ nextflow_process {
        }
    }

-}
\ No newline at end of file
+}
diff --git a/modules/nf-core/muscle5/super5/tests/main.nf.test b/modules/nf-core/muscle5/super5/tests/main.nf.test
index 1199438..d1b7983 100644
--- a/modules/nf-core/muscle5/super5/tests/main.nf.test
+++ b/modules/nf-core/muscle5/super5/tests/main.nf.test
@@ -81,4 +81,4 @@ nextflow_process {
            )
        }
    }
-}
\ No newline at end of file
+}
diff --git a/nextflow.config b/nextflow.config
index 1a329c6..c9219c3 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -8,32 +8,28 @@
// Global default params, used in configs
params {
-
-    // TODO nf-core: Specify your pipeline's command line flags
-
    // Input options (sample sheet)
    input = null

-    // References
-    // TODO: some of these (igenomes*, genome*) don't make sense
-    // in the context of this workflow. Remove?
-    genome = null
-    igenomes_base = 's3://ngi-igenomes/igenomes/'
-    igenomes_ignore = false
-    fasta = null
    // MultiQC options
    multiqc_config = null
    multiqc_title = null
    multiqc_logo = null
    max_multiqc_email_size = '25.MB'
    multiqc_methods_description = null

-    // TODO: this needs to be removed or made more specific
+    // TODO: these aren't used at the moment. For some amplicons the
+    // name could act as a key to preset parameters; for others a
+    // Figaro-like step in pre-QC could evaluate the reads and suggest
+    // appropriate downstream settings.
    amplicon = "16S"
    amplicon_name = ""
    min_predicted_length = 0
    max_predicted_length = 0
-
-    // NYI, experimental
    paired_type = "overlapping" // "full_length", "overlapping", "dovetail", "mix", "nonoverlapping"
+
+    // TODO: platform is currently used in a fairly ad hoc way here;
+    // it could instead be used to preset platform-specific parameters
    platform = "illumina" // "illumina", "pacbio"; ONT, 454, Element, others may be added
+
+    // TODO: remove and determine on the fly from the sample sheet
    strategy = "paired" // method for trimming and denoising reads; "single" or "paired"

    // QC
@@ -49,16 +45,20 @@
    // Trimming
    skip_trimming = false
-    // TODO: implement trimming/filtering subworkflow
    trimmer = "dada2"

    // when true (default), this sets cutadapt's trimming
-    // (which uses linked adapters) to require *both* primers be present.
+    // (which uses linked adapters) to require that
+    // *both* primers be present.
+
    // With some kits like StrainID this can be an issue (can have
    // some truncated reads at the 5' or 3' end) and so can be relaxed
    // by setting to false.
-    // TODO: may make this less platform-specific
-    pacbio_strict_match = true
+    cutadapt_strict_match = true
+
+    // for paired-end data only: set this if R1 may read through
+    // into the 5' primer of R2 (e.g., as seen with ITS)
+    cutadapt_dovetail = false

    // required for cutadapt
    for_primer = ""
@@ -84,11 +84,7 @@
    rmPhiX = false

    // learnErrors options
-    // TODO: deprecate quality_binning
-    // Note this is the original fix for binned quality data, which
-    // simply flattens the rate. There are better methods for this now,
-    // including a supported method added to dada2 in more recent
-    // releases
+    // TODO: deprecate quality_binning in favor of models
    // Set to true if using binned qualities (NovaSeq, PacBio Revio)
    quality_binning = false

    // NYI
@@ -100,7 +96,8 @@
    // this is currently required to be set if makeBinnedQualErrfun is set
    learnerrors_quality_bins = ""

-    // TODO: some of the common ones overlap with dada options, // so we should try finding some way to make this simpler
+    // TODO: some of the common ones overlap with dada options,
+    // so we should try finding some way to make this simpler
    // between the two
    learnerrors_opts = ""
@@ -120,11 +117,7 @@
    min_overlap = 20
    max_mismatch = 0
    trim_overhang = false
-
-    // TODO: test using false instead of string
    just_concatenate = false
-    // For rescuing unmerged reads ones that don't overlap; this should
-    // be off unless really needed.
    rescue_unmerged = false

    // Pre-chimera sequence tables. This pulls in one or more sequence tables
@@ -139,13 +132,11 @@
    skip_chimera_detection = false
    removeBimeraDenovo_options = ""

-    // NYI: Optional clustering
-    // VSEARCH clustering to %identity + generation of new seqtab
-
+    // General ASV filtering
    min_asv_len = 0 // Only run if set > 1
    max_asv_len = 0 // Only run if set > 1

-    // ASV filtering:
+    // Search-based filtering
    // This is still alpha!!!
    search_filter = "none" // currently only "mmseqs"
    search_filter_dryrun = true
@@ -154,12 +145,10 @@
    mmseqs_fasta = "" // FASTA sequences to format
    mmseqs_database = "" // path to database with prefix name
    infernal_model = "" // NYI
-
    tax_filter = false
    tax_filter_rank = "Phylum"

    // other options to be added when needed
-
    // Taxonomic assignment
    // TODO: set flag to skip these explicitly
    // skip_taxonomic_assignment = false
@@ -177,7 +166,7 @@
    aligner = 'DECIPHER' // default
    // infernalCM = false

-    // Phylogenetic analysis
+    // Phylogenetic analysis, requires MSA above
    skip_tree = false
    phylo_tool = 'fasttree' // default, current alternative is 'phangorn'
@@ -191,8 +180,6 @@
    id_type = "md5" // simple, md5; others may be added

    // other parameters
-    // NYI, intended to be a consistent random seed used
-    // across the workflow when needed
    random_seed = 100

    // Boilerplate options
@@ -356,12 +343,6 @@ plugins {
    id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}

-// Load igenomes.config if required
-if (!params.igenomes_ignore) {
-    includeConfig 'conf/igenomes.config'
-} else {
-    params.genomes = [:]
-}
// Export these variables to prevent local Python/R libraries from conflicting with those in the container
// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
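For reference, a minimal user-side sketch exercising the new cutadapt parameters declared above; the primer sequences are placeholders (standard 515F/806R), not pipeline defaults:

    // example overrides in a run-specific config or via -params-file (sketch only)
    params {
        trimmer               = "cutadapt"
        for_primer            = "GTGYCAGCMGCCGCGGTAA"   // placeholder 515F
        rev_primer            = "GGACTACNVGGGTWTCTAAT"  // placeholder 806R
        cutadapt_strict_match = false  // relax the linked-adapter requirement (e.g. StrainID)
        cutadapt_dovetail     = true   // also trim read-through into the opposite primer
    }
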
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4949fd8..6aa3df1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -43,38 +43,6 @@
            }
        }
    },
-    "reference_genome_options": {
-        "title": "Reference genome options",
-        "type": "object",
-        "fa_icon": "fas fa-dna",
-        "description": "Reference genome related files and options required for the workflow.",
-        "properties": {
-            "genome": {
-                "type": "string",
-                "description": "Name of iGenomes reference.",
-                "fa_icon": "fas fa-book",
-                "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
-                "hidden": true
-            },
-            "fasta": {
-                "type": "string",
-                "format": "file-path",
-                "exists": true,
-                "mimetype": "text/plain",
-                "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
-                "description": "Path to FASTA genome file.",
-                "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
-                "fa_icon": "far fa-file-code"
-            },
-            "igenomes_ignore": {
-                "type": "boolean",
-                "description": "Do not load the iGenomes reference config.",
-                "fa_icon": "fas fa-ban",
-                "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.",
-                "hidden": true
-            }
-        }
-    },
    "institutional_config_options": {
        "title": "Institutional config options",
        "type": "object",
@@ -274,9 +242,6 @@
        {
            "$ref": "#/definitions/input_output_options"
        },
-        {
-            "$ref": "#/definitions/reference_genome_options"
-        },
        {
            "$ref": "#/definitions/institutional_config_options"
        },
@@ -288,11 +253,6 @@
        }
    ],
    "properties": {
-        "pacbio_strict_match": {
-            "type": "boolean",
-            "default": true,
-            "description": "PacBio `cutadapt`. Whether both primers need to be required ('True', default) or only one primer ('False')"
-        },
        "trim_for": {
            "type": "integer",
            "default": 0,
@@ -607,6 +567,13 @@
        },
        "learnerrors_quality_bins": {
            "type": "string"
+        },
+        "cutadapt_strict_match": {
+            "type": "boolean",
+            "default": true
+        },
+        "cutadapt_dovetail": {
+            "type": "boolean"
        }
    }
}
diff --git a/subworkflows/local/filter_and_trim.nf b/subworkflows/local/filter_and_trim.nf
index d29a812..cac591b 100644
--- a/subworkflows/local/filter_and_trim.nf
+++ b/subworkflows/local/filter_and_trim.nf
@@ -1,7 +1,7 @@
include { ILLUMINA_DADA2_FILTER_AND_TRIM } from '../../modules/local/illumina_filterandtrim'
include { PACBIO_DADA2_FILTER_AND_TRIM } from '../../modules/local/pacbio_filterandtrim'
-include { ILLUMINA_CUTADAPT } from '../../modules/local/illumina_cutadapt'
-include { PACBIO_CUTADAPT } from '../../modules/local/pacbio_cutadapt'
+include { CUTADAPT as SHORT_READ_CUTADAPT } from '../../modules/nf-core/cutadapt'
+include { CUTADAPT as LONG_READ_CUTADAPT } from '../../modules/nf-core/cutadapt'
include { MERGE_TRIM_TABLES } from '../../modules/local/mergetrimtables'

workflow FILTER_AND_TRIM {
@@ -10,6 +10,7 @@ workflow FILTER_AND_TRIM {
    ch_input

    main:
+    ch_versions = Channel.empty()
    ch_reports = Channel.empty()
    ch_trimmed = Channel.empty()
    ch_trimmed_R1 = Channel.empty()
@@ -31,19 +32,30 @@ workflow FILTER_AND_TRIM {
    def trimmer = params.platform == "pacbio" ? "cutadapt" : params.trimmer

    if (params.platform == "pacbio") {
-        PACBIO_CUTADAPT(
-            ch_input,
-            params.for_primer,
-            rev_primer_rc
+        LONG_READ_CUTADAPT(
+            ch_input
+                .map { meta, reads ->
+                    [
+                        [ id: meta.id,
+                          single_end: meta.single_end,
+                          for: params.for_primer,
+                          rev: params.rev_primer,
+                          for_rc: for_primer_rc,
+                          rev_rc: rev_primer_rc],
+                        reads
+                    ]
+                }
        )
        // TODO: this could be modified/split into a `DADA2`-only step
        // PACBIO_DADA2_FILTER_AND_TRIM(
        //     PACBIO_CUTADAPT.out.cutadapt_trimmed
        // )
-        ch_trimmed = PACBIO_CUTADAPT.out.trimmed
-        ch_reports = PACBIO_CUTADAPT.out.trimmed_report.collect()
-        ch_multiqc_files = ch_multiqc_files.mix(PACBIO_CUTADAPT.out.cutadapt_json)
+        ch_trimmed = LONG_READ_CUTADAPT.out.reads
+        ch_reports = LONG_READ_CUTADAPT.out.log.collect{it[1]}
+        // ch_multiqc_files = ch_multiqc_files.mix(LONG_READ_CUTADAPT.out.cutadapt_json)
+        ch_versions = ch_versions.mix(LONG_READ_CUTADAPT.out.versions)
+
    } else {
        // this handles both paired and single-end data
        if (trimmer == "dada2") {
@@ -53,16 +65,27 @@ workflow FILTER_AND_TRIM {
            ch_trimmed = ILLUMINA_DADA2_FILTER_AND_TRIM.out.trimmed
            ch_reports = ILLUMINA_DADA2_FILTER_AND_TRIM.out.trimmed_report.collect()
        } else if (trimmer == "cutadapt") {
-            ILLUMINA_CUTADAPT(
-                ch_input,
-                params.for_primer,
-                params.rev_primer,
-                for_primer_rc,
-                rev_primer_rc
+
+            // this currently requires passing in the primers via meta
+            // for each sample
+            SHORT_READ_CUTADAPT(
+                ch_input
+                    .map { meta, reads ->
+                        [
+                            [ id: meta.id,
+                              single_end: meta.single_end,
+                              for: params.for_primer,
+                              rev: params.rev_primer,
+                              for_rc: for_primer_rc,
+                              rev_rc: rev_primer_rc],
+                            reads
+                        ]
+                    }
            )
-            ch_trimmed = ILLUMINA_CUTADAPT.out.trimmed
-            ch_reports = ILLUMINA_CUTADAPT.out.trimmed_report.collect()
-            ch_multiqc_files = ch_multiqc_files.mix(ILLUMINA_CUTADAPT.out.cutadapt_json)
+            ch_trimmed = SHORT_READ_CUTADAPT.out.reads
+            ch_reports = SHORT_READ_CUTADAPT.out.log.collect{it[1]}
+            // ch_multiqc_files = ch_multiqc_files.mix(SHORT_READ_CUTADAPT.out.log.collect{it[1]})
+            ch_versions = ch_versions.mix(SHORT_READ_CUTADAPT.out.versions)
        }
    }

@@ -74,6 +97,7 @@ workflow FILTER_AND_TRIM {
    emit:
    trimmed_report = MERGE_TRIM_TABLES.out.trimmed_report // channel: [ RDS ]
    trimmed = ch_trimmed
+    versions = ch_versions
    ch_multiqc_files
}
diff --git a/subworkflows/local/qualitycontrol.nf b/subworkflows/local/qualitycontrol.nf
index 7dc4ae0..db2cabd 100644
--- a/subworkflows/local/qualitycontrol.nf
+++ b/subworkflows/local/qualitycontrol.nf
@@ -12,8 +12,6 @@ workflow QUALITY_CONTROL {
    main:
    ch_versions = Channel.empty()

-    ch_readtracking.dump()
-
    READ_TRACKING(
        ch_readtracking.collect()
    )
diff --git a/subworkflows/local/utils_nfcore_tada_pipeline/main.nf b/subworkflows/local/utils_nfcore_tada_pipeline/main.nf
index 0c4d1c9..4503554 100644
--- a/subworkflows/local/utils_nfcore_tada_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_tada_pipeline/main.nf
@@ -151,7 +151,8 @@ workflow PIPELINE_COMPLETION {
// Check and validate pipeline parameters
//
def validateInputParameters() {
-    genomeExistsError()
+    // TODO: move input checking here
+    // genomeExistsError()
}

//
// Validate channels from input samplesheet
//
@@ -190,7 +191,8 @@ def genomeExistsError() {
            "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
        error(error_string)
    }
-}//
+}
+//
// Generate methods description for MultiQC
//
def toolCitationText() {