diff --git a/main.nf b/main.nf index 79069ce..0dea5f4 100644 --- a/main.nf +++ b/main.nf @@ -74,8 +74,8 @@ workflow { .flatMap { library, fq_files -> def fq_list = fq_files instanceof List ? fq_files : [fq_files] - fq_list.findAll { it.baseName.contains('.1.trimmed') }.collect { fq1 -> - def chunk_name = fq1.baseName.split(".1.trimmed")[0] + fq_list.findAll { it.baseName.contains('.1.trimmed.fastq') }.collect { fq1 -> + def chunk_name = fq1.baseName.split(".1.trimmed.fastq")[0] [library, chunk_name, fq1] } } @@ -85,18 +85,17 @@ workflow { .flatMap { library, fq_files -> def fq_list = fq_files instanceof List ? fq_files : [fq_files] def chunk_groups = fq_list.groupBy { - it.baseName.replaceAll(/\.[12]\.trimmed$/, '') + it.baseName.replaceAll(/\.[12]\.trimmed\.fastq$/, '') } chunk_groups.collect { chunk_prefix, files -> - def r1 = files.find { it.baseName.contains('.1.trimmed') } - def r2 = files.find { it.baseName.contains('.2.trimmed') } + def r1 = files.find { it.baseName.contains('.1.trimmed.fastq') } + def r2 = files.find { it.baseName.contains('.2.trimmed.fastq') } [library, chunk_prefix, [r1, r2]] } } } - alignReads( passed_bams.combine(fastq_chunks, by:0), params.reference_list.bwa_index ) mergeAndMarkDuplicates( alignReads.out.bam_files.groupTuple() ) md_bams = mergeAndMarkDuplicates.out.md_bams diff --git a/modules/fastp.nf b/modules/fastp.nf index 95b68af..e401773 100644 --- a/modules/fastp.nf +++ b/modules/fastp.nf @@ -8,13 +8,13 @@ process fastp { tuple val(library), path(bam) output: - tuple val(library), path("*.trimmed.fastq"), emit: trimmed_fastq + tuple val(library), path("*.trimmed.fastq.gz"), emit: trimmed_fastq tuple val(library), path("${library}.fastp.json"), emit: fastp_json tuple val("${task.process}"), val('samtools'), eval('samtools --version | head -n 1 | sed \'s/^samtools //\''), topic: versions tuple val("${task.process}"), val('fastp'), eval('fastp --version 2>&1 | cut -f 2 -d " "'), topic: versions script: - def fastp_args = params.single_end ? "--out1 ${library}.1.trimmed.fastq" : "--interleaved_in --out1 ${library}.1.trimmed.fastq --out2 ${library}.2.trimmed.fastq" + def fastp_args = params.single_end ? "--out1 ${library}.1.trimmed.fastq.gz" : "--interleaved_in --out1 ${library}.1.trimmed.fastq.gz --out2 ${library}.2.trimmed.fastq.gz" """ set +o pipefail inst_name=\$(samtools view ${bam} | head -n 1 | cut -d ":" -f 1) diff --git a/modules/multiqc.nf b/modules/multiqc.nf index 5788409..18cb78a 100644 --- a/modules/multiqc.nf +++ b/modules/multiqc.nf @@ -16,6 +16,8 @@ process multiqc { extra_fn_clean_exts: - '.md' - '_combined_fastp' + use_filename_as_sample_name: + - picard/gcbias custom_plot_config: picard_insert_size: xmax: 1000 diff --git a/nextflow.config b/nextflow.config index e28e777..91db943 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,7 +11,6 @@ params { email = 'undefined' flowcell = 'undefined' ubam_dir = './' - project = 'project_undefined' workflow = 'EM-seq' outputDir = "em-seq_output" tmp_dir = '/tmp' @@ -23,7 +22,7 @@ params { single_end = false // NEB only - path_to_ngs_agg = "/mnt/bioinfo/prg/ngs-aggregate_results/current" + path_to_ngs_agg = null workflow_name_modifier = null } @@ -47,7 +46,6 @@ profiles { apptainer.enabled = false } mamba { - conda.cacheDir = '/data/seq-shepherd/.conda/envs' conda.enabled = true conda.useMamba = true docker.enabled = false @@ -58,7 +56,6 @@ profiles { apptainer.enabled = false } micromamba { - conda.cacheDir = '/data/seq-shepherd/.conda/envs' conda.enabled = true conda.useMamba = false conda.useMicromamba = true @@ -80,9 +77,6 @@ profiles { env { PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" } executor {