From 56b776b02958175a2553243b57025f4625c81adb Mon Sep 17 00:00:00 2001 From: "William Lautert Dutra, MSc" <79327373+WilliamLautertD@users.noreply.github.com> Date: Mon, 27 Apr 2026 12:07:11 -0400 Subject: [PATCH] Fix BAM_COVERAGE blacklist variable initialization --- README.md | 9 ++- chipseq_main.nf | 111 ++++++++++++++++++++++++++++++++++++- chipseq_nextflow.config | 8 +++ config/chipseq_samples.tsv | 6 +- 4 files changed, 125 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0b6fc1a..29b88c8 100644 --- a/README.md +++ b/README.md @@ -58,13 +58,16 @@ For ChIP-seq, a separate generic Nextflow workflow is available in `chipseq_main - Input FASTQ files are fully controlled by `config/chipseq_samples.tsv` (no fixed filename pattern is required). - Sample names can be any value in the `sample` column. -- Paths and run parameters are set in `chipseq_nextflow.config`, including reference genome, mapping filters, and bigWig options. +- The `control_sample` column pairs each ChIP sample with the exact Input sample ID for downstream `bamCompare` output. +- Paths and run parameters are set in `chipseq_nextflow.config`, including reference genome, mapping filters, bigWig options, and `bamCompare` settings (ratio mode with duplicate ignoring, no scale-factor normalization). +- `params.bwa_index_prefix` (or `params.reference_fasta`) must point to an existing BWA index basename with `.amb/.ann/.bwt/.pac/.sa` files; `.fa` basename variants are auto-detected (e.g. `hg19` -> `hg19.fa`). +- If `bwa_index_prefix` is accidentally provided without a leading `/`, the workflow will try the absolute form and warn if it auto-corrects. ### Files -- `chipseq_main.nf` - ChIP-seq QC, trimming, mapping/filtering, optional bigWig generation, and MultiQC. +- `chipseq_main.nf` - ChIP-seq QC, trimming, mapping/filtering, optional bigWig generation, optional ChIP/Input `bamCompare`, and MultiQC. - `chipseq_nextflow.config` - ChIP-seq pipeline parameters and runtime profiles. -- `config/chipseq_samples.tsv` - sample sheet template; edit file paths and sample IDs freely. +- `config/chipseq_samples.tsv` - sample sheet template; edit file paths, sample IDs, and `control_sample` pairing. ### Quick start diff --git a/chipseq_main.nf b/chipseq_main.nf index e5c1321..8406976 100644 --- a/chipseq_main.nf +++ b/chipseq_main.nf @@ -99,7 +99,7 @@ process BWA_MEM_FILTER { tuple val(meta), path("${meta.id}.filtered.bam"), path("${meta.id}.filtered.bam.bai"), path("${meta.id}.flagstat.tsv"), emit: bam script: - def ref = params.bwa_index_prefix ?: params.reference_fasta + def ref = params.bwa_resolved_reference ?: resolveBwaReference() """ bwa mem -t ${task.cpus} ${ref} ${r1} ${r2} \ | samtools collate -@ ${task.cpus} -O -u - \ @@ -151,6 +151,36 @@ process BAM_COVERAGE { """ } + +process BAM_COMPARE { + tag "${meta.id}_vs_${controlId}" + publishDir "${params.outdir}/coverage/bamcompare", mode: 'copy' + + input: + tuple val(controlId), val(meta), path(chipBam), path(chipBai), path(inputBam), path(inputBai) + + output: + tuple val(meta), path("${meta.id}.vs.${controlId}.ratio.bw"), emit: bw + + script: + """ + bamCompare \ + -b1 ${chipBam} \ + -b2 ${inputBam} \ + -o ${meta.id}.vs.${controlId}.ratio.bw \ + --ignoreDuplicates \ + --numberOfProcessors ${task.cpus} \ + --operation ratio \ + --smoothLength ${params.bamcompare_smooth_length} \ + --binSize ${params.bamcompare_binsize} + """ + + stub: + """ + touch ${meta.id}.vs.${controlId}.ratio.bw + """ +} + process MULTIQC { publishDir "${params.outdir}/qc", mode: 'copy' @@ -171,7 +201,56 @@ process MULTIQC { """ } + +def resolveBwaReference() { + def rawRef = (params.bwa_index_prefix ?: params.reference_fasta)?.toString()?.trim() + if (!rawRef) { + error "Set either params.bwa_index_prefix or params.reference_fasta in chipseq_nextflow.config" + } + + def expectedExts = ['.amb', '.ann', '.bwt', '.pac', '.sa'] + def candidates = [] + + def addCandidate = { String c -> + if (c && !candidates.contains(c)) { + candidates << c + } + } + + addCandidate(rawRef) + if (!rawRef.startsWith('/')) { + addCandidate("/${rawRef}") + } + + if (!rawRef.endsWith('.fa') && !rawRef.endsWith('.fasta')) { + addCandidate("${rawRef}.fa") + if (!rawRef.startsWith('/')) { + addCandidate("/${rawRef}.fa") + } + } + + for (candidate in candidates) { + def allExist = expectedExts.every { ext -> file("${candidate}${ext}").exists() } + if (allExist) { + return candidate + } + } + + def missing = expectedExts.findAll { ext -> !file("${candidates[0]}${ext}").exists() } + error "BWA index files are missing for '${rawRef}'. Tried candidates: ${candidates.join(', ')}: ${missing.join(', ')}. Set params.bwa_index_prefix to the index basename (e.g. /path/to/hg19) or params.reference_fasta to a FASTA with generated BWA index files." +} + +def validateBwaReference() { + def resolvedRef = resolveBwaReference() + params.bwa_resolved_reference = resolvedRef + if (resolvedRef != (params.bwa_index_prefix ?: params.reference_fasta)?.toString()?.trim()) { + log.warn "Resolved BWA reference path '${params.bwa_index_prefix ?: params.reference_fasta}' to '${resolvedRef}'. Prefer using the absolute path in config." + } +} + workflow { + validateBwaReference() + samples_ch = Channel .fromPath(params.samples) .splitCsv(header: true, sep: '\t') @@ -184,12 +263,21 @@ workflow { error "Sample ${sampleId} is missing fastq_r1 or fastq_r2" } + def condition = row.condition ?: 'unspecified' + def controlSample = row.control_sample?.trim() + if (condition.toString().equalsIgnoreCase('Input')) { + controlSample = '' + } else if (!controlSample) { + error "Sample ${sampleId} is missing control_sample. Provide the Input sample ID to pair for bamCompare." + } + def meta = [ id: sampleId, - condition: row.condition ?: 'unspecified', + condition: condition, replicate: row.replicate ?: '1', library: row.read_group_library ?: 'lib1', - unit: row.read_group_unit ?: 'unit1' + unit: row.read_group_unit ?: 'unit1', + control_sample: controlSample ] tuple(meta, file(row.fastq_r1), file(row.fastq_r2)) } @@ -205,6 +293,23 @@ workflow { BAM_COVERAGE(BWA_MEM_FILTER.out.bam) } + if (params.make_bamcompare) { + input_bams_ch = BWA_MEM_FILTER.out.bam + .filter { meta, bam, bai, flagstat -> meta.condition.toString().equalsIgnoreCase('Input') } + .map { meta, bam, bai, flagstat -> tuple(meta.id, bam, bai) } + + chip_bams_ch = BWA_MEM_FILTER.out.bam + .filter { meta, bam, bai, flagstat -> !meta.condition.toString().equalsIgnoreCase('Input') } + .map { meta, bam, bai, flagstat -> tuple(meta.control_sample, meta, bam, bai) } + + chip_input_pairs_ch = chip_bams_ch.join(input_bams_ch, by: 0) + .map { controlId, meta, chipBam, chipBai, inputBam, inputBai -> + tuple(controlId, meta, chipBam, chipBai, inputBam, inputBai) + } + + BAM_COMPARE(chip_input_pairs_ch) + } + multiqc_inputs = FASTQC_RAW.out.html .mix(FASTQC_RAW.out.zip) .mix(FASTQC_TRIMMED.out.html) diff --git a/chipseq_nextflow.config b/chipseq_nextflow.config index 0e51aaf..2adb6fd 100644 --- a/chipseq_nextflow.config +++ b/chipseq_nextflow.config @@ -5,17 +5,22 @@ params { outdir = 'results/chipseq_pipeline' reference_fasta = '/path/to/reference/genome.fa' + // Set to BWA index basename when available, e.g. '/path/to/hg19' (expects .amb/.ann/.bwt/.pac/.sa) bwa_index_prefix = '' min_mapq = 30 exclude_flags = 3332 make_bigwig = true + make_bamcompare = true effective_genome_size = 2913022398 bigwig_binsize = 25 bigwig_normalization = 'CPM' blacklist_bed = '' + bamcompare_binsize = 200 + bamcompare_smooth_length = 600 + fastp_extra = '' } @@ -38,6 +43,9 @@ process { withName: BAM_COVERAGE { cpus = 8 } + withName: BAM_COMPARE { + cpus = 8 + } } profiles { diff --git a/config/chipseq_samples.tsv b/config/chipseq_samples.tsv index e7789fb..e6e99f6 100644 --- a/config/chipseq_samples.tsv +++ b/config/chipseq_samples.tsv @@ -1,3 +1,3 @@ -sample fastq_r1 fastq_r2 condition replicate read_group_library read_group_unit -RPE_H3K27ac_rep1 /path/to/any_filename_R1.fastq.gz /path/to/any_filename_R2.fastq.gz H3K27ac 1 libA unitA -RPE_Input_rep1 /path/to/input_R1_001.fastq.gz /path/to/input_R2_001.fastq.gz Input 1 libA unitA +sample fastq_r1 fastq_r2 condition replicate read_group_library read_group_unit control_sample +RPE_H3K27ac_rep1 /path/to/any_filename_R1.fastq.gz /path/to/any_filename_R2.fastq.gz H3K27ac 1 libA unitA RPE_Input_rep1 +RPE_Input_rep1 /path/to/input_R1_001.fastq.gz /path/to/input_R2_001.fastq.gz Input 1 libA unitA