diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2c402453..c0e05d22 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -174,6 +174,7 @@ jobs:
- test_peak_callers_gopeaks
- test_peak_callers_epic2
- test_peak_callers_preset_extended
+ - test_peak_qc_multi_caller
- test_consensus_peaks_group
- test_consensus_peaks_all
- test_consensus_peaks_invalid
diff --git a/assets/multiqc/frip_score_header.txt b/assets/multiqc/frip_score_header.txt
index a0cab909..ca7f9aae 100644
--- a/assets/multiqc/frip_score_header.txt
+++ b/assets/multiqc/frip_score_header.txt
@@ -2,9 +2,9 @@
#parent_id: 'peak_qc'
#parent_name: 'Peak QC'
#parent_description: 'This section contains peak-based QC reports'
-#section_name: 'Sample FRiP score'
+#section_name: 'Sample FRiP score (per caller)'
#description: "is generated by calculating the fraction of all mapped fragments that fall
-# into the peak regions called by either MACS2 or SEACR.
+# into the peak regions called by each caller (e.g. MACS2, SEACR, GoPeaks).
# See FRiP score."
#plot_type: 'bargraph'
#anchor: 'primary_fripscore'
diff --git a/assets/multiqc/peak_counts_header.txt b/assets/multiqc/peak_counts_header.txt
index c6145c8a..69532d05 100644
--- a/assets/multiqc/peak_counts_header.txt
+++ b/assets/multiqc/peak_counts_header.txt
@@ -2,8 +2,8 @@
#parent_id: 'peak_qc'
#parent_name: 'Peak QC'
#parent_description: 'This section contains peak-based QC reports'
-#section_name: 'Sample Peak Count'
-#description: 'Calculated from the total number of peaks called by MACS2 or SEACR'
+#section_name: 'Sample Peak Count (per caller)'
+#description: 'Calculated from the total number of peaks called by each peak caller'
#plot_type: 'bargraph'
#anchor: 'primary_peakcounts'
#pconfig:
diff --git a/assets/multiqc/peak_reprod_header.txt b/assets/multiqc/peak_reprod_header.txt
index daccd629..54fad200 100644
--- a/assets/multiqc/peak_reprod_header.txt
+++ b/assets/multiqc/peak_reprod_header.txt
@@ -2,8 +2,8 @@
#parent_id: 'peak_qc'
#parent_name: 'Peak QC'
#parent_description: 'This section contains peak-based QC reports'
-#section_name: 'Sample Peak reproducibility %'
-#description: 'Calculated from the total number of overlapping peaks within group replicate sets'
+#section_name: 'Sample Peak reproducibility % (per caller)'
+#description: 'Calculated from the total number of overlapping peaks within group replicate sets for each caller'
#plot_type: 'bargraph'
#anchor: 'primary_peakrepro'
#pconfig:
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 7c4f2ab8..2dba5f25 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -41,6 +41,11 @@ def parse_args(args=None):
"USE_CONTROL",
help="Boolean for whether or not the user has specified the pipeline must normalise against a control",
)
+ parser.add_argument(
+ "--allow-cross-condition-controls",
+ action="store_true",
+ help="Allow targets to use controls from other conditions when no exact condition match exists",
+ )
return parser.parse_args(args)
@@ -63,7 +68,7 @@ def print_error(error, context="Line", context_str=""):
sys.exit(1)
-def check_samplesheet(file_in, file_out, use_control):
+def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_controls=False):
"""
This function checks that the samplesheet follows the following structure:
@@ -83,6 +88,15 @@ def check_samplesheet(file_in, file_out, use_control):
control_names_list = []
sample_run_dict = {}
control_condition_map = {}
+ missing_control_errors = []
+ missing_control_warnings = []
+ cross_condition_warnings = []
+ legacy_na_warnings = []
+ warning_lines = []
+
+ def record_warning(message):
+ print(message)
+ warning_lines.append(message)
with open(file_in, "r") as fin:
## Check header
@@ -239,6 +253,13 @@ def check_samplesheet(file_in, file_out, use_control):
## Check control group exists
for ctrl in control_names_list:
if ctrl != "" and ctrl not in sample_names_list:
+ if allow_cross_condition_controls:
+ record_warning(
+ "WARNING: Control entry '{}' does not match any group entry; proceeding because "
+ "--allow_cross_condition_controls was set. This may indicate a typo. "
+ "Control-required callers (epic2/span) will be skipped; SEACR/MACS2 will run without control.".format(ctrl)
+ )
+ continue
print_error(
"Each control entry must match at least one group entry! Unmatched control entry: {}.".format(ctrl)
)
@@ -269,7 +290,7 @@ def check_samplesheet(file_in, file_out, use_control):
)
if use_control == "false" and control_present:
- print(
+ record_warning(
"WARNING: Parameter --use_control was set to false, but an control group was found in " + str(file_in) + "."
)
@@ -281,21 +302,114 @@ def check_samplesheet(file_in, file_out, use_control):
if info[-1] == "1":
control_condition_map.setdefault(info[0], set()).add(info[1])
- # Warn on condition mismatch between targets and controls
- for sample_key, reps in sample_run_dict.items():
- for replicate, infos in reps.items():
- for info in infos:
- if info[-1] == "0" and info[3] != "":
- ctrl_conditions = control_condition_map.get(info[3], set())
- if ctrl_conditions and info[1] not in ctrl_conditions and "NA" not in ctrl_conditions:
+ # Validate condition-specific controls for targets
+ if has_condition:
+ for sample_key, reps in sample_run_dict.items():
+ for replicate, infos in reps.items():
+ for info in infos:
+ if info[-1] == "0" and info[3] != "":
+ ctrl_conditions = control_condition_map.get(info[3], set())
sample_id = "{}_{}_rep{}".format(info[0], info[1], info[2])
- print(
- "WARNING: No control found for target condition; will fall back to other conditions if available.\n"
- "Target sample_id: {} | control_group: {} | expected condition: {}".format(
- sample_id, info[3], info[1]
+ if not ctrl_conditions:
+ entry = {
+ "sample_id": sample_id,
+ "group": info[0],
+ "condition": info[1],
+ "replicate": info[2],
+ "control_group": info[3],
+ "control_conditions": "NONE",
+ }
+ if allow_cross_condition_controls:
+ missing_control_warnings.append(entry)
+ continue
+ missing_control_errors.append(entry)
+ continue
+ if info[1] in ctrl_conditions:
+ continue
+ if "NA" in ctrl_conditions:
+ legacy_na_warnings.append(
+ {
+ "sample_id": sample_id,
+ "group": info[0],
+ "condition": info[1],
+ "replicate": info[2],
+ "control_group": info[3],
+ "control_conditions": ",".join(sorted(ctrl_conditions)),
+ }
+ )
+ continue
+ if allow_cross_condition_controls:
+ cross_condition_warnings.append(
+ {
+ "sample_id": sample_id,
+ "group": info[0],
+ "condition": info[1],
+ "replicate": info[2],
+ "control_group": info[3],
+ "control_conditions": ",".join(sorted(ctrl_conditions)),
+ }
)
+ continue
+ missing_control_errors.append(
+ {
+ "sample_id": sample_id,
+ "group": info[0],
+ "condition": info[1],
+ "replicate": info[2],
+ "control_group": info[3],
+ "control_conditions": ",".join(sorted(ctrl_conditions)),
+ }
)
+ if missing_control_errors:
+ print("ERROR: Missing condition-matched controls for target samples (fail-fast).")
+ for entry in missing_control_errors:
+ print(
+ " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | "
+ "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry)
+ )
+ print(
+ "Remediation: add control rows for the missing condition(s), or set controls to legacy NA "
+ "consistently. If you intentionally want cross-condition controls, rerun with "
+ "--allow_cross_condition_controls."
+ )
+ sys.exit(1)
+
+ if missing_control_warnings:
+ record_warning(
+ "WARNING: No control rows exist for one or more control groups; proceeding because "
+ "--allow_cross_condition_controls was set. Control-required callers (epic2/span) will be skipped "
+ "for these samples; SEACR/MACS2 will run without control."
+ )
+ for entry in missing_control_warnings:
+ record_warning(
+ " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | "
+ "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry)
+ )
+
+ if legacy_na_warnings:
+ record_warning("WARNING: Control rows with condition=NA used for targets with explicit conditions (legacy mode).")
+ for entry in legacy_na_warnings:
+ record_warning(
+ " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | "
+ "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry)
+ )
+
+ if cross_condition_warnings:
+ record_warning(
+ "WARNING: No exact condition-matched controls found; proceeding with cross-condition controls "
+ "because --allow_cross_condition_controls was set."
+ )
+ for entry in cross_condition_warnings:
+ record_warning(
+ " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | "
+ "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry)
+ )
+
+ with open("samplesheet.warnings.txt", "w") as warn:
+ if warning_lines:
+ warn.write("\n".join(warning_lines) + "\n")
+
## Write validated samplesheet with appropriate columns
if len(sample_run_dict) > 0:
out_dir = os.path.dirname(file_out)
@@ -326,14 +440,19 @@ def check_samplesheet(file_in, file_out, use_control):
# print_error("Control group must match within technical replicates", tech_rep[2])
## Write to file
- for idx, sample_info in enumerate(sample_run_dict[sample_key][replicate]):
- sample_id = "{}_{}_rep{}_T{}".format(sample_info[0], sample_info[1], replicate, idx + 1)
+ for sample_info in sample_run_dict[sample_key][replicate]:
+ sample_id = "{}_{}_rep{}".format(sample_info[0], sample_info[1], replicate)
fout.write(",".join([sample_id] + sample_info[:7] + [sample_info[-1]]) + "\n")
def main(args=None):
args = parse_args(args)
- check_samplesheet(args.FILE_IN, args.FILE_OUT, args.USE_CONTROL)
+ check_samplesheet(
+ args.FILE_IN,
+ args.FILE_OUT,
+ args.USE_CONTROL,
+ allow_cross_condition_controls=args.allow_cross_condition_controls,
+ )
if __name__ == "__main__":
diff --git a/conf/flowswitch.config b/conf/flowswitch.config
index a79202c8..8b0df7d8 100644
--- a/conf/flowswitch.config
+++ b/conf/flowswitch.config
@@ -53,7 +53,7 @@ if (params.peakcaller) {
params.callers = caller_preset_map.standard
}
-if(params.consensus_peak_mode == 'all') { params.run_consensus_all = true }
+if(params.consensus_peak_mode == 'all' || params.consensus_grouping == 'all') { params.run_consensus_all = true }
if(params.remove_linear_duplicates) { params.run_remove_linear_dups = true }
if(params.skip_removeduplicates || !params.run_mark_dups) { params.run_remove_dups = false }
diff --git a/conf/modules.config b/conf/modules.config
index 676eb44a..43693c7b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -617,7 +617,7 @@ if(params.run_peak_calling && (params.normalisation_mode != "Spikein" && params.
"--normalizeUsing ${params.normalisation_mode}",
params.extend_fragments ? '--extendReads' : '',
].join(' ').trim()
- ext.prefix = { "${meta.id}.bedgraph" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}.bedgraph" }
publishDir = [
enabled: false
]
@@ -637,8 +637,18 @@ if(params.run_peak_calling) {
]
}
+ withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:NORMALISATION_SCOPE_REFERENCE_REPORT' {
+ publishDir = [
+ path: { "${params.outdir}/03_peak_calling/00_normalisation_factors" },
+ mode: "${params.publish_dir_mode}",
+ pattern: "normalisation_scope_reference.tsv",
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ enabled: true
+ ]
+ }
+
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:BEDTOOLS_SORT' {
- ext.prefix = { "${meta.id}.sorted" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}.sorted" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/01_bam_to_bedgraph" },
mode: "${params.publish_dir_mode}",
@@ -648,7 +658,7 @@ if(params.run_peak_calling) {
}
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:UCSC_BEDCLIP' {
- ext.prefix = { "${meta.id}.clipped" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}.clipped" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/02_clip_bed" },
mode: "${params.publish_dir_mode}",
@@ -658,6 +668,7 @@ if(params.run_peak_calling) {
}
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:UCSC_BEDGRAPHTOBIGWIG' {
+ ext.prefix = { "${meta.sample_id ?: meta.id}" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/03_bed_to_bigwig" },
mode: "${params.publish_dir_mode}",
@@ -672,7 +683,7 @@ if(params.run_peak_calling && 'seacr' in params.callers) {
process {
withName: '.*:CUTANDRUN:.*SEACR_.*' {
ext.args = "${params.seacr_norm} ${params.seacr_stringent}"
- ext.prefix = { "${meta.id}_${meta.caller}.seacr.peaks" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}.seacr.peaks" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/04_called_peaks/seacr" },
mode: "${params.publish_dir_mode}",
@@ -691,7 +702,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) {
params.macs2_pvalue ? "-p ${params.macs2_pvalue}" : "-q ${params.macs2_qvalue}",
params.macs2_narrow_peak ? '' : "--broad --broad-cutoff ${params.macs2_broad_cutoff}"
].join(' ').trim()
- ext.prefix = { "${meta.id}_${meta.caller}" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" },
mode: "${params.publish_dir_mode}",
@@ -702,7 +713,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) {
withName: '.*:CUTANDRUN:.*MACS2_CALLPEAK_NARROW' {
ext.args = "--nomodel --shift -75 --extsize 150 --keep-dup all -q 0.05 --format BAMPE"
- ext.prefix = { "${meta.id}_${meta.caller}" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" },
mode: "${params.publish_dir_mode}",
@@ -713,7 +724,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) {
withName: '.*:CUTANDRUN:.*MACS2_CALLPEAK_BROAD' {
ext.args = "--nomodel --shift -75 --extsize 150 --keep-dup all -q 0.05 --format BAMPE --broad --broad-cutoff 0.1"
- ext.prefix = { "${meta.id}_${meta.caller}" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" },
mode: "${params.publish_dir_mode}",
@@ -727,7 +738,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) {
if(params.run_peak_calling && params.callers.any { it.startsWith('gopeaks') }) {
process {
withName: '.*:CUTANDRUN:.*GOPEAKS_CALLPEAK.*' {
- ext.prefix = { "${meta.id}_${meta.caller}" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" },
mode: "${params.publish_dir_mode}",
@@ -741,7 +752,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('gopeaks') }) {
if(params.run_peak_calling && params.callers.any { it.startsWith('epic2') }) {
process {
withName: '.*:CUTANDRUN:.*EPIC2_CALLPEAK.*' {
- ext.prefix = { "${meta.id}_${meta.caller}" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" },
mode: "${params.publish_dir_mode}",
@@ -755,7 +766,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('epic2') }) {
if(params.run_peak_calling && params.callers.any { it.startsWith('span') }) {
process {
withName: '.*:CUTANDRUN:.*SPAN_OMNIPEAKS.*' {
- ext.prefix = { "${meta.id}_${meta.caller}" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" },
mode: "${params.publish_dir_mode}",
@@ -800,6 +811,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) {
ext.args = "-f 1-6"
ext.ext = "bed"
ext.suffix = { ".${meta.caller}.peaks" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}" }
publishDir = [
path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" },
mode: "${params.publish_dir_mode}",
@@ -810,7 +822,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) {
}
}
-if(params.run_peak_calling && params.callers[0] == 'seacr') {
+if(params.run_peak_calling && params.callers && params.callers.contains('seacr')) {
process {
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:AWK_EXTRACT_SUMMITS' {
ext.command = "'{split(\$6, summit, \":\"); split(summit[2], region, \"-\"); print summit[1]\"\\t\"region[1]\"\\t\"region[2]}'"
@@ -831,7 +843,7 @@ if(params.run_peak_calling && params.callers[0] == 'seacr') {
if(params.run_peak_calling) {
process {
withName: '.*:AWK_NAME_PEAK_BED' {
- ext.command = "'{OFS = \"\\t\"} {print \$0, FILENAME}'"
+ ext.command = "'BEGIN{OFS = \"\\t\"} \$1 !~ /^#/ && \$1 != \"Chromosome\" && \$1 != \"chromosome\" && \$1 != \"track\" && \$1 != \"browser\" {print \$0, FILENAME}'"
ext.ext = "bed"
publishDir = [
enabled: false
@@ -1062,14 +1074,14 @@ if (params.run_reporting && params.run_peak_qc) {
}
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PEAK_FRIP' {
- ext.prefix = { "${meta.id}_${meta.caller}" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" }
publishDir = [
enabled: false
]
}
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PRIMARY_PEAK_COUNTS' {
- ext.prefix = { "${meta.id}_${meta.caller}" }
+ ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" }
publishDir = [
enabled: false
]
@@ -1083,7 +1095,8 @@ if (params.run_reporting && params.run_peak_qc) {
}
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:CUT_CALC_REPROD' {
- ext.args = "-f 1,2,3,6"
+ ext.args = "-f 1-3"
+ ext.command = '| awk -v OFS="\\t" \'{print $1, $2, $3, $1":"$2"-"$3}\''
ext.suffix = ".repro"
ext.ext = "bed"
publishDir = [
@@ -1093,17 +1106,29 @@ if (params.run_reporting && params.run_peak_qc) {
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:BEDTOOLS_INTERSECT' {
ext.args = "-C -sorted -f ${params.min_peak_overlap}"
+ ext.prefix = { "${meta.id}.intersect" }
publishDir = [
enabled: false
]
}
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:CALCULATE_PEAK_REPROD' {
+ ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" }
publishDir = [
enabled: false
]
}
+ withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PEAK_QC_.*_REPORT' {
+ publishDir = [
+ path: { "${params.outdir}/03_peak_calling/07_qc_tables" },
+ mode: "${params.publish_dir_mode}",
+ pattern: "*.tsv",
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ enabled: true
+ ]
+ }
+
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PLOT_CONSENSUS_PEAKS' {
publishDir = [
path: { "${params.outdir}/04_reporting/consensus_upset_plots" },
diff --git a/conf/resources.config b/conf/resources.config
index d6cb5942..d87d6ac2 100644
--- a/conf/resources.config
+++ b/conf/resources.config
@@ -40,7 +40,7 @@ if(params.run_reporting) {
process {
withName: 'NFCORE_CUTANDRUN:CUTANDRUN:FRAG_LEN_HIST' {
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
- memory = { check_max( 32.GB * task.attempt, 'memory' ) }
+ memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
}
}
diff --git a/docs/output.md b/docs/output.md
index ccf61f74..09176fe6 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -313,7 +313,7 @@ Computes the overall similarity between two or more samples based on read covera
-These tables are generated when `--normalisation_mode Spikein` to record per-sample scale factors. When `--normalisation_scope` is `group` or `group_condition`, a separate TSV is written per scope.
+These tables are generated when `--normalisation_mode Spikein` to record per-sample scale factors. When `--normalisation_scope` is `group` or `group_condition`, a separate TSV is written per scope. If `--dump_scale_factors` is set, an additional debug file `normalisation_scope_reference.tsv` is written with the per-scope reference read counts used to compute scale factors.
### 6.2. Pooled controls
@@ -326,7 +326,10 @@ These tables are generated when `--normalisation_mode Spikein` to record per-sam
-Controls are pooled per control group and condition for callers that require pooled controls (epic2/SPAN). If a matching condition is unavailable, the closest available control is used and recorded in `03_peak_calling/07_qc_tables/control_pooling_fallbacks.tsv`.
+Controls are pooled per control group and condition for callers that require pooled controls (epic2/SPAN). If a matching condition is unavailable, the closest available control is used **only when explicitly allowed**; otherwise the pipeline fails fast during samplesheet validation. The control pooling report is always written to `03_peak_calling/07_qc_tables/control_pooling_fallbacks.tsv` (header + zero or more rows) and records both fallback usage and missing-control skips with status/action fields.
+
+The report columns are:
+`sample_id`, `group`, `condition`, `caller_id`, `control_group`, `selected_control_condition`, `status`, `action`, `reason`, `pooled_control_path`.
### 6.3. Bam to bedgraph
@@ -433,6 +436,9 @@ SPAN/OmniPeaks callers require pooled controls and the `--omnipeaks_jar` paramet
- `03_peak_calling/05_consensus_peaks///`
- `__.consensus.peak_counts.bed`: merged consensus peaks with replicate counts.
- `___consensus.awk.bed`: filtered consensus peaks after applying `--replicate_threshold`.
+- `03_peak_calling/05_consensus_peaks/all/all/` (when `--consensus_grouping all`)
+ - `all_samples__consensus.awk.bed`: filtered consensus peaks across all samples.
+ - `all_all_.consensus.peak_counts.bed`: merged consensus peaks with replicate counts across all samples.
@@ -442,9 +448,20 @@ The merge function from [BEDtools](https://github.com/arq5x/bedtools2) is used t
Once the peak calling process is complete, we run a separate set of reports that analyse the quality of the results at the peak level.
+
+Output files
+
+- `03_peak_calling/07_qc_tables/`
+ - `peak_counts.tsv`: per-sample peak counts (includes `caller_id`).
+ - `peak_frip_scores.tsv`: per-sample FRiP scores (includes `caller_id`).
+ - `peak_reproducibility.tsv`: per-group reproducibility (includes `caller_id`).
+ - `consensus_peak_counts.tsv`: consensus peak counts (includes `caller_id`).
+
+
+
### 7.1. Peak Counts
-For both the sample peaks and the consensus peaks, a simple count is taken. At the sample level, it is important to see consistency between peak counts of biological replicates. It is the first indicator of whether you replicates samples agree with each other after all of the processing has completed. If you use the consensus peaks and use a replicate threshold of more than 1, it is also important to see how many of your peaks across replicates have translated into consensus peaks.
+For both the sample peaks and the consensus peaks, a simple count is taken for each peak caller. At the sample level, it is important to see consistency between peak counts of biological replicates. It is the first indicator of whether you replicates samples agree with each other after all of the processing has completed. If you use the consensus peaks and use a replicate threshold of more than 1, it is also important to see how many of your peaks across replicates have translated into consensus peaks.
In the image below we see comparable peak counts for the H3K27me3 dataset, but a large disparity for the H3K4me3.
@@ -452,7 +469,7 @@ In the image below we see comparable peak counts for the H3K27me3 dataset, but a
### 7.2. Peak Reproducibility
-The peak reproducibility report intersects all samples within a group using `bedtools intersect` with a minimum overlap controlled by `min_peak_overlap`. This report is useful along with the peak count report for estimating how reliable the peaks called are between your biological replicates.
+The peak reproducibility report intersects all samples within a group (per caller) using `bedtools intersect` with a minimum overlap controlled by `min_peak_overlap`. This report is useful along with the peak count report for estimating how reliable the peaks called are between your biological replicates.
For example, in the image below when combined with the peak count information we see that although the H3K27me3 replicates both have similar peak counts, < 30% of the peaks are replicated across the replicate set. For H3K4me3, we see that replicate 1 has a small number of peaks called, but that almost 100% of those peaks are replicated in the second replicate. Replicate 2 has < 20% of its replicates reproduced in replicate 1 but by looking at the peak counts we can see this is due to the low number of peaks called.
@@ -460,7 +477,7 @@ For example, in the image below when combined with the peak count information we
### 7.3. FRiP Score
-Fraction of fragments in peaks (FRiP), defined as the fraction of all mapped paired-end reads extended into fragments that fall into the called peak regions, i.e. usable fragments in significantly enriched peaks divided by all usable fragments. In general, FRiP scores correlate positively with the number of regions. (Landt et al, Genome Research Sept. 2012, 22(9): 1813–1831). A minimum overlap is controlled by `min_frip_overlap`. The FRiP score can be used to assess the overall quality of a sample. Poor samples with a high level of background noise, small numbers of called peaks or other issues will have a large number of fragments falling outside the peaks that were called. Generally FRiP scores > 0.3 are considered to be reasonable with the highest quality data having FRiP scores of > 0.7.
+Fraction of fragments in peaks (FRiP), defined as the fraction of all mapped paired-end reads extended into fragments that fall into the called peak regions, i.e. usable fragments in significantly enriched peaks divided by all usable fragments. In general, FRiP scores correlate positively with the number of regions. (Landt et al, Genome Research Sept. 2012, 22(9): 1813–1831). A minimum overlap is controlled by `min_frip_overlap`. The FRiP score can be used to assess the overall quality of a sample. Poor samples with a high level of background noise, small numbers of called peaks or other issues will have a large number of fragments falling outside the peaks that were called. Generally FRiP scores > 0.3 are considered to be reasonable with the highest quality data having FRiP scores of > 0.7. FRiP is computed per caller when multiple peak callers are enabled.
It is worth noting that the peak caller settings are also crucial to this score, as even the highest quality data will have a low FRiP score if the pipeline is parameterised in a way that calls few peaks, such as setting the peak calling threshold very high.
@@ -474,7 +491,7 @@ CUT&Tag inserts adapters on either side of chromatin particles in the vicinity o
### 8.1. Heatmaps
-Heatmaps for both genomic features and peaks are generated using deepTools. The parameters for the gene heatmap generation including kilobases to map before and after the gene body can be found with the prefix `dt_heatmap_gene_*`. Similarly, the peak-based heatmap parameters can be found using `dt_heatmap_peak_*`.
+Heatmaps for both genomic features and peaks are generated using deepTools. The parameters for the gene heatmap generation including kilobases to map before and after the gene body can be found with the prefix `dt_heatmap_gene_*`. Similarly, the peak-based heatmap parameters can be found using `dt_heatmap_peak_*`. When multiple peak callers are enabled, peak-based heatmaps are generated per caller and filenames include the caller id.
**NB:** These reports are generated outside of MultiQC
diff --git a/docs/test_history/adding_new_peak_callers.md b/docs/test_history/adding_new_peak_callers.md
new file mode 100644
index 00000000..7b784ffa
--- /dev/null
+++ b/docs/test_history/adding_new_peak_callers.md
@@ -0,0 +1,5 @@
+## 2026-01-09 — pytest-suite (Slurm)
+- JobID/run dir: 13170828 — /scratch/users/dhusmann/nextflow-work/runs/pytest-suite/20260109_183946
+- Command: pytest-suite (pytest-workflow) --maxfail=1 --kwdof --symlink --ga --color=yes (via pytest_suite.sh)
+- Result: 323 passed, 0 failed (2:05:41), SLURM COMPLETED 0:0
+- Failures observed (prior attempt 13116667): Nextflow compile error "Variable ch_seacr_pairs/ch_macs_pairs already defined" in subworkflows/local/peak_calling_extended.nf; fix in 1491b92 (remove def on ch_*_inputs)
diff --git a/docs/usage.md b/docs/usage.md
index 56780e94..51fa8476 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -14,7 +14,7 @@ You will need to create a samplesheet file with information about the samples in
--input
```
-An example sample sheet structure is shown below. This defines two target experimental groups for the histone marks h3k27me3 and h3k4me3 across two conditions (Control/Treatment) with two biological replicates per condition. Each antibody target also has an IgG control per condition. The IgG controls are assigned to the target samples using the `control` column. If there are an equal number of replicates assigned to the samples from the control group as is the case below, the IgG controls will automatically be assigned to the same replicate number. If there is a mismatch then the first replicate of the control group will be assigned to all.
+An example sample sheet structure is shown below. This defines two target experimental groups for the histone marks h3k27me3 and h3k4me3 across two conditions (Control/Treatment) with two biological replicates per condition. Each antibody target also has an IgG control per condition. The IgG controls are assigned to the target samples using the `control` column. If there are an equal number of replicates assigned to the samples from the control group as is the case below, the IgG controls will automatically be assigned to the same replicate number. If there is a mismatch then the first replicate of the control group will be assigned to all. When a `condition` column is present, each target row that declares a control must have a matching control row for the same condition (fail-fast by default). Use `--allow_cross_condition_controls` only if you explicitly want cross-condition controls.
```bash
group,condition,replicate,fastq_1,fastq_2,control
@@ -165,7 +165,7 @@ If control samples are provided in the sample sheet, they will be used to normal
### Consensus Peaks
-After peak calling, consensus peaks are calculated by merging peaks within the same grouping key. Use `--consensus_grouping` to choose `group` or `group_condition`. By default, if the samplesheet includes a `condition` column, grouping uses `group_condition`; otherwise it falls back to `group`. The number of replicates required for a valid peak can be changed using `replicate_threshold`. To call consensus peaks across all samples, set `--consensus_peak_mode all`.
+After peak calling, consensus peaks are calculated by merging peaks within the same grouping key. Use `--consensus_grouping` to choose `group`, `group_condition`, or `all`. By default, if the samplesheet includes a `condition` column, grouping uses `group_condition`; otherwise it falls back to `group`. Set `--consensus_grouping all` to call consensus peaks across all samples. The number of replicates required for a valid peak can be changed using `replicate_threshold`. The legacy `--consensus_peak_mode all` still maps to the same all-samples behavior.
### Reproducibility
diff --git a/lib/WorkflowCutandrun.groovy b/lib/WorkflowCutandrun.groovy
index e9e94093..62edea95 100755
--- a/lib/WorkflowCutandrun.groovy
+++ b/lib/WorkflowCutandrun.groovy
@@ -18,6 +18,9 @@ class WorkflowCutandrun {
params.consensus_grouping = has_condition == false ? 'group' : 'group_condition'
}
+ // Resolve peak callers after params are fully loaded (e.g. from -params-file)
+ resolveCallers(params)
+
genomeExistsError(params, log)
validatePeakCallerParams(params, log)
@@ -86,8 +89,8 @@ class WorkflowCutandrun {
Nextflow.error "Invalid --igg_scale_scope value '${params.igg_scale_scope}'. Valid options: legacy, group_condition, sample."
}
- if (params.consensus_grouping && !['group','group_condition'].contains(params.consensus_grouping)) {
- Nextflow.error "Invalid --consensus_grouping value '${params.consensus_grouping}'. Valid options: group, group_condition."
+ if (params.consensus_grouping && !['group','group_condition','all'].contains(params.consensus_grouping)) {
+ Nextflow.error "Invalid --consensus_grouping value '${params.consensus_grouping}'. Valid options: group, group_condition, all."
}
if (params.peakcaller_preset && !['standard','extended'].contains(params.peakcaller_preset.toLowerCase())) {
@@ -95,6 +98,47 @@ class WorkflowCutandrun {
}
}
+ //
+ // Resolve caller list with correct precedence
+ //
+ private static void resolveCallers(params) {
+ def caller_preset_map = [
+ standard: ['seacr'],
+ extended: [
+ 'macs2_narrow',
+ 'macs2_broad',
+ 'gopeaks_narrow',
+ 'gopeaks_broad',
+ 'epic2_200bp',
+ 'epic2_150bp',
+ 'epic2_25bp',
+ 'span_default',
+ 'span_stringent'
+ ]
+ ]
+
+ def callers = []
+ if (params.callers) {
+ if (params.callers instanceof String) {
+ callers = params.callers.split(',').collect { it.trim().toLowerCase() }.findAll { it }
+ } else if (params.callers instanceof List) {
+ callers = params.callers.collect { it.toString().trim().toLowerCase() }.findAll { it }
+ }
+ }
+
+ if (params.peakcaller) {
+ callers = params.peakcaller.split(',').collect { it.trim().toLowerCase() }.findAll { it }
+ } else if (!callers) {
+ if (params.peakcaller_preset && params.peakcaller_preset.toLowerCase() == 'extended') {
+ callers = caller_preset_map.extended
+ } else {
+ callers = caller_preset_map.standard
+ }
+ }
+
+ params.callers = callers
+ }
+
//
// Detect whether the input samplesheet includes a condition column
//
diff --git a/modules/local/control_pooling_fallbacks_report.nf b/modules/local/control_pooling_fallbacks_report.nf
index 0c35d75c..edb8347d 100644
--- a/modules/local/control_pooling_fallbacks_report.nf
+++ b/modules/local/control_pooling_fallbacks_report.nf
@@ -17,16 +17,19 @@ process CONTROL_POOLING_FALLBACKS_REPORT {
task.ext.when == null || task.ext.when
script:
- def header = "sample_id\tgroup\tcondition\tcontrol_group\tcontrol_condition\tcaller\treason"
+ def header = "sample_id\tgroup\tcondition\tcaller_id\tcontrol_group\tselected_control_condition\tstatus\taction\treason\tpooled_control_path"
def lines = records ? records.collect { record ->
[
- record.sample_id,
- record.group,
- record.condition,
- record.control_group,
- record.control_condition,
- record.caller,
- record.reason
+ record.sample_id ?: '',
+ record.group ?: '',
+ record.condition ?: '',
+ record.caller_id ?: '',
+ record.control_group ?: '',
+ record.selected_control_condition ?: '',
+ record.status ?: '',
+ record.action ?: '',
+ record.reason ?: '',
+ record.pooled_control_path ?: ''
].join('\t')
}.join('\n') : ''
"""
diff --git a/modules/local/custom_dumpsoftwareversions.nf b/modules/local/custom_dumpsoftwareversions.nf
index 0fe4d309..4ccd1aa7 100644
--- a/modules/local/custom_dumpsoftwareversions.nf
+++ b/modules/local/custom_dumpsoftwareversions.nf
@@ -25,6 +25,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS {
import yaml
import platform
+ import json
from textwrap import dedent
def _make_versions_html(versions):
@@ -119,7 +120,33 @@ process CUSTOM_DUMPSOFTWAREVERSIONS {
}
with open("$versions") as f:
- workflow_versions = yaml.load(f, Loader=yaml.BaseLoader) | module_versions
+ raw_lines = f.read().splitlines()
+
+ # Sanitize potentially noisy version strings (eg. warnings) into valid YAML
+ clean_lines = []
+ for line in raw_lines:
+ if not line.strip():
+ continue
+ if line.startswith('"'):
+ clean_lines.append(line)
+ continue
+ stripped = line.lstrip(' ')
+ indent = line[:len(line) - len(stripped)]
+ if ':' not in stripped:
+ # Drop stray lines (eg. warning continuations)
+ continue
+ key_part, value = stripped.split(':', 1)
+ key = f"{indent}{key_part}:"
+ value = value.strip()
+ if value == "":
+ clean_lines.append(line)
+ continue
+ if not (value.startswith('"') or value.startswith("'")):
+ clean_lines.append(f"{key} {json.dumps(value)}")
+ else:
+ clean_lines.append(line)
+
+ workflow_versions = yaml.load("\\n".join(clean_lines) + "\\n", Loader=yaml.BaseLoader) | module_versions
workflow_versions["Workflow"] = {
"Nextflow": "$workflow.nextflow.version",
diff --git a/modules/local/epic2_callpeak.nf b/modules/local/epic2_callpeak.nf
index a6e5a79b..45a46607 100644
--- a/modules/local/epic2_callpeak.nf
+++ b/modules/local/epic2_callpeak.nf
@@ -37,7 +37,7 @@ process EPIC2_CALLPEAK {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- epic2: \$(epic2 --version 2>&1 | head -n 1 | sed -e 's/.*epic2 //g' || echo "unknown")
+ epic2: \$(epic2 --version 2>&1 | grep -Eo '[0-9]+(\\.[0-9]+)+' | head -n 1 || echo "unknown")
END_VERSIONS
"""
}
diff --git a/modules/local/normalisation_scope_reference_report.nf b/modules/local/normalisation_scope_reference_report.nf
new file mode 100644
index 00000000..796b5b25
--- /dev/null
+++ b/modules/local/normalisation_scope_reference_report.nf
@@ -0,0 +1,39 @@
+process NORMALISATION_SCOPE_REFERENCE_REPORT {
+ label 'process_single'
+
+ conda "conda-forge::coreutils=9.5"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+ 'nf-core/ubuntu:20.04' }"
+
+ input:
+ val records
+
+ output:
+ path "normalisation_scope_reference.tsv", emit: tsv
+ path "versions.yml", emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def header = "scope_id\treference_reads"
+ def lines = records ? records.collect { record ->
+ [
+ record.scope_id,
+ record.reference_reads
+ ].join('\t')
+ }.join('\n') : ''
+ """
+ printf "%s\\n" "${header}" > normalisation_scope_reference.tsv
+ if [ -n "${lines}" ]; then
+ printf "%s\\n" "${lines}" >> normalisation_scope_reference.tsv
+ fi
+
+ coreutils_version=\$(cat --version | head -n 1 | awk '{print \$NF}')
+ {
+ echo "\\"${task.process}\\":"
+ echo " coreutils: \$coreutils_version"
+ } > versions.yml
+ """
+}
diff --git a/modules/local/peak_counts.nf b/modules/local/peak_counts.nf
index 59a14758..d169d82d 100644
--- a/modules/local/peak_counts.nf
+++ b/modules/local/peak_counts.nf
@@ -12,6 +12,7 @@ process PEAK_COUNTS {
path peak_counts_header
output:
+ tuple val(meta), path("*_peak_count.txt"), emit: count_value
tuple val(meta), path("*mqc.tsv"), emit: count_mqc
path "versions.yml" , emit: versions
@@ -21,7 +22,9 @@ process PEAK_COUNTS {
script:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
- cat ${bed} | wc -l | awk -v OFS='\t' '{ print "Peak Count", \$1 }' | cat $peak_counts_header - > ${prefix}_mqc.tsv
+ PEAK_COUNT=\$(cat ${bed} | wc -l | awk '{print \$1}')
+ printf "%s\\n" "\$PEAK_COUNT" > ${prefix}_peak_count.txt
+ printf "Peak Count\\t%s\\n" "\$PEAK_COUNT" | cat $peak_counts_header - > ${prefix}_mqc.tsv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/local/peak_frip.nf b/modules/local/peak_frip.nf
index 64ebc387..2a88bd09 100644
--- a/modules/local/peak_frip.nf
+++ b/modules/local/peak_frip.nf
@@ -13,6 +13,7 @@ process PEAK_FRIP {
val min_frip_overlap
output:
+ tuple val(meta), path("*_frip_score.txt"), emit: frip_score
tuple val(meta), path("*mqc.tsv"), emit: frip_mqc
path "versions.yml" , emit: versions
@@ -23,7 +24,10 @@ process PEAK_FRIP {
def prefix = task.ext.prefix ?: "${meta.id}"
"""
READS_IN_PEAKS=\$(bedtools intersect -a ${fragments_bed} -b ${peaks_bed} -bed -c -f $min_frip_overlap | awk -F '\t' '{sum += \$NF} END {print sum * 2}')
- grep -m 1 'mapped (' ${flagstat} | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "Peak FRiP Score", a/\$1}' | cat $frip_score_header - > ${prefix}_mqc.tsv
+ MAPPED_READS=\$(grep -m 1 'mapped (' ${flagstat} | awk '{print \$1}')
+ FRIP_SCORE=\$(awk -v a="\$READS_IN_PEAKS" -v b="\$MAPPED_READS" 'BEGIN {OFS="\\t"; if (b > 0) {print a / b} else {print 0}}')
+ printf "%s\\n" "\$FRIP_SCORE" > ${prefix}_frip_score.txt
+ printf "Peak FRiP Score\\t%s\\n" "\$FRIP_SCORE" | cat $frip_score_header - > ${prefix}_mqc.tsv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/local/peak_qc_table_report.nf b/modules/local/peak_qc_table_report.nf
new file mode 100644
index 00000000..cd025c90
--- /dev/null
+++ b/modules/local/peak_qc_table_report.nf
@@ -0,0 +1,34 @@
+process PEAK_QC_TABLE_REPORT {
+ tag "$table_id"
+ label 'process_single'
+
+ conda "conda-forge::coreutils=9.5"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+ 'nf-core/ubuntu:20.04' }"
+
+ input:
+ tuple val(table_id), val(header), val(rows)
+
+ output:
+ path "${table_id}.tsv", emit: tsv
+ path "versions.yml", emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def lines = rows ? rows.join('\n') : ''
+ """
+ printf "%s\\n" "${header}" > ${table_id}.tsv
+ if [ -n "${lines}" ]; then
+ printf "%s\\n" "${lines}" >> ${table_id}.tsv
+ fi
+
+ coreutils_version=\$(cat --version | head -n 1 | awk '{print \$NF}')
+ {
+ echo "\\"${task.process}\\":"
+ echo " coreutils: \$coreutils_version"
+ } > versions.yml
+ """
+}
diff --git a/modules/local/python/samplesheet_check.nf b/modules/local/python/samplesheet_check.nf
index 6b371ab2..a5e50400 100644
--- a/modules/local/python/samplesheet_check.nf
+++ b/modules/local/python/samplesheet_check.nf
@@ -9,15 +9,17 @@ process SAMPLESHEET_CHECK {
path samplesheet
output:
- path '*.csv' , emit: csv
- path "versions.yml", emit: versions
+ path '*.csv' , emit: csv
+ path "samplesheet.warnings.txt", emit: warnings
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
+ def allow_cross = params.allow_cross_condition_controls ? '--allow-cross-condition-controls' : ''
"""
- check_samplesheet.py $samplesheet samplesheet.valid.csv $params.use_control
+ check_samplesheet.py $samplesheet samplesheet.valid.csv $params.use_control $allow_cross
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/local/span_omnipeaks_analyze.nf b/modules/local/span_omnipeaks_analyze.nf
index 1a01608a..86d9473d 100644
--- a/modules/local/span_omnipeaks_analyze.nf
+++ b/modules/local/span_omnipeaks_analyze.nf
@@ -4,8 +4,8 @@ process SPAN_OMNIPEAKS_ANALYZE {
conda "conda-forge::openjdk=21.0.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'docker://eclipse-temurin:21-jre' :
- 'eclipse-temurin:21-jre' }"
+ 'docker://eclipse-temurin:21-jdk' :
+ 'eclipse-temurin:21-jdk' }"
input:
tuple val(meta), path(treatment_bam), path(control_bam)
@@ -24,10 +24,11 @@ process SPAN_OMNIPEAKS_ANALYZE {
script:
def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}_${meta.caller}"
+ def raw_prefix = task.ext.prefix ?: "${meta.id}_${meta.caller}"
+ def prefix = raw_prefix.endsWith('.peak') ? raw_prefix : "${raw_prefix}.peak"
def fdr_arg = fdr ? "--fdr ${fdr}" : ''
"""
- java -Xmx${java_heap} -jar ${omnipeaks_jar} analyze \
+ java --add-modules jdk.incubator.vector -Xmx${java_heap} -jar ${omnipeaks_jar} analyze \
-t ${treatment_bam} \
-c ${control_bam} \
--cs ${chrom_sizes} \
diff --git a/nextflow.config b/nextflow.config
index 39d9e100..26d115ca 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -75,6 +75,7 @@ params {
// Peak Calling
use_control = true
+ allow_cross_condition_controls = false
only_peak_calling = false
extend_fragments = true
seacr_norm = 'non'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 3c68e540..4efc799d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -73,7 +73,7 @@
},
"dump_scale_factors": {
"type": "boolean",
- "description": "Output calculated scale factors from pipeline"
+ "description": "Emit additional debug outputs for spike-in normalisation (standard scale-factor TSVs are always written when normalisation_mode is Spikein)."
}
}
},
@@ -362,6 +362,12 @@
"fa_icon": "fas fa-align-justify",
"description": "Specifies whether to use a control to normalise peak calls against (e.g. IgG)"
},
+ "allow_cross_condition_controls": {
+ "type": "boolean",
+ "default": false,
+ "fa_icon": "fas fa-align-justify",
+ "description": "Allow targets to use controls from other conditions when no exact condition match exists (default: false, fail-fast)."
+ },
"extend_fragments": {
"type": "boolean",
"default": true,
@@ -468,8 +474,8 @@
"consensus_grouping": {
"type": "string",
"fa_icon": "fas fa-align-justify",
- "description": "Grouping key for consensus peaks when consensus_peak_mode is group. Options are [group, group_condition]. Defaults to group_condition when the samplesheet has a condition column, otherwise group.",
- "enum": ["group", "group_condition"]
+ "description": "Grouping key for consensus peaks when consensus_peak_mode is group. Options are [group, group_condition, all]. Defaults to group_condition when the samplesheet has a condition column, otherwise group. Set to all to compute consensus across all samples.",
+ "enum": ["group", "group_condition", "all"]
},
"replicate_threshold": {
"type": "number",
diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index f122b506..6748ded1 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -11,6 +11,11 @@ workflow INPUT_CHECK {
main:
SAMPLESHEET_CHECK ( samplesheet )
+ SAMPLESHEET_CHECK.out.warnings
+ .map { it.text?.trim() }
+ .filter { it }
+ .view { it }
+
SAMPLESHEET_CHECK.out.csv
.splitCsv ( header:true, sep:"," )
.map { get_samplesheet_paths(it) }
diff --git a/subworkflows/local/peak_calling_extended.nf b/subworkflows/local/peak_calling_extended.nf
index b6815410..b03dfefd 100644
--- a/subworkflows/local/peak_calling_extended.nf
+++ b/subworkflows/local/peak_calling_extended.nf
@@ -78,32 +78,49 @@ workflow PEAK_CALLING_EXTENDED {
}
// Pair targets with controls by control_group + control_condition and replicate logic
+ // Never drop targets when controls are missing (return null control instead)
def pair_targets_controls = { ch_target, ch_control ->
- def ch_control_grouped = ch_control
+ def control_map_ch = ch_control
.map { meta, file -> ["${meta.group}__${meta.condition}", [meta, file]] }
.groupTuple(by: [0])
+ .toList()
+ .map { list ->
+ def map = [:]
+ (list ?: []).each { entry ->
+ map[entry[0]] = entry[1]
+ }
+ map
+ }
- def ch_target_grouped = ch_target
- .map { meta, file -> ["${meta.control_group}__${meta.control_condition}", [meta, file]] }
+ def target_count_ch = ch_target
+ .map { meta, file -> ["${meta.control_group}__${meta.control_condition}", meta.replicate] }
.groupTuple(by: [0])
+ .map { key, reps -> [key, reps.size()] }
+ .toList()
+ .map { list ->
+ def map = [:]
+ (list ?: []).each { entry ->
+ map[entry[0]] = entry[1]
+ }
+ map
+ }
- ch_control_grouped
- .join(ch_target_grouped)
- .flatMap { key, control_list, target_list ->
+ ch_target
+ .combine(control_map_ch)
+ .combine(target_count_ch)
+ .flatMap { meta, file, control_map, target_count_map ->
+ def key = "${meta.control_group}__${meta.control_condition}"
+ def control_list = control_map.get(key)
+ if (!control_list) {
+ return [[meta + [control_missing: true], file, null]]
+ }
def control_by_rep = control_list.collectEntries { [(it[0].replicate): it[1]] }
def control_default = control_list.sort { it[0].replicate }[0][1]
def control_reps_count = control_list.size()
- def target_reps_count = target_list.size()
-
- def pairs = []
- target_list.each { target_item ->
- def tmeta = target_item[0]
- def tfile = target_item[1]
- def control_file = (control_reps_count == target_reps_count && control_by_rep.containsKey(tmeta.replicate)) ?
- control_by_rep[tmeta.replicate] : control_default
- pairs.add([tmeta, tfile, control_file])
- }
- pairs
+ def target_reps_count = target_count_map.get(key) ?: 1
+ def control_file = (control_reps_count == target_reps_count && control_by_rep.containsKey(meta.replicate)) ?
+ control_by_rep[meta.replicate] : control_default
+ return [[meta + [control_missing: false], file, control_file]]
}
}
@@ -111,22 +128,6 @@ workflow PEAK_CALLING_EXTENDED {
ch_bedgraph_target_cc = add_control_condition(bedgraph_target, bedgraph_control)
ch_bam_target_cc = add_control_condition(bam_target, bam_control)
- if (params.use_control) {
- ch_control_fallbacks = ch_bam_target_cc
- .filter { meta, bam -> meta.control_condition && meta.control_condition != meta.condition }
- .map { meta, bam ->
- [
- sample_id: meta.id,
- group: meta.group,
- condition: meta.condition,
- control_group: meta.control_group,
- control_condition: meta.control_condition,
- caller: 'pooled_control',
- reason: 'condition_mismatch'
- ]
- }
- }
-
/*
* SEACR
*/
@@ -136,8 +137,15 @@ workflow PEAK_CALLING_EXTENDED {
ch_bedgraph_target_cc.map { meta, bed -> [meta + [caller: 'seacr'], bed] },
bedgraph_control
)
+ ch_seacr_inputs = ch_seacr_pairs.map { meta, bed, control ->
+ if (!control) {
+ log.warn "No control found for group '${meta.control_group}' (condition '${meta.control_condition}') - running SEACR without control for ${meta.sample_id ?: meta.id}"
+ return [meta, bed, []]
+ }
+ [meta, bed, control]
+ }
SEACR_CALLPEAK (
- ch_seacr_pairs,
+ ch_seacr_inputs,
params.seacr_peak_threshold
)
ch_peaks_all = ch_peaks_all.mix(SEACR_CALLPEAK.out.bed)
@@ -163,8 +171,15 @@ workflow PEAK_CALLING_EXTENDED {
ch_bam_target_cc.map { meta, bam -> [meta + [caller: 'macs2'], bam] },
bam_control
)
+ ch_macs_inputs = ch_macs_pairs.map { meta, bam, control ->
+ if (!control) {
+ log.warn "No control found for group '${meta.control_group}' (condition '${meta.control_condition}') - running MACS2 without control for ${meta.sample_id ?: meta.id}"
+ return [meta, bam, []]
+ }
+ [meta, bam, control]
+ }
MACS2_CALLPEAK_LEGACY (
- ch_macs_pairs,
+ ch_macs_inputs,
params.macs_gsize
)
ch_peaks_all = ch_peaks_all.mix(MACS2_CALLPEAK_LEGACY.out.peak)
@@ -276,16 +291,41 @@ workflow PEAK_CALLING_EXTENDED {
.map { meta, bam, pooled_map ->
def entries = pooled_map.get(meta.control_group, [])
if (!entries) {
- return null
+ return [meta, bam, null, '', 'missing_control', 'skipped', 'no_control_for_group', null]
+ }
+ def desired_condition = meta.control_condition ?: meta.condition
+ def selected_entry = entries.find { it[0] == desired_condition }
+ def used_condition = null
+ def control_bam = null
+ def status = null
+ def reason = null
+ if (selected_entry) {
+ used_condition = selected_entry[0]
+ control_bam = selected_entry[1]
+ } else {
+ def na_entry = entries.find { it[0] == 'NA' }
+ if (na_entry) {
+ used_condition = na_entry[0]
+ control_bam = na_entry[1]
+ } else {
+ def fallback = entries.sort { it[0] }[0]
+ used_condition = fallback[0]
+ control_bam = fallback[1]
+ }
+ }
+ def exact_match = (used_condition == meta.condition)
+ if (exact_match) {
+ status = 'exact_match'
+ reason = 'exact_condition'
+ } else if (used_condition == 'NA') {
+ status = 'fallback_other_condition'
+ reason = 'legacy_na_control'
+ } else {
+ status = 'fallback_other_condition'
+ reason = 'condition_fallback'
}
- def exact = entries.find { it[0] == meta.control_condition }
- def chosen = exact ?: entries[0]
- def used_condition = chosen[0]
- def control_bam = chosen[1]
- def reason = exact ? null : 'condition_fallback'
- [meta, bam, control_bam, used_condition, reason]
+ return [meta, bam, control_bam, used_condition, status, 'used', reason, control_bam]
}
- .filter { it != null }
}
chrom_sizes_val = chrom_sizes
@@ -304,9 +344,17 @@ workflow PEAK_CALLING_EXTENDED {
*/
if ('epic2_200bp' in callers) {
ch_epic2_200 = ch_pooled_pairs
- .map { meta, bam, control_bam, used_condition, reason -> [meta + [caller: 'epic2_200bp'], bam, control_bam, used_condition, reason] }
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [meta + [caller: 'epic2_200bp'], bam, control_bam, used_condition, status, action, reason, pooled_path]
+ }
+ ch_epic2_200_branch = ch_epic2_200.branch {
+ run: it[2] != null
+ skip: it[2] == null
+ }
EPIC2_CALLPEAK_200 (
- ch_epic2_200.map { meta, bam, control_bam, used_condition, reason -> [meta, bam, control_bam] },
+ ch_epic2_200_branch.run.map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [meta, bam, control_bam]
+ },
params.epic2_genome,
200,
3,
@@ -315,17 +363,38 @@ workflow PEAK_CALLING_EXTENDED {
ch_peaks_all = ch_peaks_all.mix(EPIC2_CALLPEAK_200.out.peaks)
ch_versions = ch_versions.mix(EPIC2_CALLPEAK_200.out.versions)
ch_control_fallbacks = ch_control_fallbacks.mix(
- ch_epic2_200
- .filter { meta, bam, control_bam, used_condition, reason -> used_condition != meta.condition }
- .map { meta, bam, control_bam, used_condition, reason ->
+ ch_epic2_200_branch.run
+ .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' }
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
[
- sample_id: meta.id,
+ sample_id: meta.sample_id ?: meta.id,
group: meta.group,
condition: meta.condition,
+ caller_id: 'epic2_200bp',
control_group: meta.control_group,
- control_condition: used_condition,
- caller: 'epic2_200bp',
- reason: reason ?: 'condition_fallback'
+ selected_control_condition: used_condition,
+ status: status,
+ action: action,
+ reason: reason,
+ pooled_control_path: pooled_path
+ ]
+ }
+ )
+ ch_control_fallbacks = ch_control_fallbacks.mix(
+ ch_epic2_200_branch.skip
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ log.warn("Skipping epic2_200bp for sample ${meta.sample_id ?: meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.")
+ [
+ sample_id: meta.sample_id ?: meta.id,
+ group: meta.group,
+ condition: meta.condition,
+ caller_id: 'epic2_200bp',
+ control_group: meta.control_group,
+ selected_control_condition: used_condition ?: '',
+ status: status ?: 'missing_control',
+ action: 'skipped',
+ reason: reason ?: 'missing_control',
+ pooled_control_path: pooled_path ?: ''
]
}
)
@@ -333,9 +402,17 @@ workflow PEAK_CALLING_EXTENDED {
if ('epic2_150bp' in callers) {
ch_epic2_150 = ch_pooled_pairs
- .map { meta, bam, control_bam, used_condition, reason -> [meta + [caller: 'epic2_150bp'], bam, control_bam, used_condition, reason] }
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [meta + [caller: 'epic2_150bp'], bam, control_bam, used_condition, status, action, reason, pooled_path]
+ }
+ ch_epic2_150_branch = ch_epic2_150.branch {
+ run: it[2] != null
+ skip: it[2] == null
+ }
EPIC2_CALLPEAK_150 (
- ch_epic2_150.map { meta, bam, control_bam, used_condition, reason -> [meta, bam, control_bam] },
+ ch_epic2_150_branch.run.map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [meta, bam, control_bam]
+ },
params.epic2_genome,
150,
2,
@@ -344,17 +421,38 @@ workflow PEAK_CALLING_EXTENDED {
ch_peaks_all = ch_peaks_all.mix(EPIC2_CALLPEAK_150.out.peaks)
ch_versions = ch_versions.mix(EPIC2_CALLPEAK_150.out.versions)
ch_control_fallbacks = ch_control_fallbacks.mix(
- ch_epic2_150
- .filter { meta, bam, control_bam, used_condition, reason -> used_condition != meta.condition }
- .map { meta, bam, control_bam, used_condition, reason ->
+ ch_epic2_150_branch.run
+ .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' }
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
[
- sample_id: meta.id,
+ sample_id: meta.sample_id ?: meta.id,
group: meta.group,
condition: meta.condition,
+ caller_id: 'epic2_150bp',
control_group: meta.control_group,
- control_condition: used_condition,
- caller: 'epic2_150bp',
- reason: reason ?: 'condition_fallback'
+ selected_control_condition: used_condition,
+ status: status,
+ action: action,
+ reason: reason,
+ pooled_control_path: pooled_path
+ ]
+ }
+ )
+ ch_control_fallbacks = ch_control_fallbacks.mix(
+ ch_epic2_150_branch.skip
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ log.warn("Skipping epic2_150bp for sample ${meta.sample_id ?: meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.")
+ [
+ sample_id: meta.sample_id ?: meta.id,
+ group: meta.group,
+ condition: meta.condition,
+ caller_id: 'epic2_150bp',
+ control_group: meta.control_group,
+ selected_control_condition: used_condition ?: '',
+ status: status ?: 'missing_control',
+ action: 'skipped',
+ reason: reason ?: 'missing_control',
+ pooled_control_path: pooled_path ?: ''
]
}
)
@@ -362,9 +460,17 @@ workflow PEAK_CALLING_EXTENDED {
if ('epic2_25bp' in callers) {
ch_epic2_25 = ch_pooled_pairs
- .map { meta, bam, control_bam, used_condition, reason -> [meta + [caller: 'epic2_25bp'], bam, control_bam, used_condition, reason] }
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [meta + [caller: 'epic2_25bp'], bam, control_bam, used_condition, status, action, reason, pooled_path]
+ }
+ ch_epic2_25_branch = ch_epic2_25.branch {
+ run: it[2] != null
+ skip: it[2] == null
+ }
EPIC2_CALLPEAK_25 (
- ch_epic2_25.map { meta, bam, control_bam, used_condition, reason -> [meta, bam, control_bam] },
+ ch_epic2_25_branch.run.map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [meta, bam, control_bam]
+ },
params.epic2_genome,
25,
2,
@@ -373,17 +479,38 @@ workflow PEAK_CALLING_EXTENDED {
ch_peaks_all = ch_peaks_all.mix(EPIC2_CALLPEAK_25.out.peaks)
ch_versions = ch_versions.mix(EPIC2_CALLPEAK_25.out.versions)
ch_control_fallbacks = ch_control_fallbacks.mix(
- ch_epic2_25
- .filter { meta, bam, control_bam, used_condition, reason -> used_condition != meta.condition }
- .map { meta, bam, control_bam, used_condition, reason ->
+ ch_epic2_25_branch.run
+ .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' }
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
[
- sample_id: meta.id,
+ sample_id: meta.sample_id ?: meta.id,
group: meta.group,
condition: meta.condition,
+ caller_id: 'epic2_25bp',
control_group: meta.control_group,
- control_condition: used_condition,
- caller: 'epic2_25bp',
- reason: reason ?: 'condition_fallback'
+ selected_control_condition: used_condition,
+ status: status,
+ action: action,
+ reason: reason,
+ pooled_control_path: pooled_path
+ ]
+ }
+ )
+ ch_control_fallbacks = ch_control_fallbacks.mix(
+ ch_epic2_25_branch.skip
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ log.warn("Skipping epic2_25bp for sample ${meta.sample_id ?: meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.")
+ [
+ sample_id: meta.sample_id ?: meta.id,
+ group: meta.group,
+ condition: meta.condition,
+ caller_id: 'epic2_25bp',
+ control_group: meta.control_group,
+ selected_control_condition: used_condition ?: '',
+ status: status ?: 'missing_control',
+ action: 'skipped',
+ reason: reason ?: 'missing_control',
+ pooled_control_path: pooled_path ?: ''
]
}
)
@@ -394,9 +521,17 @@ workflow PEAK_CALLING_EXTENDED {
*/
if ('span_default' in callers) {
ch_span_default = ch_pooled_pairs
- .map { meta, bam, control_bam, used_condition, reason -> [meta + [caller: 'span_default'], bam, control_bam, used_condition, reason] }
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [meta + [caller: 'span_default'], bam, control_bam, used_condition, status, action, reason, pooled_path]
+ }
+ ch_span_default_branch = ch_span_default.branch {
+ run: it[2] != null
+ skip: it[2] == null
+ }
SPAN_OMNIPEAKS_DEFAULT (
- ch_span_default.map { meta, bam, control_bam, used_condition, reason -> [meta, bam, control_bam] },
+ ch_span_default_branch.run.map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [meta, bam, control_bam]
+ },
chrom_sizes_val,
omnipeaks_jar_val,
5,
@@ -406,17 +541,38 @@ workflow PEAK_CALLING_EXTENDED {
ch_peaks_all = ch_peaks_all.mix(SPAN_OMNIPEAKS_DEFAULT.out.peaks)
ch_versions = ch_versions.mix(SPAN_OMNIPEAKS_DEFAULT.out.versions)
ch_control_fallbacks = ch_control_fallbacks.mix(
- ch_span_default
- .filter { meta, bam, control_bam, used_condition, reason -> used_condition != meta.condition }
- .map { meta, bam, control_bam, used_condition, reason ->
+ ch_span_default_branch.run
+ .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' }
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [
+ sample_id: meta.sample_id ?: meta.id,
+ group: meta.group,
+ condition: meta.condition,
+ caller_id: 'span_default',
+ control_group: meta.control_group,
+ selected_control_condition: used_condition,
+ status: status,
+ action: action,
+ reason: reason,
+ pooled_control_path: pooled_path
+ ]
+ }
+ )
+ ch_control_fallbacks = ch_control_fallbacks.mix(
+ ch_span_default_branch.skip
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ log.warn("Skipping span_default for sample ${meta.sample_id ?: meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.")
[
- sample_id: meta.id,
+ sample_id: meta.sample_id ?: meta.id,
group: meta.group,
condition: meta.condition,
+ caller_id: 'span_default',
control_group: meta.control_group,
- control_condition: used_condition,
- caller: 'span_default',
- reason: reason ?: 'condition_fallback'
+ selected_control_condition: used_condition ?: '',
+ status: status ?: 'missing_control',
+ action: 'skipped',
+ reason: reason ?: 'missing_control',
+ pooled_control_path: pooled_path ?: ''
]
}
)
@@ -424,9 +580,17 @@ workflow PEAK_CALLING_EXTENDED {
if ('span_stringent' in callers) {
ch_span_stringent = ch_pooled_pairs
- .map { meta, bam, control_bam, used_condition, reason -> [meta + [caller: 'span_stringent'], bam, control_bam, used_condition, reason] }
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [meta + [caller: 'span_stringent'], bam, control_bam, used_condition, status, action, reason, pooled_path]
+ }
+ ch_span_stringent_branch = ch_span_stringent.branch {
+ run: it[2] != null
+ skip: it[2] == null
+ }
SPAN_OMNIPEAKS_STRINGENT (
- ch_span_stringent.map { meta, bam, control_bam, used_condition, reason -> [meta, bam, control_bam] },
+ ch_span_stringent_branch.run.map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [meta, bam, control_bam]
+ },
chrom_sizes_val,
omnipeaks_jar_val,
2,
@@ -436,17 +600,38 @@ workflow PEAK_CALLING_EXTENDED {
ch_peaks_all = ch_peaks_all.mix(SPAN_OMNIPEAKS_STRINGENT.out.peaks)
ch_versions = ch_versions.mix(SPAN_OMNIPEAKS_STRINGENT.out.versions)
ch_control_fallbacks = ch_control_fallbacks.mix(
- ch_span_stringent
- .filter { meta, bam, control_bam, used_condition, reason -> used_condition != meta.condition }
- .map { meta, bam, control_bam, used_condition, reason ->
+ ch_span_stringent_branch.run
+ .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' }
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ [
+ sample_id: meta.sample_id ?: meta.id,
+ group: meta.group,
+ condition: meta.condition,
+ caller_id: 'span_stringent',
+ control_group: meta.control_group,
+ selected_control_condition: used_condition,
+ status: status,
+ action: action,
+ reason: reason,
+ pooled_control_path: pooled_path
+ ]
+ }
+ )
+ ch_control_fallbacks = ch_control_fallbacks.mix(
+ ch_span_stringent_branch.skip
+ .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path ->
+ log.warn("Skipping span_stringent for sample ${meta.sample_id ?: meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.")
[
- sample_id: meta.id,
+ sample_id: meta.sample_id ?: meta.id,
group: meta.group,
condition: meta.condition,
+ caller_id: 'span_stringent',
control_group: meta.control_group,
- control_condition: used_condition,
- caller: 'span_stringent',
- reason: reason ?: 'condition_fallback'
+ selected_control_condition: used_condition ?: '',
+ status: status ?: 'missing_control',
+ action: 'skipped',
+ reason: reason ?: 'missing_control',
+ pooled_control_path: pooled_path ?: ''
]
}
)
diff --git a/subworkflows/local/peak_qc.nf b/subworkflows/local/peak_qc.nf
index 35d52a73..a54fbeac 100644
--- a/subworkflows/local/peak_qc.nf
+++ b/subworkflows/local/peak_qc.nf
@@ -6,9 +6,14 @@ include { PEAK_FRIP } from "../../modules/local/peak_
include { PEAK_COUNTS as PRIMARY_PEAK_COUNTS } from "../../modules/local/peak_counts"
include { PEAK_COUNTS as CONSENSUS_PEAK_COUNTS } from "../../modules/local/peak_counts"
include { CUT as CUT_CALC_REPROD } from "../../modules/local/linux/cut"
+include { BEDTOOLS_SORT as PEAKQC_BEDTOOLS_SORT } from "../../modules/local/for_patch/bedtools/sort/main"
include { BEDTOOLS_INTERSECT } from "../../modules/nf-core/bedtools/intersect/main.nf"
include { CALCULATE_PEAK_REPROD } from "../../modules/local/python/peak_reprod"
include { PLOT_CONSENSUS_PEAKS } from '../../modules/local/python/plot_consensus_peaks'
+include { PEAK_QC_TABLE_REPORT as PEAK_QC_FRIP_REPORT } from "../../modules/local/peak_qc_table_report"
+include { PEAK_QC_TABLE_REPORT as PEAK_QC_COUNTS_REPORT } from "../../modules/local/peak_qc_table_report"
+include { PEAK_QC_TABLE_REPORT as PEAK_QC_CONSENSUS_REPORT } from "../../modules/local/peak_qc_table_report"
+include { PEAK_QC_TABLE_REPORT as PEAK_QC_REPROD_REPORT } from "../../modules/local/peak_qc_table_report"
workflow PEAK_QC {
take:
@@ -16,6 +21,7 @@ workflow PEAK_QC {
peaks_with_ids // channel: [ val(meta), [ bed ] ]
consensus_peaks // channel: [ val(meta), [ bed ] ]
consensus_peaks_unfiltered // channel: [ val(meta), [ bed ] ]
+ chrom_sizes // channel: [ path ]
fragments_bed // channel: [ val(meta), [ bed ] ]
flagstat // channel: [ val(meta), [ flagstat ] ]
min_frip_overlap // val
@@ -31,15 +37,20 @@ workflow PEAK_QC {
/*
* CHANNEL: Combine channel together for frip calculation
*/
- peaks
- .map { row -> [row[0].id, row ].flatten()}
- .join ( fragments_bed.map { row -> [row[0].id, row ].flatten()} )
- .join ( flagstat.map { row -> [row[0].id, row ].flatten()} )
- .map { row -> [ row[1], row[2], row[4], row[6] ]}
- .set { ch_frip }
+ def ch_peaks_by_id = peaks
+ .map { row -> [row[0].id, row] }
+ .groupTuple(by: [0])
+
+ ch_peaks_by_id
+ .join ( fragments_bed.map { row -> [row[0].id, row[1]] } )
+ .join ( flagstat.map { row -> [row[0].id, row[1]] } )
+ .flatMap { id, peak_rows, fragments, flagstat_file ->
+ peak_rows.collect { peak_row -> [ peak_row[0], peak_row[1], fragments, flagstat_file ] }
+ }
+ .set { ch_frip }
/*
- * MODULE: Calculate frip scores for primary peaks
+ * MODULE: Calculate frip scores for sample peaks
*/
PEAK_FRIP(
ch_frip,
@@ -50,7 +61,7 @@ workflow PEAK_QC {
// PEAK_FRIP.out.frip_mqc | view
/*
- * MODULE: Calculate peak counts for primary peaks
+ * MODULE: Calculate peak counts for sample peaks
*/
PRIMARY_PEAK_COUNTS(
peaks,
@@ -77,10 +88,27 @@ workflow PEAK_QC {
)
ch_versions = ch_versions.mix(CUT_CALC_REPROD.out.versions)
+ /*
+ * CHANNEL: Normalize chrom_sizes to a single value for bedtools -g
+ */
+ ch_chrom_sizes_single = chrom_sizes
+ .collect()
+ .map { it instanceof List ? it[0] : it }
+
+ /*
+ * MODULE: Sort repro beds with genome order to satisfy -sorted intersect
+ */
+ PEAKQC_BEDTOOLS_SORT(
+ CUT_CALC_REPROD.out.file,
+ "bed",
+ ch_chrom_sizes_single
+ )
+ ch_versions = ch_versions.mix(PEAKQC_BEDTOOLS_SORT.out.versions)
+
/*
* CHANNEL: Group samples based on group and filter for groups that have more than one file
*/
- CUT_CALC_REPROD.out.file
+ PEAKQC_BEDTOOLS_SORT.out.sorted
.map { row ->
def group_key = consensus_grouping == 'group_condition' ? row[0].group_condition : row[0].group
[ "${group_key}__${row[0].caller}", row[1], row[0] ]
@@ -109,7 +137,9 @@ workflow PEAK_QC {
row[1].each{ file ->
def files_copy = row[1].collect()
files_copy.remove(files_copy.indexOf(file))
- new_output.add([[id: file.name.split("\\.")[0]], file, files_copy])
+ def file_id = file.name.split("\\.")[0]
+ def meta_out = row[0] + [id: file_id, sample_id: file_id]
+ new_output.add([meta_out, file, files_copy])
}
new_output
}
@@ -122,7 +152,7 @@ workflow PEAK_QC {
*/
BEDTOOLS_INTERSECT (
ch_beds_intersect,
- [[:],[]]
+ ch_chrom_sizes_single.map { [[:], it] }
)
ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions)
//EXAMPLE CHANNEL STRUCT: [[META], BED]
@@ -139,6 +169,82 @@ workflow PEAK_QC {
//EXAMPLE CHANNEL STRUCT: [[META], TSV]
//CALCULATE_PEAK_REPROD.out.tsv
+ /*
+ * MODULE: Write caller-aware peak QC tables
+ */
+ def sample_header_prefix = "sample_id\tgroup\tcondition\treplicate\tcaller_id"
+
+ PEAK_FRIP.out.frip_score
+ .map { meta, score_file ->
+ def score = score_file.text.trim()
+ def replicate = meta.replicate ?: 'NA'
+ def group = meta.group ?: 'NA'
+ def condition = meta.condition ?: 'NA'
+ def caller = meta.caller ?: 'NA'
+ def sample_id = meta.sample_id ?: meta.id
+ [sample_id, group, condition, replicate, caller, score].join('\t')
+ }
+ .toList()
+ .ifEmpty([])
+ .map { rows -> ['peak_frip_scores', "${sample_header_prefix}\tfrip_score", rows] }
+ .set { ch_frip_table }
+
+ PEAK_QC_FRIP_REPORT(ch_frip_table)
+ ch_versions = ch_versions.mix(PEAK_QC_FRIP_REPORT.out.versions)
+
+ PRIMARY_PEAK_COUNTS.out.count_value
+ .map { meta, count_file ->
+ def count = count_file.text.trim()
+ def replicate = meta.replicate ?: 'NA'
+ def group = meta.group ?: 'NA'
+ def condition = meta.condition ?: 'NA'
+ def caller = meta.caller ?: 'NA'
+ def sample_id = meta.sample_id ?: meta.id
+ [sample_id, group, condition, replicate, caller, count].join('\t')
+ }
+ .toList()
+ .ifEmpty([])
+ .map { rows -> ['peak_counts', "${sample_header_prefix}\tpeak_count", rows] }
+ .set { ch_peak_count_table }
+
+ PEAK_QC_COUNTS_REPORT(ch_peak_count_table)
+ ch_versions = ch_versions.mix(PEAK_QC_COUNTS_REPORT.out.versions)
+
+ CONSENSUS_PEAK_COUNTS.out.count_value
+ .map { meta, count_file ->
+ def count = count_file.text.trim()
+ def group = meta.group ?: 'NA'
+ def condition = meta.condition ?: 'NA'
+ def caller = meta.caller ?: 'NA'
+ def sample_id = meta.sample_id ?: meta.id
+ [sample_id, group, condition, 'NA', caller, count].join('\t')
+ }
+ .toList()
+ .ifEmpty([])
+ .map { rows -> ['consensus_peak_counts', "${sample_header_prefix}\tconsensus_peak_count", rows] }
+ .set { ch_consensus_count_table }
+
+ PEAK_QC_CONSENSUS_REPORT(ch_consensus_count_table)
+ ch_versions = ch_versions.mix(PEAK_QC_CONSENSUS_REPORT.out.versions)
+
+ CALCULATE_PEAK_REPROD.out.tsv
+ .map { meta, repro_file ->
+ def parts = repro_file.text.trim().split('\t')
+ def value = parts.size() > 1 ? parts[1] : ''
+ def group = meta.group ?: 'NA'
+ def condition = meta.condition ?: 'NA'
+ def caller = meta.caller ?: 'NA'
+ def sample_id = meta.sample_id ?: meta.id
+ [sample_id, group, condition, 'NA', caller, value].join('\t')
+ }
+ .toList()
+ .ifEmpty([])
+ .map { rows -> ['peak_reproducibility', "${sample_header_prefix}\tpeak_reproducibility_percent", rows] }
+ .set { ch_reprod_table }
+
+ PEAK_QC_REPROD_REPORT(ch_reprod_table)
+ ch_versions = ch_versions.mix(PEAK_QC_REPROD_REPORT.out.versions)
+
/*
* CHANNEL: Prep for upset input
*/
diff --git a/subworkflows/local/prepare_peakcalling.nf b/subworkflows/local/prepare_peakcalling.nf
index d9fcc019..da56bf24 100644
--- a/subworkflows/local/prepare_peakcalling.nf
+++ b/subworkflows/local/prepare_peakcalling.nf
@@ -8,6 +8,7 @@ include { BEDTOOLS_SORT } from "../../modules/local/for_patch/bedtools/s
include { UCSC_BEDCLIP } from "../../modules/nf-core/ucsc/bedclip/main"
include { UCSC_BEDGRAPHTOBIGWIG } from "../../modules/nf-core/ucsc/bedgraphtobigwig/main"
include { NORMALISATION_FACTORS_REPORT } from "../../modules/local/normalisation_factors_report"
+include { NORMALISATION_SCOPE_REFERENCE_REPORT } from "../../modules/local/normalisation_scope_reference_report"
workflow PREPARE_PEAKCALLING {
take:
@@ -24,6 +25,7 @@ workflow PREPARE_PEAKCALLING {
main:
ch_versions = Channel.empty()
ch_bedgraph = Channel.empty()
+ ch_scope_reference = Channel.empty()
def norm_scope = normalisation_scope ?: 'all'
def igg_scope = igg_scale_scope ?: 'legacy'
def median = { List values ->
@@ -92,26 +94,41 @@ workflow PREPARE_PEAKCALLING {
// EXAMPLE CHANNEL STRUCT: [id, scale_factor]
//ch_bam_scale_factor | view
+ ch_bam_scale_factor_report
+ .map { meta, bam, scale, reads, scope_id ->
+ [
+ sample_id: meta.sample_id ?: meta.id,
+ group: meta.group,
+ condition: meta.condition,
+ replicate: meta.replicate,
+ spikein_reads: reads,
+ scale_factor: scale,
+ scope_id: scope_id
+ ]
+ }
+ .map { record -> [ record.scope_id, record ] }
+ .groupTuple(by: [0])
+ .map { scope_id, records -> [ scope_id, records ] }
+ .set { ch_norm_factors }
+
+ NORMALISATION_FACTORS_REPORT ( ch_norm_factors )
+ ch_versions = ch_versions.mix(NORMALISATION_FACTORS_REPORT.out.versions)
+
if (params.dump_scale_factors) {
- ch_bam_scale_factor_report
- .map { meta, bam, scale, reads, scope_id ->
- [
- sample_id: meta.id,
- group: meta.group,
- condition: meta.condition,
- replicate: meta.replicate,
- spikein_reads: reads,
- scale_factor: scale,
- scope_id: scope_id
- ]
- }
- .map { record -> [ record.scope_id, record ] }
- .groupTuple(by: [0])
- .map { scope_id, records -> [ scope_id, records ] }
- .set { ch_norm_factors }
+ if (norm_scope == 'all') {
+ ch_scope_reference = Channel.of([scope_id: 'all', reference_reads: params.normalisation_c])
+ } else {
+ ch_scope_reference = ch_scope_ref
+ .map { scope_id, ref -> [scope_id: scope_id, reference_reads: ref] }
+ }
+
+ ch_scope_reference
+ .toList()
+ .map { records -> records ?: [] }
+ .set { ch_scope_reference_records }
- NORMALISATION_FACTORS_REPORT ( ch_norm_factors )
- ch_versions = ch_versions.mix(NORMALISATION_FACTORS_REPORT.out.versions)
+ NORMALISATION_SCOPE_REFERENCE_REPORT ( ch_scope_reference_records )
+ ch_versions = ch_versions.mix(NORMALISATION_SCOPE_REFERENCE_REPORT.out.versions)
}
}
else if (norm_mode == "None") {
diff --git a/tests/assets/samplesheet_condition_missing_control.csv b/tests/assets/samplesheet_condition_missing_control.csv
new file mode 100644
index 00000000..e00945ee
--- /dev/null
+++ b/tests/assets/samplesheet_condition_missing_control.csv
@@ -0,0 +1,4 @@
+group,condition,replicate,fastq_1,fastq_2,control
+h3k27me3,Control,1,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r2.fastq.gz,igg_ctrl
+h3k27me3,Treatment,1,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r2.fastq.gz,igg_ctrl
+igg_ctrl,Control,1,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/igg_rep1_r1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/igg_rep1_r2.fastq.gz,
diff --git a/tests/assets/samplesheet_condition_no_controls.csv b/tests/assets/samplesheet_condition_no_controls.csv
new file mode 100644
index 00000000..0f1496c7
--- /dev/null
+++ b/tests/assets/samplesheet_condition_no_controls.csv
@@ -0,0 +1,3 @@
+group,condition,replicate,fastq_1,fastq_2,control
+h3k27me3,Control,1,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r2.fastq.gz,igg_ctrl
+h3k27me3,Treatment,1,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r2.fastq.gz,igg_ctrl
diff --git a/tests/test_02_samplesheet_check.yml b/tests/test_02_samplesheet_check.yml
index c3a85c52..2a463535 100644
--- a/tests/test_02_samplesheet_check.yml
+++ b/tests/test_02_samplesheet_check.yml
@@ -136,6 +136,35 @@
files:
- path: results/pipeline_info/samplesheet.valid.csv
+# Test condition samplesheet missing control (fail-fast)
+- name: test_samplesheet_condition_missing_control_failfast
+ command: nextflow run main.nf -profile docker,test --only_input --input tests/assets/samplesheet_condition_missing_control.csv -c tests/config/nextflow.config
+ tags:
+ - test_samplesheet_condition
+ exit_code: 1
+ stdout:
+ contains:
+ - "Missing condition-matched controls"
+
+# Test condition samplesheet missing control with override
+- name: test_samplesheet_condition_missing_control_allow_cross
+ command: nextflow run main.nf -profile docker,test --only_input --allow_cross_condition_controls --input tests/assets/samplesheet_condition_missing_control.csv -c tests/config/nextflow.config
+ tags:
+ - test_samplesheet_condition
+ files:
+ - path: results/pipeline_info/samplesheet.valid.csv
+
+# Test condition samplesheet with no control rows and override
+- name: test_samplesheet_condition_no_controls_allow_cross
+ command: nextflow run main.nf -profile docker,test --only_input --allow_cross_condition_controls --input tests/assets/samplesheet_condition_no_controls.csv -c tests/config/nextflow.config
+ tags:
+ - test_samplesheet_condition
+ files:
+ - path: results/pipeline_info/samplesheet.valid.csv
+ stdout:
+ contains:
+ - "No control rows exist for one or more control groups"
+
# Test small tech reps
- name: test_samplesheet_small_tech_reps
command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-small-tech-reps.csv -c tests/config/nextflow.config
diff --git a/tests/test_04_bam_scaling.yml b/tests/test_04_bam_scaling.yml
index 2b2f68f7..beaea72b 100644
--- a/tests/test_04_bam_scaling.yml
+++ b/tests/test_04_bam_scaling.yml
@@ -4,7 +4,7 @@
- test_bam_scale
- test_bam_scale_none
files:
- - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph
+ - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph
- name: test_bam_scale_spikein
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode Spikein -c tests/config/nextflow.config
@@ -12,7 +12,8 @@
- test_bam_scale
- test_bam_scale_spikein
files:
- - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph
+ - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph
+ - path: results/03_peak_calling/00_normalisation_factors/all.tsv
- name: test_bam_scale_cpm
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode CPM -c tests/config/nextflow.config
@@ -20,7 +21,7 @@
- test_bam_scale
- test_bam_scale_cpm
files:
- - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph
+ - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph
- name: test_bam_scale_rpkm
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode RPKM -c tests/config/nextflow.config
@@ -28,7 +29,7 @@
- test_bam_scale
- test_bam_scale_rpkm
files:
- - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph
+ - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph
- name: test_bam_scale_bpm
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode BPM -c tests/config/nextflow.config
@@ -36,7 +37,7 @@
- test_bam_scale
- test_bam_scale_bpm
files:
- - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph
+ - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph
- name: test_bam_scale_cpm_iggscale
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode CPM --igg_scale_factor 0.1 -c tests/config/nextflow.config
@@ -44,8 +45,8 @@
- test_bam_scale
- test_bam_scale_cpm_iggscale
files:
- - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph
- - path: results/03_peak_calling/01_bam_to_bedgraph/igg_ctrl_NA_rep1_T1.sorted.bedGraph
+ - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph
+ - path: results/03_peak_calling/01_bam_to_bedgraph/igg_ctrl_NA_rep1.sorted.bedGraph
- name: test_normalisation_mode_invalid
command: nextflow run main.nf -profile docker,test --only_peak_calling --normalisation_mode test -c tests/config/nextflow.config
diff --git a/tests/test_05_peak_callers.yml b/tests/test_05_peak_callers.yml
index 70219dac..c10e23da 100644
--- a/tests/test_05_peak_callers.yml
+++ b/tests/test_05_peak_callers.yml
@@ -4,7 +4,7 @@
- test_peak_callers
- test_peak_callers_seacr
files:
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed
- name: test_peak_callers_macs2
command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller macs2 -c tests/config/nextflow.config
@@ -12,7 +12,7 @@
- test_peak_callers
- test_peak_callers_macs2
files:
- - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_T1_macs2_peaks.narrowPeak
+ - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_macs2_peaks.narrowPeak
- name: test_peak_callers_macs2_broad_peak
command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller macs2 --macs2_narrow_peak false -c tests/config/nextflow.config
@@ -20,7 +20,7 @@
- test_peak_callers
- test_peak_callers_macs2
files:
- - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_T1_macs2_peaks.broadPeak
+ - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_macs2_peaks.broadPeak
- name: test_peak_callers_invalid_name
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller test -c tests/config/nextflow.config
@@ -35,8 +35,8 @@
- test_peak_callers
- test_peak_callers_seacr_macs2
files:
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed
- - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_T1_macs2_peaks.narrowPeak
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_macs2_peaks.narrowPeak
- name: test_peak_callers_macs2_seacr
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller macs2,seacr -c tests/config/nextflow.config
@@ -44,8 +44,8 @@
- test_peak_callers
- test_peak_callers_macs2_seacr
files:
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed
- - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_T1_macs2_peaks.narrowPeak
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_macs2_peaks.narrowPeak
- name: test_peak_callers_seacr_macs2_noigg
command: nextflow run main.nf -profile docker,test_no_control --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr,macs2 -c tests/config/nextflow.config
@@ -53,8 +53,8 @@
- test_peak_callers
- test_peak_callers_seacr_macs2_noigg
files:
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed
- - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_T1_macs2_peaks.narrowPeak
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_macs2_peaks.narrowPeak
- name: test_peak_callers_seacr_single_ctrl_multi_rep
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-single-ctrl.csv -c tests/config/nextflow.config
@@ -62,10 +62,10 @@
- test_peak_callers
- test_peak_callers_ctrl_tests
files:
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep2_T1_seacr.seacr.peaks.stringent.bed
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep2_T1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep2_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep2_seacr.seacr.peaks.stringent.bed
- name: test_peak_callers_seacr_multi_single_ctrl_multi_rep
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-multi-single-ctrl.csv -c tests/config/nextflow.config
@@ -73,10 +73,10 @@
- test_peak_callers
- test_peak_callers_ctrl_tests
files:
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep2_T1_seacr.seacr.peaks.stringent.bed
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep2_T1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep2_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep2_seacr.seacr.peaks.stringent.bed
- name: test_peak_callers_gopeaks_condition
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller gopeaks_narrow --input tests/assets/samplesheet_condition.csv -c tests/config/nextflow.config
@@ -84,8 +84,8 @@
- test_peak_callers
- test_peak_callers_gopeaks
files:
- - path: results/03_peak_calling/04_called_peaks/gopeaks_narrow/h3k27me3_Control_rep1_T1_gopeaks_narrow.bed
- - path: results/03_peak_calling/04_called_peaks/gopeaks_narrow/h3k27me3_Control_rep1_T1_gopeaks_narrow_gopeaks.json
+ - path: results/03_peak_calling/04_called_peaks/gopeaks_narrow/h3k27me3_Control_rep1_gopeaks_narrow.bed
+ - path: results/03_peak_calling/04_called_peaks/gopeaks_narrow/h3k27me3_Control_rep1_gopeaks_narrow_gopeaks.json
- path: results/03_peak_calling/05_consensus_peaks/h3k27me3/Control/h3k27me3_Control_gopeaks_narrow.consensus.peak_counts.bed
- name: test_peak_callers_epic2_condition
@@ -98,7 +98,42 @@
- path: results/03_peak_calling/01_pooled_controls/igg_ctrl_Control.bam.bai
- path: results/03_peak_calling/01_pooled_controls/igg_ctrl_Treatment.bam
- path: results/03_peak_calling/01_pooled_controls/igg_ctrl_Treatment.bam.bai
- - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_T1_epic2_200bp.peaks
+ - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_epic2_200bp.peaks
+
+- name: test_peak_callers_epic2_missing_control_fallback
+ command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller epic2_200bp --epic2_genome hg38 --allow_cross_condition_controls --input tests/assets/samplesheet_condition_missing_control.csv -c tests/config/nextflow.config
+ tags:
+ - test_peak_callers
+ - test_peak_callers_epic2
+ files:
+ - path: results/03_peak_calling/07_qc_tables/control_pooling_fallbacks.tsv
+ contains:
+ - "h3k27me3_Treatment_rep1"
+ - "epic2_200bp"
+ - "fallback_other_condition"
+ - "used"
+ - "condition_fallback"
+ - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_epic2_200bp.peaks
+ - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Treatment_rep1_epic2_200bp.peaks
+ should_exist: true
+
+- name: test_peak_callers_epic2_no_controls_skip
+ command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller epic2_200bp --epic2_genome hg38 --allow_cross_condition_controls --input tests/assets/samplesheet_condition_no_controls.csv -c tests/config/nextflow.config
+ tags:
+ - test_peak_callers
+ - test_peak_callers_epic2
+ files:
+ - path: results/03_peak_calling/07_qc_tables/control_pooling_fallbacks.tsv
+ contains:
+ - "h3k27me3_Control_rep1"
+ - "epic2_200bp"
+ - "missing_control"
+ - "skipped"
+ - "no_control_for_group"
+ - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_epic2_200bp.peaks
+ should_exist: false
+ - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Treatment_rep1_epic2_200bp.peaks
+ should_exist: false
- name: test_peak_callers_preset_extended
command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller_preset extended --epic2_genome hg38 --omnipeaks_jar tests/assets/omnipeaks_stub.jar -c tests/config/nextflow.config
@@ -107,7 +142,7 @@
- test_peak_callers_preset_extended
files:
- path: results/03_peak_calling/01_pooled_controls/igg_ctrl_NA.bam
- - path: results/03_peak_calling/04_called_peaks/macs2_narrow/h3k27me3_NA_rep1_T1_macs2_narrow_peaks.narrowPeak
- - path: results/03_peak_calling/04_called_peaks/gopeaks_broad/h3k27me3_NA_rep1_T1_gopeaks_broad.bed
- - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_NA_rep1_T1_epic2_200bp.peaks
- - path: results/03_peak_calling/04_called_peaks/span_default/h3k27me3_NA_rep1_T1_span_default.peak
+ - path: results/03_peak_calling/04_called_peaks/macs2_narrow/h3k27me3_NA_rep1_macs2_narrow_peaks.narrowPeak
+ - path: results/03_peak_calling/04_called_peaks/gopeaks_broad/h3k27me3_NA_rep1_gopeaks_broad.bed
+ - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_NA_rep1_epic2_200bp.peaks
+ - path: results/03_peak_calling/04_called_peaks/span_default/h3k27me3_NA_rep1_span_default.peak
diff --git a/tests/test_06_consensus_peaks.yml b/tests/test_06_consensus_peaks.yml
index 093fe3e3..29c5ca10 100644
--- a/tests/test_06_consensus_peaks.yml
+++ b/tests/test_06_consensus_peaks.yml
@@ -10,7 +10,7 @@
- path: results/03_peak_calling/05_consensus_peaks/h3k27me3/NA/h3k27me3_NA_seacr_consensus.awk.bed
- name: test_consensus_peaks_all
- command: nextflow run main.nf -profile docker,test_full_small --only_peak_calling --skip_fastqc --skip_multiqc --skip_preseq --consensus_peak_mode all -c tests/config/nextflow.config
+ command: nextflow run main.nf -profile docker,test_full_small --only_peak_calling --skip_fastqc --skip_multiqc --skip_preseq --consensus_grouping all -c tests/config/nextflow.config
tags:
- test_consensus_peaks
- test_consensus_peaks_all
diff --git a/tests/test_07_peak_qc_multi_caller.yml b/tests/test_07_peak_qc_multi_caller.yml
new file mode 100644
index 00000000..f5e5b029
--- /dev/null
+++ b/tests/test_07_peak_qc_multi_caller.yml
@@ -0,0 +1,15 @@
+- name: test_peak_qc_multi_caller
+ command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_preseq --skip_multiqc --peakcaller seacr,macs2_narrow -c tests/config/nextflow.config
+ tags:
+ - test_peak_qc_multi_caller
+ files:
+ - path: results/03_peak_calling/07_qc_tables/peak_counts.tsv
+ contains:
+ - "seacr"
+ - "macs2_narrow"
+ must_not_contain:
+ - "_T"
+ - path: results/03_peak_calling/07_qc_tables/peak_frip_scores.tsv
+ contains:
+ - "seacr"
+ - "macs2_narrow"
diff --git a/tests/test_verify_output_01_save_merged.yml b/tests/test_verify_output_01_save_merged.yml
index cd6920dc..54ee4042 100644
--- a/tests/test_verify_output_01_save_merged.yml
+++ b/tests/test_verify_output_01_save_merged.yml
@@ -3,5 +3,5 @@
tags:
- verify_output_save_merged
files:
- - path: results/01_prealign/merged_fastq/h3k27me3_NA_rep1_T1_1.merged.fastq.gz
- - path: results/01_prealign/merged_fastq/h3k27me3_NA_rep1_T1_2.merged.fastq.gz
+ - path: results/01_prealign/merged_fastq/h3k27me3_NA_rep1_1.merged.fastq.gz
+ - path: results/01_prealign/merged_fastq/h3k27me3_NA_rep1_2.merged.fastq.gz
diff --git a/tests/test_verify_output_01_save_trimmed.yml b/tests/test_verify_output_01_save_trimmed.yml
index 5eed8467..1caff730 100644
--- a/tests/test_verify_output_01_save_trimmed.yml
+++ b/tests/test_verify_output_01_save_trimmed.yml
@@ -3,4 +3,4 @@
tags:
- verify_output_save_trimmed
files:
- - path: results/01_prealign/trimgalore/h3k27me3_NA_rep1_T1_1.trimmed.fastq.gz
+ - path: results/01_prealign/trimgalore/h3k27me3_NA_rep1_1.trimmed.fastq.gz
diff --git a/tests/test_verify_output_01_skip_fastqc.yml b/tests/test_verify_output_01_skip_fastqc.yml
index 9db57056..db4a5f26 100644
--- a/tests/test_verify_output_01_skip_fastqc.yml
+++ b/tests/test_verify_output_01_skip_fastqc.yml
@@ -4,9 +4,9 @@
- verify_output_skip_fastqc
- verify_output_skip_fastqc_true
files:
- - path: results/01_prealign/pretrim_fastqc/h3k27me3_NA_rep1_T1_1_fastqc.html
+ - path: results/01_prealign/pretrim_fastqc/h3k27me3_NA_rep1_1_fastqc.html
should_exist: false
- - path: results/01_prealign/trimgalore/fastqc/h3k27me3_NA_rep1_T1_1_fastqc.trimmed.html
+ - path: results/01_prealign/trimgalore/fastqc/h3k27me3_NA_rep1_1_fastqc.trimmed.html
should_exist: false
# - name: test_verify_output_skip_fastqc_false
# command: nextflow run main.nf -profile docker,test --only_preqc --skip_fastqc false
@@ -14,5 +14,5 @@
# - verify_output_skip_fastqc
# - verify_output_skip_fastqc_false
# files:
-# - path: results/01_prealign/pretrim_fastqc/h3k27me3_NA_rep1_T1_1_fastqc.html
-# - path: results/01_prealign/trimgalore/fastqc/h3k27me3_NA_rep1_T1_1.trimmed_fastqc.html
+# - path: results/01_prealign/pretrim_fastqc/h3k27me3_NA_rep1_1_fastqc.html
+# - path: results/01_prealign/trimgalore/fastqc/h3k27me3_NA_rep1_1.trimmed_fastqc.html
diff --git a/tests/test_verify_output_01_skip_trimming.yml b/tests/test_verify_output_01_skip_trimming.yml
index 438558c7..e9e302b2 100644
--- a/tests/test_verify_output_01_skip_trimming.yml
+++ b/tests/test_verify_output_01_skip_trimming.yml
@@ -3,5 +3,5 @@
tags:
- verify_output_skip_trimming
files:
- - path: results/01_prealign/trimgalore/h3k27me3_NA_rep1_T1_1.trimmed.fastq.gz
+ - path: results/01_prealign/trimgalore/h3k27me3_NA_rep1_1.trimmed.fastq.gz
should_exist: false
diff --git a/tests/test_verify_output_03_only_align.yml b/tests/test_verify_output_03_only_align.yml
index a7cd7252..2d4adf50 100644
--- a/tests/test_verify_output_03_only_align.yml
+++ b/tests/test_verify_output_03_only_align.yml
@@ -4,12 +4,12 @@
- verify_output_align_only_align
- verify_output_align_only_align_end_to_end
files:
- - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.sorted.bam
- - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.target.sorted.bam
- - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.target.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.flagstat
+ - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.sorted.bam
+ - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.target.sorted.bam
+ - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.target.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.flagstat
- name: test_verify_output_only_align_local
command: nextflow run main.nf -profile docker,test --only_alignment --skip_fastqc -c tests/config/nextflow.config --end_to_end false
@@ -17,9 +17,9 @@
- verify_output_align_only_align
- verify_output_align_only_align_local
files:
- - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.sorted.bam
- - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.target.sorted.bam
- - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.target.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.flagstat
+ - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.sorted.bam
+ - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.target.sorted.bam
+ - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.target.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.flagstat
diff --git a/tests/test_verify_output_03_save_align_intermed.yml b/tests/test_verify_output_03_save_align_intermed.yml
index 79de55d2..f5541b61 100644
--- a/tests/test_verify_output_03_save_align_intermed.yml
+++ b/tests/test_verify_output_03_save_align_intermed.yml
@@ -3,8 +3,8 @@
tags:
- verify_output_align_intermed
files:
- - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.bam
- - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.filtered.bam
- - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.sorted.bam
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.bam
+ - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.filtered.bam
+ - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.sorted.bam
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam.bai
diff --git a/tests/test_verify_output_03_save_spikein_align.yml b/tests/test_verify_output_03_save_spikein_align.yml
index 4c8cca19..f6d77c14 100644
--- a/tests/test_verify_output_03_save_spikein_align.yml
+++ b/tests/test_verify_output_03_save_spikein_align.yml
@@ -3,9 +3,9 @@
tags:
- verify_output_align_save_spikein_align
files:
- - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1_T1.spikein.sorted.bam
- - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1_T1.spikein.sorted.bam
- - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1_T1.spikein.sorted.bam.bai
- - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1_T1.spikein.sorted.bam.bai
- - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1_T1.flagstat
+ - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1.spikein.sorted.bam
+ - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1.spikein.sorted.bam
+ - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1.spikein.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1.spikein.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1.flagstat
diff --git a/tests/test_verify_output_03_save_unaligned.yml b/tests/test_verify_output_03_save_unaligned.yml
index 9a6da92f..beeb7b0d 100644
--- a/tests/test_verify_output_03_save_unaligned.yml
+++ b/tests/test_verify_output_03_save_unaligned.yml
@@ -3,7 +3,7 @@
tags:
- verify_output_align_save_unaligned
files:
- - path: results/02_alignment/bowtie2/target/unmapped/h3k27me3_NA_rep1_T1.unmapped_1.fastq.gz
- - path: results/02_alignment/bowtie2/target/unmapped/igg_ctrl_NA_rep1_T1.unmapped_2.fastq.gz
- - path: results/02_alignment/bowtie2/target/unmapped/h3k27me3_NA_rep1_T1.unmapped_1.fastq.gz
- - path: results/02_alignment/bowtie2/target/unmapped/igg_ctrl_NA_rep1_T1.unmapped_2.fastq.gz
+ - path: results/02_alignment/bowtie2/target/unmapped/h3k27me3_NA_rep1.unmapped_1.fastq.gz
+ - path: results/02_alignment/bowtie2/target/unmapped/igg_ctrl_NA_rep1.unmapped_2.fastq.gz
+ - path: results/02_alignment/bowtie2/target/unmapped/h3k27me3_NA_rep1.unmapped_1.fastq.gz
+ - path: results/02_alignment/bowtie2/target/unmapped/igg_ctrl_NA_rep1.unmapped_2.fastq.gz
diff --git a/tests/test_verify_output_04_duplicates.yml b/tests/test_verify_output_04_duplicates.yml
index 4f0c8713..60cf8a9e 100644
--- a/tests/test_verify_output_04_duplicates.yml
+++ b/tests/test_verify_output_04_duplicates.yml
@@ -4,12 +4,12 @@
- verify_output_align_duplicates
- verify_output_align_duplicates_mark
files:
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.flagstat
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.flagstat
- name: test_verify_output_duplicates_remove
command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_preseq --dedup_target_reads false -c tests/config/nextflow.config
@@ -17,14 +17,14 @@
- verify_output_align_duplicates
- verify_output_align_duplicates_remove
files:
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.flagstat
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.flagstat
should_exist: false
- name: test_verify_output_duplicates_remove_target
@@ -33,18 +33,18 @@
- verify_output_align_duplicates
- verify_output_align_duplicates_remove_target
files:
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.target.dedup.sorted.bam
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam
- - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.target.dedup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.flagstat
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.target.dedup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.target.dedup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.flagstat
- name: test_verify_output_linear_duplicates_remove
command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_preseq --dedup_target_reads false -c tests/config/nextflow.config --remove_linear_duplicates true
@@ -52,19 +52,19 @@
- verify_output_align_linear_duplicates
- verify_output_align_linear_duplicates_remove
files:
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup.sorted.bam
- - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup_metrics.txt
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup_metrics.txt
- name: test_verify_output_linear_duplicates_remove_target
command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_preseq --dedup_target_reads true -c tests/config/nextflow.config --remove_linear_duplicates true
@@ -72,23 +72,23 @@
- verify_output_align_linear_duplicates
- verify_output_align_linear_duplicates_remove_target
files:
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.target.dedup.sorted.bam
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam
- - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.target.dedup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1_T1.target.linear_dedup.sorted.bam
- - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup.sorted.bam
- - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1_T1.target.linear_dedup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1_T1.target.linear_dedup_metrics.txt
- - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup_metrics.txt
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.target.dedup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.target.dedup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1.target.linear_dedup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1.target.linear_dedup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1.target.linear_dedup_metrics.txt
+ - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup_metrics.txt
diff --git a/tests/test_verify_output_04_only_filtering.yml b/tests/test_verify_output_04_only_filtering.yml
index 291b1207..55e9971d 100644
--- a/tests/test_verify_output_04_only_filtering.yml
+++ b/tests/test_verify_output_04_only_filtering.yml
@@ -4,12 +4,12 @@
- verify_output_only_filtering
- verify_output_only_filtering_without_mitochondrial_reads
files:
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.flagstat
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai
- - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.flagstat
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.flagstat
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai
+ - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.flagstat
- name: test_output_only_filtering_with_mitochondrial_reads
command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_removeduplicates --skip_preseq --remove_mitochondrial_reads false -c tests/config/nextflow.config
diff --git a/tests/test_verify_output_05_only_peak_calling.yml b/tests/test_verify_output_05_only_peak_calling.yml
index 6d4247ab..d993a28d 100644
--- a/tests/test_verify_output_05_only_peak_calling.yml
+++ b/tests/test_verify_output_05_only_peak_calling.yml
@@ -3,14 +3,14 @@
tags:
- verify_output_peak_calling_only_peak_calling
files:
- - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph
- - path: results/03_peak_calling/01_bam_to_bedgraph/igg_ctrl_NA_rep1_T1.sorted.bedGraph
- - path: results/03_peak_calling/02_clip_bed/h3k27me3_NA_rep1_T1.clipped.bedGraph
- - path: results/03_peak_calling/02_clip_bed/igg_ctrl_NA_rep1_T1.clipped.bedGraph
- - path: results/03_peak_calling/03_bed_to_bigwig/h3k27me3_NA_rep1_T1.bigWig
- - path: results/03_peak_calling/03_bed_to_bigwig/igg_ctrl_NA_rep1_T1.bigWig
- - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed
- - path: results/03_peak_calling/04_called_peaks/seacr/igg_ctrl_NA_rep1_T1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph
+ - path: results/03_peak_calling/01_bam_to_bedgraph/igg_ctrl_NA_rep1.sorted.bedGraph
+ - path: results/03_peak_calling/02_clip_bed/h3k27me3_NA_rep1.clipped.bedGraph
+ - path: results/03_peak_calling/02_clip_bed/igg_ctrl_NA_rep1.clipped.bedGraph
+ - path: results/03_peak_calling/03_bed_to_bigwig/h3k27me3_NA_rep1.bigWig
+ - path: results/03_peak_calling/03_bed_to_bigwig/igg_ctrl_NA_rep1.bigWig
+ - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed
+ - path: results/03_peak_calling/04_called_peaks/seacr/igg_ctrl_NA_rep1_seacr.seacr.peaks.stringent.bed
should_exist: false
- path: results/03_peak_calling/05_consensus_peaks/h3k27me3/NA/h3k27me3_NA_seacr_consensus.awk.bed
- path: results/03_peak_calling/05_consensus_peaks/h3k27me3/NA/h3k27me3_NA_seacr.consensus.peak_counts.bed
diff --git a/tests/test_verify_output_06_skip_dt_qc.yml b/tests/test_verify_output_06_skip_dt_qc.yml
index ff8ff037..d4cb5b1e 100644
--- a/tests/test_verify_output_06_skip_dt_qc.yml
+++ b/tests/test_verify_output_06_skip_dt_qc.yml
@@ -6,7 +6,7 @@
files:
- path: results/04_reporting/deeptools_qc/all_target_bams.plotCorrelation.pdf
- path: results/04_reporting/deeptools_qc/all_target_bams.plotPCA.pdf
- - path: results/04_reporting/deeptools_qc/h3k4me3_NA_rep1_T1.plotFingerprint.pdf
+ - path: results/04_reporting/deeptools_qc/h3k4me3_NA_rep1.plotFingerprint.pdf
- name: verify_output_reporting_skip_dtqc_true
command: nextflow run main.nf -profile docker,test_full_small --skip_fastqc --skip_multiqc --skip_preseq --skip_dt_qc -c tests/config/nextflow.config
@@ -18,5 +18,5 @@
should_exist: false
- path: results/04_reporting/deeptools_qc/all_target_bams.plotPCA.pdf
should_exist: false
- - path: results/04_reporting/deeptools_qc/h3k4me3_NA_rep1_T1.plotFingerprint.pdf
+ - path: results/04_reporting/deeptools_qc/h3k4me3_NA_rep1.plotFingerprint.pdf
should_exist: false
diff --git a/tests/test_verify_output_06_skip_heatmaps.yml b/tests/test_verify_output_06_skip_heatmaps.yml
index 40d59ea7..0df553a2 100644
--- a/tests/test_verify_output_06_skip_heatmaps.yml
+++ b/tests/test_verify_output_06_skip_heatmaps.yml
@@ -4,8 +4,8 @@
- verify_output_reporting_skip_heatmaps
- verify_output_reporting_skip_heatmaps_false
files:
- - path: results/04_reporting/deeptools_heatmaps/gene/h3k27me3_NA_rep1_T1.computeMatrix.mat.gz
- - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_NA_rep1_T1.computeMatrix.mat.gz
+ - path: results/04_reporting/deeptools_heatmaps/gene/h3k27me3_NA_rep1.computeMatrix.mat.gz
+ - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_NA_rep1_seacr.computeMatrix.mat.gz
- name: verify_output_reporting_skip_heatmaps_true
command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_multiqc --skip_preseq --skip_heatmaps -c tests/config/nextflow.config
@@ -13,9 +13,9 @@
- verify_output_reporting_skip_heatmaps
- verify_output_reporting_skip_heatmaps_true
files:
- - path: results/04_reporting/deeptools_heatmaps/gene/h3k27me3_NA_rep1_T1.computeMatrix.mat.gz
+ - path: results/04_reporting/deeptools_heatmaps/gene/h3k27me3_NA_rep1.computeMatrix.mat.gz
should_exist: false
- - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_NA_rep1_T1.computeMatrix.mat.gz
+ - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_NA_rep1_seacr.computeMatrix.mat.gz
should_exist: false
- name: verify_output_reporting_skip_heatmaps_grouped
diff --git a/tests/test_verify_output_06_skip_preseq.yml b/tests/test_verify_output_06_skip_preseq.yml
index 115b5ba5..4dfe42bb 100644
--- a/tests/test_verify_output_06_skip_preseq.yml
+++ b/tests/test_verify_output_06_skip_preseq.yml
@@ -10,9 +10,9 @@
- verify_output_reporting_skip_preseq
- verify_output_reporting_skip_preseq_true
files:
- - path: results/04_reporting/preseq/h3k4me3_NA_rep1_T1.command.log
+ - path: results/04_reporting/preseq/h3k4me3_NA_rep1.command.log
should_exist: false
- - path: results/04_reporting/preseq/h3k4me3_NA_rep2_T1.command.log
+ - path: results/04_reporting/preseq/h3k4me3_NA_rep2.command.log
should_exist: false
- - path: results/04_reporting/preseq/igg_ctrl_NA_rep1_T1.command.log
+ - path: results/04_reporting/preseq/igg_ctrl_NA_rep1.command.log
should_exist: false
diff --git a/tests/test_verify_output_06_skip_reporting.yml b/tests/test_verify_output_06_skip_reporting.yml
index 37c2e6bd..e8fd3605 100644
--- a/tests/test_verify_output_06_skip_reporting.yml
+++ b/tests/test_verify_output_06_skip_reporting.yml
@@ -7,7 +7,7 @@
should_exist: false
- path: results/04_reporting/deeptools_qc/all_target_bams.plotCorrelation.pdf
should_exist: false
- - path: results/04_reporting/heatmaps/peaks/h3k27me3_NA_rep1_T1.plotHeatmap.pdf
+ - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_NA_rep1_seacr.plotHeatmap.pdf
should_exist: false
- - path: results/04_reporting/deeptools_heatmaps/gene/h3k4me3_NA_rep1_T1.computeMatrix.mat.gz
+ - path: results/04_reporting/deeptools_heatmaps/gene/h3k4me3_NA_rep1.computeMatrix.mat.gz
should_exist: false
diff --git a/workflows/cutandrun.nf b/workflows/cutandrun.nf
index d97aae21..e701247d 100644
--- a/workflows/cutandrun.nf
+++ b/workflows/cutandrun.nf
@@ -102,11 +102,6 @@ def caller_list = [
'span_default',
'span_stringent'
]
-callers = params.callers ?: ['seacr']
-if ((caller_list + callers).unique().size() != caller_list.size()) {
- exit 1, "Invalid variant calller option: ${params.peakcaller ?: params.peakcaller_preset}. Valid options: ${caller_list.join(', ')}"
-}
-
/*
========================================================================================
IMPORT LOCAL MODULES/SUBWORKFLOWS
@@ -187,6 +182,10 @@ workflow CUTANDRUN {
// Init
ch_software_versions = Channel.empty()
+ def caller_ids = params.callers ?: ['seacr']
+ if ((caller_list + caller_ids).unique().size() != caller_list.size()) {
+ exit 1, "Invalid variant calller option: ${params.peakcaller ?: params.peakcaller_preset}. Valid options: ${caller_list.join(', ')}"
+ }
/*
* SUBWORKFLOW: Uncompress and prepare reference genome files
@@ -391,6 +390,11 @@ workflow CUTANDRUN {
* SUBWORKFLOW: Remove duplicates - default is on IgG controls only
*/
if (params.run_remove_dups) {
+ // Preserve pre-dedup stats for targets when only controls are deduplicated
+ ch_stats_pre = ch_samtools_stats
+ ch_flagstat_pre = ch_samtools_flagstat
+ ch_idxstats_pre = ch_samtools_idxstats
+
DEDUPLICATE_PICARD (
ch_samtools_bam,
ch_samtools_bai,
@@ -400,9 +404,15 @@ workflow CUTANDRUN {
)
ch_samtools_bam = DEDUPLICATE_PICARD.out.bam
ch_samtools_bai = DEDUPLICATE_PICARD.out.bai
- ch_samtools_stats = DEDUPLICATE_PICARD.out.stats
- ch_samtools_flagstat = DEDUPLICATE_PICARD.out.flagstat
- ch_samtools_idxstats = DEDUPLICATE_PICARD.out.idxstats
+ if (params.dedup_target_reads) {
+ ch_samtools_stats = DEDUPLICATE_PICARD.out.stats
+ ch_samtools_flagstat = DEDUPLICATE_PICARD.out.flagstat
+ ch_samtools_idxstats = DEDUPLICATE_PICARD.out.idxstats
+ } else {
+ ch_samtools_stats = DEDUPLICATE_PICARD.out.stats.mix(ch_stats_pre.filter { it[0].is_control == false })
+ ch_samtools_flagstat = DEDUPLICATE_PICARD.out.flagstat.mix(ch_flagstat_pre.filter { it[0].is_control == false })
+ ch_samtools_idxstats = DEDUPLICATE_PICARD.out.idxstats.mix(ch_idxstats_pre.filter { it[0].is_control == false })
+ }
ch_software_versions = ch_software_versions.mix(DEDUPLICATE_PICARD.out.versions)
}
//EXAMPLE CHANNEL STRUCT: [[id:h3k27me3_R1, group:h3k27me3, replicate:1, single_end:false, is_control:false], [BAM]]
@@ -501,7 +511,7 @@ workflow CUTANDRUN {
//ch_bam_control | view
/*
- * SUBWORKFLOW: Call peaks using extended callers
+ * SUBWORKFLOW: Call peaks using extended caller_ids
*/
PEAK_CALLING_EXTENDED (
ch_bedgraph_target,
@@ -509,7 +519,7 @@ workflow CUTANDRUN {
ch_bam_target,
ch_bam_control,
PREPARE_GENOME.out.chrom_sizes,
- callers
+ caller_ids
)
ch_peaks_all = PEAK_CALLING_EXTENDED.out.peaks
ch_macs2_summits = PEAK_CALLING_EXTENDED.out.macs2_summits
@@ -519,7 +529,7 @@ workflow CUTANDRUN {
ch_gopeaks_json = PEAK_CALLING_EXTENDED.out.gopeaks_json
ch_software_versions = ch_software_versions.mix(PEAK_CALLING_EXTENDED.out.versions)
- if (callers.find { it.startsWith('macs2') }) {
+ if (caller_ids.find { it.startsWith('macs2') }) {
/*
* MODULE: Convert MACS2 outputs to BED
*/
@@ -536,34 +546,52 @@ workflow CUTANDRUN {
}
// Identify the primary peak data stream for downstream analysis
- ch_peaks_primary = ch_peaks_all.filter { it[0].caller == callers[0] }
- ch_peaks_secondary = ch_peaks_all.filter { it[0].caller != callers[0] }
+ ch_peaks_primary = ch_peaks_all.filter { it[0].caller == caller_ids[0] }
+ ch_peaks_secondary = ch_peaks_all.filter { it[0].caller != caller_ids[0] }
- if(callers[0] == 'seacr') {
+ /*
+ * CHANNEL: Build summit/peak inputs for heatmaps per caller
+ */
+ ch_peaks_summits = Channel.empty()
+ if (caller_ids.contains('seacr')) {
/*
* MODULE: Extract summits from seacr peak beds
*/
AWK_EXTRACT_SUMMITS (
- ch_peaks_primary
+ ch_peaks_all.filter { it[0].caller == 'seacr' }
)
- ch_peaks_summits = AWK_EXTRACT_SUMMITS.out.file
+ ch_peaks_summits = ch_peaks_summits.mix(AWK_EXTRACT_SUMMITS.out.file)
ch_software_versions = ch_software_versions.mix(AWK_EXTRACT_SUMMITS.out.versions)
//AWK_EXTRACT_SUMMITS.out.file | view
- } else if (callers[0].startsWith('macs2')) {
- def use_macs2_summits = true
- if (callers[0] == 'macs2_broad') {
- use_macs2_summits = false
- } else if (callers[0] == 'macs2' && !params.macs2_narrow_peak) {
- use_macs2_summits = false
- }
- if (use_macs2_summits) {
- ch_peaks_summits = ch_macs2_summits
- .filter { it[0].caller == callers[0] }
- } else {
- ch_peaks_summits = ch_peaks_primary
- }
- } else {
- ch_peaks_summits = ch_peaks_primary
+ }
+
+ def macs2_summit_callers = []
+ if (caller_ids.contains('macs2_narrow')) {
+ macs2_summit_callers.add('macs2_narrow')
+ }
+ if (caller_ids.contains('macs2') && params.macs2_narrow_peak) {
+ macs2_summit_callers.add('macs2')
+ }
+
+ if (macs2_summit_callers) {
+ ch_peaks_summits = ch_peaks_summits.mix(
+ ch_macs2_summits.filter { macs2_summit_callers.contains(it[0].caller) }
+ )
+ }
+
+ def macs2_no_summit_callers = caller_ids.findAll { it.startsWith('macs2') && !macs2_summit_callers.contains(it) }
+ if (macs2_no_summit_callers) {
+ ch_peaks_summits = ch_peaks_summits.mix(
+ ch_peaks_all.filter { macs2_no_summit_callers.contains(it[0].caller) }
+ )
+ }
+
+ def summit_override_callers = ['seacr'] + caller_ids.findAll { it.startsWith('macs2') }
+ def other_callers = caller_ids.findAll { !summit_override_callers.contains(it) }
+ if (other_callers) {
+ ch_peaks_summits = ch_peaks_summits.mix(
+ ch_peaks_all.filter { other_callers.contains(it[0].caller) }
+ )
}
/*
@@ -688,11 +716,18 @@ workflow CUTANDRUN {
.set { ch_bigwig_no_igg }
// ch_bigwig_no_igg | view
+ /*
+ * CHANNEL: Use spec-facing sample ids for heatmap outputs
+ */
+ ch_bigwig_no_igg
+ .map { meta, bigwig -> [meta + [id: (meta.sample_id ?: meta.id)], bigwig] }
+ .set { ch_bigwig_no_igg_heatmap }
+
/*
* MODULE: Compute DeepTools matrix used in heatmap plotting for Genes
*/
DEEPTOOLS_COMPUTEMATRIX_GENE (
- ch_bigwig_no_igg,
+ ch_bigwig_no_igg_heatmap,
PREPARE_GENOME.out.bed.collect()
)
ch_software_versions = ch_software_versions.mix(DEEPTOOLS_COMPUTEMATRIX_GENE.out.versions)
@@ -709,27 +744,37 @@ workflow CUTANDRUN {
* CHANNEL: Structure output for join on id
*/
ch_peaks_summits
- .map { row -> [row[0].id, row ].flatten()}
+ .map { meta, bed -> [meta.sample_id ?: meta.id, [meta, bed]] }
+ .groupTuple(by: [0])
.set { ch_peaks_summits_id }
//ch_peaks_bed_id | view
/*
* CHANNEL: Join beds and bigwigs on id
*/
- ch_bigwig_no_igg
- .map { row -> [row[0].id, row ].flatten()}
+ ch_bigwig_no_igg_heatmap
+ .map { meta, bigwig -> [meta.id, bigwig] }
.join ( ch_peaks_summits_id )
- .filter ( it -> it[-1].size() > 1)
+ .flatMap { row ->
+ def peaks_list = row[2]
+ def bigwig = row[1]
+ peaks_list.collect { peak_meta, bed ->
+ def sample_id = peak_meta.sample_id ?: peak_meta.id
+ def heatmap_meta = peak_meta + [id: "${sample_id}_${peak_meta.caller}"]
+ [ heatmap_meta, bigwig, bed ]
+ }
+ }
+ .filter ( it -> it[2].size() > 1)
.set { ch_dt_bigwig_summits }
//ch_dt_peaks | view
ch_dt_bigwig_summits
- .map { row -> row[1,2] }
+ .map { row -> [row[0], row[1]] }
.set { ch_ordered_bigwig }
//ch_ordered_bigwig | view
ch_dt_bigwig_summits
- .map { row -> row[-1] }
+ .map { row -> row[2] }
.set { ch_ordered_peaks_max }
//ch_ordered_peaks_max | view
@@ -817,22 +862,20 @@ workflow CUTANDRUN {
* SUBWORKFLOW: Run suite of peak QC on peaks
*/
AWK_NAME_PEAK_BED.out.file
- .filter { it[0].caller == callers[0] }
- .set { ch_peaks_with_ids_primary }
+ .set { ch_peaks_with_ids_all }
ch_consensus_peaks
- .filter { it[0].caller == callers[0] }
- .set { ch_consensus_peaks_primary }
+ .set { ch_consensus_peaks_all }
ch_consensus_peaks_unfilt
- .filter { it[0].caller == callers[0] }
- .set { ch_consensus_peaks_unfilt_primary }
+ .set { ch_consensus_peaks_unfilt_all }
PEAK_QC(
- ch_peaks_primary,
- ch_peaks_with_ids_primary,
- ch_consensus_peaks_primary,
- ch_consensus_peaks_unfilt_primary,
+ ch_peaks_all,
+ ch_peaks_with_ids_all,
+ ch_consensus_peaks_all,
+ ch_consensus_peaks_unfilt_all,
+ PREPARE_GENOME.out.chrom_sizes,
EXTRACT_FRAGMENTS.out.bed,
ch_flagstat_target,
params.min_frip_overlap,