From ba41a826d55d8fd45158dc5b43c63c99ab5900f0 Mon Sep 17 00:00:00 2001 From: dhusmann Date: Sun, 4 Jan 2026 22:08:53 -0800 Subject: [PATCH 01/19] Enforce control matching and record pooling fallbacks --- bin/check_samplesheet.py | 114 ++++++++- docs/output.md | 5 +- docs/usage.md | 2 +- .../local/control_pooling_fallbacks_report.nf | 19 +- modules/local/python/samplesheet_check.nf | 3 +- nextflow.config | 1 + nextflow_schema.json | 6 + subworkflows/local/peak_calling_extended.nf | 234 ++++++++++++++---- 8 files changed, 316 insertions(+), 68 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 7c4f2ab8..b6448982 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -41,6 +41,11 @@ def parse_args(args=None): "USE_CONTROL", help="Boolean for whether or not the user has specified the pipeline must normalise against a control", ) + parser.add_argument( + "--allow-cross-condition-controls", + action="store_true", + help="Allow targets to use controls from other conditions when no exact condition match exists", + ) return parser.parse_args(args) @@ -63,7 +68,7 @@ def print_error(error, context="Line", context_str=""): sys.exit(1) -def check_samplesheet(file_in, file_out, use_control): +def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_controls=False): """ This function checks that the samplesheet follows the following structure: @@ -83,6 +88,9 @@ def check_samplesheet(file_in, file_out, use_control): control_names_list = [] sample_run_dict = {} control_condition_map = {} + missing_control_errors = [] + cross_condition_warnings = [] + legacy_na_warnings = [] with open(file_in, "r") as fin: ## Check header @@ -281,21 +289,96 @@ def check_samplesheet(file_in, file_out, use_control): if info[-1] == "1": control_condition_map.setdefault(info[0], set()).add(info[1]) - # Warn on condition mismatch between targets and controls - for sample_key, reps in sample_run_dict.items(): - for replicate, infos in reps.items(): - for info in infos: - if info[-1] == "0" and info[3] != "": - ctrl_conditions = control_condition_map.get(info[3], set()) - if ctrl_conditions and info[1] not in ctrl_conditions and "NA" not in ctrl_conditions: + # Validate condition-specific controls for targets + if has_condition: + for sample_key, reps in sample_run_dict.items(): + for replicate, infos in reps.items(): + for info in infos: + if info[-1] == "0" and info[3] != "": + ctrl_conditions = control_condition_map.get(info[3], set()) sample_id = "{}_{}_rep{}".format(info[0], info[1], info[2]) - print( - "WARNING: No control found for target condition; will fall back to other conditions if available.\n" - "Target sample_id: {} | control_group: {} | expected condition: {}".format( - sample_id, info[3], info[1] + if not ctrl_conditions: + missing_control_errors.append( + { + "sample_id": sample_id, + "group": info[0], + "condition": info[1], + "replicate": info[2], + "control_group": info[3], + "control_conditions": "NONE", + } ) + continue + if info[1] in ctrl_conditions: + continue + if "NA" in ctrl_conditions: + legacy_na_warnings.append( + { + "sample_id": sample_id, + "group": info[0], + "condition": info[1], + "replicate": info[2], + "control_group": info[3], + "control_conditions": ",".join(sorted(ctrl_conditions)), + } + ) + continue + if allow_cross_condition_controls: + cross_condition_warnings.append( + { + "sample_id": sample_id, + "group": info[0], + "condition": info[1], + "replicate": info[2], + "control_group": info[3], + "control_conditions": ",".join(sorted(ctrl_conditions)), + } + ) + continue + missing_control_errors.append( + { + "sample_id": sample_id, + "group": info[0], + "condition": info[1], + "replicate": info[2], + "control_group": info[3], + "control_conditions": ",".join(sorted(ctrl_conditions)), + } ) + if missing_control_errors: + print("ERROR: Missing condition-matched controls for target samples (fail-fast).") + for entry in missing_control_errors: + print( + " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | " + "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry) + ) + print( + "Remediation: add control rows for the missing condition(s), or set controls to legacy NA " + "consistently. If you intentionally want cross-condition controls, rerun with " + "--allow_cross_condition_controls." + ) + sys.exit(1) + + if legacy_na_warnings: + print("WARNING: Control rows with condition=NA used for targets with explicit conditions (legacy mode).") + for entry in legacy_na_warnings: + print( + " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | " + "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry) + ) + + if cross_condition_warnings: + print( + "WARNING: No exact condition-matched controls found; proceeding with cross-condition controls " + "because --allow_cross_condition_controls was set." + ) + for entry in cross_condition_warnings: + print( + " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | " + "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry) + ) + ## Write validated samplesheet with appropriate columns if len(sample_run_dict) > 0: out_dir = os.path.dirname(file_out) @@ -333,7 +416,12 @@ def check_samplesheet(file_in, file_out, use_control): def main(args=None): args = parse_args(args) - check_samplesheet(args.FILE_IN, args.FILE_OUT, args.USE_CONTROL) + check_samplesheet( + args.FILE_IN, + args.FILE_OUT, + args.USE_CONTROL, + allow_cross_condition_controls=args.allow_cross_condition_controls, + ) if __name__ == "__main__": diff --git a/docs/output.md b/docs/output.md index ccf61f74..828a87db 100644 --- a/docs/output.md +++ b/docs/output.md @@ -326,7 +326,10 @@ These tables are generated when `--normalisation_mode Spikein` to record per-sam -Controls are pooled per control group and condition for callers that require pooled controls (epic2/SPAN). If a matching condition is unavailable, the closest available control is used and recorded in `03_peak_calling/07_qc_tables/control_pooling_fallbacks.tsv`. +Controls are pooled per control group and condition for callers that require pooled controls (epic2/SPAN). If a matching condition is unavailable, the closest available control is used **only when explicitly allowed**; otherwise the pipeline fails fast during samplesheet validation. The control pooling report is always written to `03_peak_calling/07_qc_tables/control_pooling_fallbacks.tsv` (header + zero or more rows) and records both fallback usage and missing-control skips with status/action fields. + +The report columns are: +`sample_id`, `group`, `condition`, `caller_id`, `control_group`, `selected_control_condition`, `status`, `action`, `reason`, `pooled_control_path`. ### 6.3. Bam to bedgraph diff --git a/docs/usage.md b/docs/usage.md index 56780e94..e3ca9375 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -14,7 +14,7 @@ You will need to create a samplesheet file with information about the samples in --input ``` -An example sample sheet structure is shown below. This defines two target experimental groups for the histone marks h3k27me3 and h3k4me3 across two conditions (Control/Treatment) with two biological replicates per condition. Each antibody target also has an IgG control per condition. The IgG controls are assigned to the target samples using the `control` column. If there are an equal number of replicates assigned to the samples from the control group as is the case below, the IgG controls will automatically be assigned to the same replicate number. If there is a mismatch then the first replicate of the control group will be assigned to all. +An example sample sheet structure is shown below. This defines two target experimental groups for the histone marks h3k27me3 and h3k4me3 across two conditions (Control/Treatment) with two biological replicates per condition. Each antibody target also has an IgG control per condition. The IgG controls are assigned to the target samples using the `control` column. If there are an equal number of replicates assigned to the samples from the control group as is the case below, the IgG controls will automatically be assigned to the same replicate number. If there is a mismatch then the first replicate of the control group will be assigned to all. When a `condition` column is present, each target row that declares a control must have a matching control row for the same condition (fail-fast by default). Use `--allow_cross_condition_controls` only if you explicitly want cross-condition controls. ```bash group,condition,replicate,fastq_1,fastq_2,control diff --git a/modules/local/control_pooling_fallbacks_report.nf b/modules/local/control_pooling_fallbacks_report.nf index 0c35d75c..edb8347d 100644 --- a/modules/local/control_pooling_fallbacks_report.nf +++ b/modules/local/control_pooling_fallbacks_report.nf @@ -17,16 +17,19 @@ process CONTROL_POOLING_FALLBACKS_REPORT { task.ext.when == null || task.ext.when script: - def header = "sample_id\tgroup\tcondition\tcontrol_group\tcontrol_condition\tcaller\treason" + def header = "sample_id\tgroup\tcondition\tcaller_id\tcontrol_group\tselected_control_condition\tstatus\taction\treason\tpooled_control_path" def lines = records ? records.collect { record -> [ - record.sample_id, - record.group, - record.condition, - record.control_group, - record.control_condition, - record.caller, - record.reason + record.sample_id ?: '', + record.group ?: '', + record.condition ?: '', + record.caller_id ?: '', + record.control_group ?: '', + record.selected_control_condition ?: '', + record.status ?: '', + record.action ?: '', + record.reason ?: '', + record.pooled_control_path ?: '' ].join('\t') }.join('\n') : '' """ diff --git a/modules/local/python/samplesheet_check.nf b/modules/local/python/samplesheet_check.nf index 6b371ab2..4972e07e 100644 --- a/modules/local/python/samplesheet_check.nf +++ b/modules/local/python/samplesheet_check.nf @@ -16,8 +16,9 @@ process SAMPLESHEET_CHECK { task.ext.when == null || task.ext.when script: + def allow_cross = params.allow_cross_condition_controls ? '--allow-cross-condition-controls' : '' """ - check_samplesheet.py $samplesheet samplesheet.valid.csv $params.use_control + check_samplesheet.py $samplesheet samplesheet.valid.csv $params.use_control $allow_cross cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 39d9e100..26d115ca 100644 --- a/nextflow.config +++ b/nextflow.config @@ -75,6 +75,7 @@ params { // Peak Calling use_control = true + allow_cross_condition_controls = false only_peak_calling = false extend_fragments = true seacr_norm = 'non' diff --git a/nextflow_schema.json b/nextflow_schema.json index 3c68e540..836f55ad 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -362,6 +362,12 @@ "fa_icon": "fas fa-align-justify", "description": "Specifies whether to use a control to normalise peak calls against (e.g. IgG)" }, + "allow_cross_condition_controls": { + "type": "boolean", + "default": false, + "fa_icon": "fas fa-align-justify", + "description": "Allow targets to use controls from other conditions when no exact condition match exists (default: false, fail-fast)." + }, "extend_fragments": { "type": "boolean", "default": true, diff --git a/subworkflows/local/peak_calling_extended.nf b/subworkflows/local/peak_calling_extended.nf index b6815410..fe52eb65 100644 --- a/subworkflows/local/peak_calling_extended.nf +++ b/subworkflows/local/peak_calling_extended.nf @@ -276,16 +276,17 @@ workflow PEAK_CALLING_EXTENDED { .map { meta, bam, pooled_map -> def entries = pooled_map.get(meta.control_group, []) if (!entries) { - return null + return [meta, bam, null, null, 'missing_control', 'skipped', 'no_control_for_group', null] } def exact = entries.find { it[0] == meta.control_condition } def chosen = exact ?: entries[0] def used_condition = chosen[0] def control_bam = chosen[1] - def reason = exact ? null : 'condition_fallback' - [meta, bam, control_bam, used_condition, reason] + def status = exact ? 'exact_match' : 'fallback_other_condition' + def action = 'used' + def reason = exact ? 'exact_condition' : 'condition_fallback' + [meta, bam, control_bam, used_condition, status, action, reason, control_bam] } - .filter { it != null } } chrom_sizes_val = chrom_sizes @@ -304,9 +305,17 @@ workflow PEAK_CALLING_EXTENDED { */ if ('epic2_200bp' in callers) { ch_epic2_200 = ch_pooled_pairs - .map { meta, bam, control_bam, used_condition, reason -> [meta + [caller: 'epic2_200bp'], bam, control_bam, used_condition, reason] } + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [meta + [caller: 'epic2_200bp'], bam, control_bam, used_condition, status, action, reason, pooled_path] + } + ch_epic2_200_branch = ch_epic2_200.branch { + run: it[2] != null + skip: it[2] == null + } EPIC2_CALLPEAK_200 ( - ch_epic2_200.map { meta, bam, control_bam, used_condition, reason -> [meta, bam, control_bam] }, + ch_epic2_200_branch.run.map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [meta, bam, control_bam] + }, params.epic2_genome, 200, 3, @@ -315,17 +324,38 @@ workflow PEAK_CALLING_EXTENDED { ch_peaks_all = ch_peaks_all.mix(EPIC2_CALLPEAK_200.out.peaks) ch_versions = ch_versions.mix(EPIC2_CALLPEAK_200.out.versions) ch_control_fallbacks = ch_control_fallbacks.mix( - ch_epic2_200 - .filter { meta, bam, control_bam, used_condition, reason -> used_condition != meta.condition } - .map { meta, bam, control_bam, used_condition, reason -> + ch_epic2_200_branch.run + .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' } + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [ + sample_id: meta.id, + group: meta.group, + condition: meta.condition, + caller_id: 'epic2_200bp', + control_group: meta.control_group, + selected_control_condition: used_condition, + status: status, + action: action, + reason: reason, + pooled_control_path: pooled_path + ] + } + ) + ch_control_fallbacks = ch_control_fallbacks.mix( + ch_epic2_200_branch.skip + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + log.warn("Skipping epic2_200bp for sample ${meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") [ sample_id: meta.id, group: meta.group, condition: meta.condition, + caller_id: 'epic2_200bp', control_group: meta.control_group, - control_condition: used_condition, - caller: 'epic2_200bp', - reason: reason ?: 'condition_fallback' + selected_control_condition: used_condition ?: '', + status: status ?: 'missing_control', + action: 'skipped', + reason: reason ?: 'missing_control', + pooled_control_path: pooled_path ?: '' ] } ) @@ -333,9 +363,17 @@ workflow PEAK_CALLING_EXTENDED { if ('epic2_150bp' in callers) { ch_epic2_150 = ch_pooled_pairs - .map { meta, bam, control_bam, used_condition, reason -> [meta + [caller: 'epic2_150bp'], bam, control_bam, used_condition, reason] } + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [meta + [caller: 'epic2_150bp'], bam, control_bam, used_condition, status, action, reason, pooled_path] + } + ch_epic2_150_branch = ch_epic2_150.branch { + run: it[2] != null + skip: it[2] == null + } EPIC2_CALLPEAK_150 ( - ch_epic2_150.map { meta, bam, control_bam, used_condition, reason -> [meta, bam, control_bam] }, + ch_epic2_150_branch.run.map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [meta, bam, control_bam] + }, params.epic2_genome, 150, 2, @@ -344,17 +382,38 @@ workflow PEAK_CALLING_EXTENDED { ch_peaks_all = ch_peaks_all.mix(EPIC2_CALLPEAK_150.out.peaks) ch_versions = ch_versions.mix(EPIC2_CALLPEAK_150.out.versions) ch_control_fallbacks = ch_control_fallbacks.mix( - ch_epic2_150 - .filter { meta, bam, control_bam, used_condition, reason -> used_condition != meta.condition } - .map { meta, bam, control_bam, used_condition, reason -> + ch_epic2_150_branch.run + .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' } + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [ + sample_id: meta.id, + group: meta.group, + condition: meta.condition, + caller_id: 'epic2_150bp', + control_group: meta.control_group, + selected_control_condition: used_condition, + status: status, + action: action, + reason: reason, + pooled_control_path: pooled_path + ] + } + ) + ch_control_fallbacks = ch_control_fallbacks.mix( + ch_epic2_150_branch.skip + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + log.warn("Skipping epic2_150bp for sample ${meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") [ sample_id: meta.id, group: meta.group, condition: meta.condition, + caller_id: 'epic2_150bp', control_group: meta.control_group, - control_condition: used_condition, - caller: 'epic2_150bp', - reason: reason ?: 'condition_fallback' + selected_control_condition: used_condition ?: '', + status: status ?: 'missing_control', + action: 'skipped', + reason: reason ?: 'missing_control', + pooled_control_path: pooled_path ?: '' ] } ) @@ -362,9 +421,17 @@ workflow PEAK_CALLING_EXTENDED { if ('epic2_25bp' in callers) { ch_epic2_25 = ch_pooled_pairs - .map { meta, bam, control_bam, used_condition, reason -> [meta + [caller: 'epic2_25bp'], bam, control_bam, used_condition, reason] } + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [meta + [caller: 'epic2_25bp'], bam, control_bam, used_condition, status, action, reason, pooled_path] + } + ch_epic2_25_branch = ch_epic2_25.branch { + run: it[2] != null + skip: it[2] == null + } EPIC2_CALLPEAK_25 ( - ch_epic2_25.map { meta, bam, control_bam, used_condition, reason -> [meta, bam, control_bam] }, + ch_epic2_25_branch.run.map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [meta, bam, control_bam] + }, params.epic2_genome, 25, 2, @@ -373,17 +440,38 @@ workflow PEAK_CALLING_EXTENDED { ch_peaks_all = ch_peaks_all.mix(EPIC2_CALLPEAK_25.out.peaks) ch_versions = ch_versions.mix(EPIC2_CALLPEAK_25.out.versions) ch_control_fallbacks = ch_control_fallbacks.mix( - ch_epic2_25 - .filter { meta, bam, control_bam, used_condition, reason -> used_condition != meta.condition } - .map { meta, bam, control_bam, used_condition, reason -> + ch_epic2_25_branch.run + .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' } + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> [ sample_id: meta.id, group: meta.group, condition: meta.condition, + caller_id: 'epic2_25bp', control_group: meta.control_group, - control_condition: used_condition, - caller: 'epic2_25bp', - reason: reason ?: 'condition_fallback' + selected_control_condition: used_condition, + status: status, + action: action, + reason: reason, + pooled_control_path: pooled_path + ] + } + ) + ch_control_fallbacks = ch_control_fallbacks.mix( + ch_epic2_25_branch.skip + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + log.warn("Skipping epic2_25bp for sample ${meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") + [ + sample_id: meta.id, + group: meta.group, + condition: meta.condition, + caller_id: 'epic2_25bp', + control_group: meta.control_group, + selected_control_condition: used_condition ?: '', + status: status ?: 'missing_control', + action: 'skipped', + reason: reason ?: 'missing_control', + pooled_control_path: pooled_path ?: '' ] } ) @@ -394,9 +482,17 @@ workflow PEAK_CALLING_EXTENDED { */ if ('span_default' in callers) { ch_span_default = ch_pooled_pairs - .map { meta, bam, control_bam, used_condition, reason -> [meta + [caller: 'span_default'], bam, control_bam, used_condition, reason] } + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [meta + [caller: 'span_default'], bam, control_bam, used_condition, status, action, reason, pooled_path] + } + ch_span_default_branch = ch_span_default.branch { + run: it[2] != null + skip: it[2] == null + } SPAN_OMNIPEAKS_DEFAULT ( - ch_span_default.map { meta, bam, control_bam, used_condition, reason -> [meta, bam, control_bam] }, + ch_span_default_branch.run.map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [meta, bam, control_bam] + }, chrom_sizes_val, omnipeaks_jar_val, 5, @@ -406,17 +502,38 @@ workflow PEAK_CALLING_EXTENDED { ch_peaks_all = ch_peaks_all.mix(SPAN_OMNIPEAKS_DEFAULT.out.peaks) ch_versions = ch_versions.mix(SPAN_OMNIPEAKS_DEFAULT.out.versions) ch_control_fallbacks = ch_control_fallbacks.mix( - ch_span_default - .filter { meta, bam, control_bam, used_condition, reason -> used_condition != meta.condition } - .map { meta, bam, control_bam, used_condition, reason -> + ch_span_default_branch.run + .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' } + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> [ sample_id: meta.id, group: meta.group, condition: meta.condition, + caller_id: 'span_default', control_group: meta.control_group, - control_condition: used_condition, - caller: 'span_default', - reason: reason ?: 'condition_fallback' + selected_control_condition: used_condition, + status: status, + action: action, + reason: reason, + pooled_control_path: pooled_path + ] + } + ) + ch_control_fallbacks = ch_control_fallbacks.mix( + ch_span_default_branch.skip + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + log.warn("Skipping span_default for sample ${meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") + [ + sample_id: meta.id, + group: meta.group, + condition: meta.condition, + caller_id: 'span_default', + control_group: meta.control_group, + selected_control_condition: used_condition ?: '', + status: status ?: 'missing_control', + action: 'skipped', + reason: reason ?: 'missing_control', + pooled_control_path: pooled_path ?: '' ] } ) @@ -424,9 +541,17 @@ workflow PEAK_CALLING_EXTENDED { if ('span_stringent' in callers) { ch_span_stringent = ch_pooled_pairs - .map { meta, bam, control_bam, used_condition, reason -> [meta + [caller: 'span_stringent'], bam, control_bam, used_condition, reason] } + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [meta + [caller: 'span_stringent'], bam, control_bam, used_condition, status, action, reason, pooled_path] + } + ch_span_stringent_branch = ch_span_stringent.branch { + run: it[2] != null + skip: it[2] == null + } SPAN_OMNIPEAKS_STRINGENT ( - ch_span_stringent.map { meta, bam, control_bam, used_condition, reason -> [meta, bam, control_bam] }, + ch_span_stringent_branch.run.map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [meta, bam, control_bam] + }, chrom_sizes_val, omnipeaks_jar_val, 2, @@ -436,17 +561,38 @@ workflow PEAK_CALLING_EXTENDED { ch_peaks_all = ch_peaks_all.mix(SPAN_OMNIPEAKS_STRINGENT.out.peaks) ch_versions = ch_versions.mix(SPAN_OMNIPEAKS_STRINGENT.out.versions) ch_control_fallbacks = ch_control_fallbacks.mix( - ch_span_stringent - .filter { meta, bam, control_bam, used_condition, reason -> used_condition != meta.condition } - .map { meta, bam, control_bam, used_condition, reason -> + ch_span_stringent_branch.run + .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' } + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + [ + sample_id: meta.id, + group: meta.group, + condition: meta.condition, + caller_id: 'span_stringent', + control_group: meta.control_group, + selected_control_condition: used_condition, + status: status, + action: action, + reason: reason, + pooled_control_path: pooled_path + ] + } + ) + ch_control_fallbacks = ch_control_fallbacks.mix( + ch_span_stringent_branch.skip + .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> + log.warn("Skipping span_stringent for sample ${meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") [ sample_id: meta.id, group: meta.group, condition: meta.condition, + caller_id: 'span_stringent', control_group: meta.control_group, - control_condition: used_condition, - caller: 'span_stringent', - reason: reason ?: 'condition_fallback' + selected_control_condition: used_condition ?: '', + status: status ?: 'missing_control', + action: 'skipped', + reason: reason ?: 'missing_control', + pooled_control_path: pooled_path ?: '' ] } ) From 5151d321bebaa6979efa980b04806aada59ac8d4 Mon Sep 17 00:00:00 2001 From: dhusmann Date: Sun, 4 Jan 2026 22:42:04 -0800 Subject: [PATCH 02/19] Always emit spike-in normalisation factor reports --- conf/modules.config | 10 ++++ docs/output.md | 2 +- .../normalisation_scope_reference_report.nf | 39 ++++++++++++++ nextflow_schema.json | 2 +- subworkflows/local/prepare_peakcalling.nf | 53 ++++++++++++------- 5 files changed, 86 insertions(+), 20 deletions(-) create mode 100644 modules/local/normalisation_scope_reference_report.nf diff --git a/conf/modules.config b/conf/modules.config index 676eb44a..2ebf43c7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -637,6 +637,16 @@ if(params.run_peak_calling) { ] } + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:NORMALISATION_SCOPE_REFERENCE_REPORT' { + publishDir = [ + path: { "${params.outdir}/03_peak_calling/00_normalisation_factors" }, + mode: "${params.publish_dir_mode}", + pattern: "normalisation_scope_reference.tsv", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:BEDTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ diff --git a/docs/output.md b/docs/output.md index 828a87db..ee25c3df 100644 --- a/docs/output.md +++ b/docs/output.md @@ -313,7 +313,7 @@ Computes the overall similarity between two or more samples based on read covera -These tables are generated when `--normalisation_mode Spikein` to record per-sample scale factors. When `--normalisation_scope` is `group` or `group_condition`, a separate TSV is written per scope. +These tables are generated when `--normalisation_mode Spikein` to record per-sample scale factors. When `--normalisation_scope` is `group` or `group_condition`, a separate TSV is written per scope. If `--dump_scale_factors` is set, an additional debug file `normalisation_scope_reference.tsv` is written with the per-scope reference read counts used to compute scale factors. ### 6.2. Pooled controls diff --git a/modules/local/normalisation_scope_reference_report.nf b/modules/local/normalisation_scope_reference_report.nf new file mode 100644 index 00000000..796b5b25 --- /dev/null +++ b/modules/local/normalisation_scope_reference_report.nf @@ -0,0 +1,39 @@ +process NORMALISATION_SCOPE_REFERENCE_REPORT { + label 'process_single' + + conda "conda-forge::coreutils=9.5" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + val records + + output: + path "normalisation_scope_reference.tsv", emit: tsv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def header = "scope_id\treference_reads" + def lines = records ? records.collect { record -> + [ + record.scope_id, + record.reference_reads + ].join('\t') + }.join('\n') : '' + """ + printf "%s\\n" "${header}" > normalisation_scope_reference.tsv + if [ -n "${lines}" ]; then + printf "%s\\n" "${lines}" >> normalisation_scope_reference.tsv + fi + + coreutils_version=\$(cat --version | head -n 1 | awk '{print \$NF}') + { + echo "\\"${task.process}\\":" + echo " coreutils: \$coreutils_version" + } > versions.yml + """ +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 836f55ad..eafef6b2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -73,7 +73,7 @@ }, "dump_scale_factors": { "type": "boolean", - "description": "Output calculated scale factors from pipeline" + "description": "Emit additional debug outputs for spike-in normalisation (standard scale-factor TSVs are always written when normalisation_mode is Spikein)." } } }, diff --git a/subworkflows/local/prepare_peakcalling.nf b/subworkflows/local/prepare_peakcalling.nf index d9fcc019..f65bd022 100644 --- a/subworkflows/local/prepare_peakcalling.nf +++ b/subworkflows/local/prepare_peakcalling.nf @@ -8,6 +8,7 @@ include { BEDTOOLS_SORT } from "../../modules/local/for_patch/bedtools/s include { UCSC_BEDCLIP } from "../../modules/nf-core/ucsc/bedclip/main" include { UCSC_BEDGRAPHTOBIGWIG } from "../../modules/nf-core/ucsc/bedgraphtobigwig/main" include { NORMALISATION_FACTORS_REPORT } from "../../modules/local/normalisation_factors_report" +include { NORMALISATION_SCOPE_REFERENCE_REPORT } from "../../modules/local/normalisation_scope_reference_report" workflow PREPARE_PEAKCALLING { take: @@ -92,26 +93,42 @@ workflow PREPARE_PEAKCALLING { // EXAMPLE CHANNEL STRUCT: [id, scale_factor] //ch_bam_scale_factor | view + ch_bam_scale_factor_report + .map { meta, bam, scale, reads, scope_id -> + [ + sample_id: meta.id, + group: meta.group, + condition: meta.condition, + replicate: meta.replicate, + spikein_reads: reads, + scale_factor: scale, + scope_id: scope_id + ] + } + .map { record -> [ record.scope_id, record ] } + .groupTuple(by: [0]) + .map { scope_id, records -> [ scope_id, records ] } + .set { ch_norm_factors } + + NORMALISATION_FACTORS_REPORT ( ch_norm_factors ) + ch_versions = ch_versions.mix(NORMALISATION_FACTORS_REPORT.out.versions) + if (params.dump_scale_factors) { - ch_bam_scale_factor_report - .map { meta, bam, scale, reads, scope_id -> - [ - sample_id: meta.id, - group: meta.group, - condition: meta.condition, - replicate: meta.replicate, - spikein_reads: reads, - scale_factor: scale, - scope_id: scope_id - ] - } - .map { record -> [ record.scope_id, record ] } - .groupTuple(by: [0]) - .map { scope_id, records -> [ scope_id, records ] } - .set { ch_norm_factors } + def ch_scope_reference = Channel.empty() + if (norm_scope == 'all') { + ch_scope_reference = Channel.of([scope_id: 'all', reference_reads: params.normalisation_c]) + } else { + ch_scope_reference = ch_scope_ref + .map { scope_id, ref -> [scope_id: scope_id, reference_reads: ref] } + } + + ch_scope_reference + .toList() + .map { records -> records ?: [] } + .set { ch_scope_reference_records } - NORMALISATION_FACTORS_REPORT ( ch_norm_factors ) - ch_versions = ch_versions.mix(NORMALISATION_FACTORS_REPORT.out.versions) + NORMALISATION_SCOPE_REFERENCE_REPORT ( ch_scope_reference_records ) + ch_versions = ch_versions.mix(NORMALISATION_SCOPE_REFERENCE_REPORT.out.versions) } } else if (norm_mode == "None") { From c2db7f86a4fc7e2e53be250e57666eeba6fdf7cc Mon Sep 17 00:00:00 2001 From: dhusmann Date: Sun, 4 Jan 2026 23:15:41 -0800 Subject: [PATCH 03/19] Expand peak QC across callers --- assets/multiqc/frip_score_header.txt | 4 +- assets/multiqc/peak_counts_header.txt | 4 +- assets/multiqc/peak_reprod_header.txt | 4 +- conf/modules.config | 12 +++- docs/output.md | 19 ++++-- modules/local/peak_counts.nf | 5 +- modules/local/peak_frip.nf | 6 +- modules/local/peak_qc_table_report.nf | 34 +++++++++++ subworkflows/local/peak_qc.nf | 80 +++++++++++++++++++++++- workflows/cutandrun.nf | 87 +++++++++++++++++---------- 10 files changed, 207 insertions(+), 48 deletions(-) create mode 100644 modules/local/peak_qc_table_report.nf diff --git a/assets/multiqc/frip_score_header.txt b/assets/multiqc/frip_score_header.txt index a0cab909..ca7f9aae 100644 --- a/assets/multiqc/frip_score_header.txt +++ b/assets/multiqc/frip_score_header.txt @@ -2,9 +2,9 @@ #parent_id: 'peak_qc' #parent_name: 'Peak QC' #parent_description: 'This section contains peak-based QC reports' -#section_name: 'Sample FRiP score' +#section_name: 'Sample FRiP score (per caller)' #description: "is generated by calculating the fraction of all mapped fragments that fall -# into the peak regions called by either MACS2 or SEACR. +# into the peak regions called by each caller (e.g. MACS2, SEACR, GoPeaks). # See FRiP score." #plot_type: 'bargraph' #anchor: 'primary_fripscore' diff --git a/assets/multiqc/peak_counts_header.txt b/assets/multiqc/peak_counts_header.txt index c6145c8a..69532d05 100644 --- a/assets/multiqc/peak_counts_header.txt +++ b/assets/multiqc/peak_counts_header.txt @@ -2,8 +2,8 @@ #parent_id: 'peak_qc' #parent_name: 'Peak QC' #parent_description: 'This section contains peak-based QC reports' -#section_name: 'Sample Peak Count' -#description: 'Calculated from the total number of peaks called by MACS2 or SEACR' +#section_name: 'Sample Peak Count (per caller)' +#description: 'Calculated from the total number of peaks called by each peak caller' #plot_type: 'bargraph' #anchor: 'primary_peakcounts' #pconfig: diff --git a/assets/multiqc/peak_reprod_header.txt b/assets/multiqc/peak_reprod_header.txt index daccd629..54fad200 100644 --- a/assets/multiqc/peak_reprod_header.txt +++ b/assets/multiqc/peak_reprod_header.txt @@ -2,8 +2,8 @@ #parent_id: 'peak_qc' #parent_name: 'Peak QC' #parent_description: 'This section contains peak-based QC reports' -#section_name: 'Sample Peak reproducibility %' -#description: 'Calculated from the total number of overlapping peaks within group replicate sets' +#section_name: 'Sample Peak reproducibility % (per caller)' +#description: 'Calculated from the total number of overlapping peaks within group replicate sets for each caller' #plot_type: 'bargraph' #anchor: 'primary_peakrepro' #pconfig: diff --git a/conf/modules.config b/conf/modules.config index 2ebf43c7..caf6021c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -820,7 +820,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) { } } -if(params.run_peak_calling && params.callers[0] == 'seacr') { +if(params.run_peak_calling && params.callers && params.callers.contains('seacr')) { process { withName: 'NFCORE_CUTANDRUN:CUTANDRUN:AWK_EXTRACT_SUMMITS' { ext.command = "'{split(\$6, summit, \":\"); split(summit[2], region, \"-\"); print summit[1]\"\\t\"region[1]\"\\t\"region[2]}'" @@ -1114,6 +1114,16 @@ if (params.run_reporting && params.run_peak_qc) { ] } + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PEAK_QC_.*_REPORT' { + publishDir = [ + path: { "${params.outdir}/03_peak_calling/07_qc_tables" }, + mode: "${params.publish_dir_mode}", + pattern: "*.tsv", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PLOT_CONSENSUS_PEAKS' { publishDir = [ path: { "${params.outdir}/04_reporting/consensus_upset_plots" }, diff --git a/docs/output.md b/docs/output.md index ee25c3df..0c8cb76f 100644 --- a/docs/output.md +++ b/docs/output.md @@ -445,9 +445,20 @@ The merge function from [BEDtools](https://github.com/arq5x/bedtools2) is used t Once the peak calling process is complete, we run a separate set of reports that analyse the quality of the results at the peak level. +
+Output files + +- `03_peak_calling/07_qc_tables/` + - `peak_counts.tsv`: per-sample peak counts (includes `caller_id`). + - `peak_frip_scores.tsv`: per-sample FRiP scores (includes `caller_id`). + - `peak_reproducibility.tsv`: per-group reproducibility (includes `caller_id`). + - `consensus_peak_counts.tsv`: consensus peak counts (includes `caller_id`). + +
+ ### 7.1. Peak Counts -For both the sample peaks and the consensus peaks, a simple count is taken. At the sample level, it is important to see consistency between peak counts of biological replicates. It is the first indicator of whether you replicates samples agree with each other after all of the processing has completed. If you use the consensus peaks and use a replicate threshold of more than 1, it is also important to see how many of your peaks across replicates have translated into consensus peaks. +For both the sample peaks and the consensus peaks, a simple count is taken for each peak caller. At the sample level, it is important to see consistency between peak counts of biological replicates. It is the first indicator of whether you replicates samples agree with each other after all of the processing has completed. If you use the consensus peaks and use a replicate threshold of more than 1, it is also important to see how many of your peaks across replicates have translated into consensus peaks. In the image below we see comparable peak counts for the H3K27me3 dataset, but a large disparity for the H3K4me3. @@ -455,7 +466,7 @@ In the image below we see comparable peak counts for the H3K27me3 dataset, but a ### 7.2. Peak Reproducibility -The peak reproducibility report intersects all samples within a group using `bedtools intersect` with a minimum overlap controlled by `min_peak_overlap`. This report is useful along with the peak count report for estimating how reliable the peaks called are between your biological replicates. +The peak reproducibility report intersects all samples within a group (per caller) using `bedtools intersect` with a minimum overlap controlled by `min_peak_overlap`. This report is useful along with the peak count report for estimating how reliable the peaks called are between your biological replicates. For example, in the image below when combined with the peak count information we see that although the H3K27me3 replicates both have similar peak counts, < 30% of the peaks are replicated across the replicate set. For H3K4me3, we see that replicate 1 has a small number of peaks called, but that almost 100% of those peaks are replicated in the second replicate. Replicate 2 has < 20% of its replicates reproduced in replicate 1 but by looking at the peak counts we can see this is due to the low number of peaks called. @@ -463,7 +474,7 @@ For example, in the image below when combined with the peak count information we ### 7.3. FRiP Score -Fraction of fragments in peaks (FRiP), defined as the fraction of all mapped paired-end reads extended into fragments that fall into the called peak regions, i.e. usable fragments in significantly enriched peaks divided by all usable fragments. In general, FRiP scores correlate positively with the number of regions. (Landt et al, Genome Research Sept. 2012, 22(9): 1813–1831). A minimum overlap is controlled by `min_frip_overlap`. The FRiP score can be used to assess the overall quality of a sample. Poor samples with a high level of background noise, small numbers of called peaks or other issues will have a large number of fragments falling outside the peaks that were called. Generally FRiP scores > 0.3 are considered to be reasonable with the highest quality data having FRiP scores of > 0.7. +Fraction of fragments in peaks (FRiP), defined as the fraction of all mapped paired-end reads extended into fragments that fall into the called peak regions, i.e. usable fragments in significantly enriched peaks divided by all usable fragments. In general, FRiP scores correlate positively with the number of regions. (Landt et al, Genome Research Sept. 2012, 22(9): 1813–1831). A minimum overlap is controlled by `min_frip_overlap`. The FRiP score can be used to assess the overall quality of a sample. Poor samples with a high level of background noise, small numbers of called peaks or other issues will have a large number of fragments falling outside the peaks that were called. Generally FRiP scores > 0.3 are considered to be reasonable with the highest quality data having FRiP scores of > 0.7. FRiP is computed per caller when multiple peak callers are enabled. It is worth noting that the peak caller settings are also crucial to this score, as even the highest quality data will have a low FRiP score if the pipeline is parameterised in a way that calls few peaks, such as setting the peak calling threshold very high. @@ -477,7 +488,7 @@ CUT&Tag inserts adapters on either side of chromatin particles in the vicinity o ### 8.1. Heatmaps -Heatmaps for both genomic features and peaks are generated using deepTools. The parameters for the gene heatmap generation including kilobases to map before and after the gene body can be found with the prefix `dt_heatmap_gene_*`. Similarly, the peak-based heatmap parameters can be found using `dt_heatmap_peak_*`. +Heatmaps for both genomic features and peaks are generated using deepTools. The parameters for the gene heatmap generation including kilobases to map before and after the gene body can be found with the prefix `dt_heatmap_gene_*`. Similarly, the peak-based heatmap parameters can be found using `dt_heatmap_peak_*`. When multiple peak callers are enabled, peak-based heatmaps are generated per caller and filenames include the caller id. **NB:** These reports are generated outside of MultiQC diff --git a/modules/local/peak_counts.nf b/modules/local/peak_counts.nf index 59a14758..d169d82d 100644 --- a/modules/local/peak_counts.nf +++ b/modules/local/peak_counts.nf @@ -12,6 +12,7 @@ process PEAK_COUNTS { path peak_counts_header output: + tuple val(meta), path("*_peak_count.txt"), emit: count_value tuple val(meta), path("*mqc.tsv"), emit: count_mqc path "versions.yml" , emit: versions @@ -21,7 +22,9 @@ process PEAK_COUNTS { script: def prefix = task.ext.prefix ?: "${meta.id}" """ - cat ${bed} | wc -l | awk -v OFS='\t' '{ print "Peak Count", \$1 }' | cat $peak_counts_header - > ${prefix}_mqc.tsv + PEAK_COUNT=\$(cat ${bed} | wc -l | awk '{print \$1}') + printf "%s\\n" "\$PEAK_COUNT" > ${prefix}_peak_count.txt + printf "Peak Count\\t%s\\n" "\$PEAK_COUNT" | cat $peak_counts_header - > ${prefix}_mqc.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/peak_frip.nf b/modules/local/peak_frip.nf index 64ebc387..24f9a49d 100644 --- a/modules/local/peak_frip.nf +++ b/modules/local/peak_frip.nf @@ -13,6 +13,7 @@ process PEAK_FRIP { val min_frip_overlap output: + tuple val(meta), path("*_frip_score.txt"), emit: frip_score tuple val(meta), path("*mqc.tsv"), emit: frip_mqc path "versions.yml" , emit: versions @@ -23,7 +24,10 @@ process PEAK_FRIP { def prefix = task.ext.prefix ?: "${meta.id}" """ READS_IN_PEAKS=\$(bedtools intersect -a ${fragments_bed} -b ${peaks_bed} -bed -c -f $min_frip_overlap | awk -F '\t' '{sum += \$NF} END {print sum * 2}') - grep -m 1 'mapped (' ${flagstat} | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "Peak FRiP Score", a/\$1}' | cat $frip_score_header - > ${prefix}_mqc.tsv + MAPPED_READS=\$(grep -m 1 'mapped (' ${flagstat} | awk '{print \$1}') + FRIP_SCORE=\$(awk -v a="\$READS_IN_PEAKS" -v b="\$MAPPED_READS" 'BEGIN {OFS="\\t"} {if (b > 0) {print a / b} else {print 0}}') + printf "%s\\n" "\$FRIP_SCORE" > ${prefix}_frip_score.txt + printf "Peak FRiP Score\\t%s\\n" "\$FRIP_SCORE" | cat $frip_score_header - > ${prefix}_mqc.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/peak_qc_table_report.nf b/modules/local/peak_qc_table_report.nf new file mode 100644 index 00000000..cd025c90 --- /dev/null +++ b/modules/local/peak_qc_table_report.nf @@ -0,0 +1,34 @@ +process PEAK_QC_TABLE_REPORT { + tag "$table_id" + label 'process_single' + + conda "conda-forge::coreutils=9.5" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(table_id), val(header), val(rows) + + output: + path "${table_id}.tsv", emit: tsv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def lines = rows ? rows.join('\n') : '' + """ + printf "%s\\n" "${header}" > ${table_id}.tsv + if [ -n "${lines}" ]; then + printf "%s\\n" "${lines}" >> ${table_id}.tsv + fi + + coreutils_version=\$(cat --version | head -n 1 | awk '{print \$NF}') + { + echo "\\"${task.process}\\":" + echo " coreutils: \$coreutils_version" + } > versions.yml + """ +} diff --git a/subworkflows/local/peak_qc.nf b/subworkflows/local/peak_qc.nf index 35d52a73..63adf6e0 100644 --- a/subworkflows/local/peak_qc.nf +++ b/subworkflows/local/peak_qc.nf @@ -9,6 +9,10 @@ include { CUT as CUT_CALC_REPROD } from "../../modules/local/linux include { BEDTOOLS_INTERSECT } from "../../modules/nf-core/bedtools/intersect/main.nf" include { CALCULATE_PEAK_REPROD } from "../../modules/local/python/peak_reprod" include { PLOT_CONSENSUS_PEAKS } from '../../modules/local/python/plot_consensus_peaks' +include { PEAK_QC_TABLE_REPORT as PEAK_QC_FRIP_REPORT } from "../../modules/local/peak_qc_table_report" +include { PEAK_QC_TABLE_REPORT as PEAK_QC_COUNTS_REPORT } from "../../modules/local/peak_qc_table_report" +include { PEAK_QC_TABLE_REPORT as PEAK_QC_CONSENSUS_REPORT } from "../../modules/local/peak_qc_table_report" +include { PEAK_QC_TABLE_REPORT as PEAK_QC_REPROD_REPORT } from "../../modules/local/peak_qc_table_report" workflow PEAK_QC { take: @@ -39,7 +43,7 @@ workflow PEAK_QC { .set { ch_frip } /* - * MODULE: Calculate frip scores for primary peaks + * MODULE: Calculate frip scores for sample peaks */ PEAK_FRIP( ch_frip, @@ -50,7 +54,7 @@ workflow PEAK_QC { // PEAK_FRIP.out.frip_mqc | view /* - * MODULE: Calculate peak counts for primary peaks + * MODULE: Calculate peak counts for sample peaks */ PRIMARY_PEAK_COUNTS( peaks, @@ -139,6 +143,78 @@ workflow PEAK_QC { //EXAMPLE CHANNEL STRUCT: [[META], TSV] //CALCULATE_PEAK_REPROD.out.tsv + /* + * MODULE: Write caller-aware peak QC tables + */ + def sample_header_prefix = "sample_id\tgroup\tcondition\treplicate\tcaller_id" + + PEAK_FRIP.out.frip_score + .map { meta, score_file -> + def score = score_file.text.trim() + def replicate = meta.replicate ?: 'NA' + def group = meta.group ?: 'NA' + def condition = meta.condition ?: 'NA' + def caller = meta.caller ?: 'NA' + [meta.id, group, condition, replicate, caller, score].join('\t') + } + .toList() + .ifEmpty([]) + .map { rows -> ['peak_frip_scores', "${sample_header_prefix}\tfrip_score", rows] } + .set { ch_frip_table } + + PEAK_QC_FRIP_REPORT(ch_frip_table) + ch_versions = ch_versions.mix(PEAK_QC_FRIP_REPORT.out.versions) + + PRIMARY_PEAK_COUNTS.out.count_value + .map { meta, count_file -> + def count = count_file.text.trim() + def replicate = meta.replicate ?: 'NA' + def group = meta.group ?: 'NA' + def condition = meta.condition ?: 'NA' + def caller = meta.caller ?: 'NA' + [meta.id, group, condition, replicate, caller, count].join('\t') + } + .toList() + .ifEmpty([]) + .map { rows -> ['peak_counts', "${sample_header_prefix}\tpeak_count", rows] } + .set { ch_peak_count_table } + + PEAK_QC_COUNTS_REPORT(ch_peak_count_table) + ch_versions = ch_versions.mix(PEAK_QC_COUNTS_REPORT.out.versions) + + CONSENSUS_PEAK_COUNTS.out.count_value + .map { meta, count_file -> + def count = count_file.text.trim() + def group = meta.group ?: 'NA' + def condition = meta.condition ?: 'NA' + def caller = meta.caller ?: 'NA' + [meta.id, group, condition, 'NA', caller, count].join('\t') + } + .toList() + .ifEmpty([]) + .map { rows -> ['consensus_peak_counts', "${sample_header_prefix}\tconsensus_peak_count", rows] } + .set { ch_consensus_count_table } + + PEAK_QC_CONSENSUS_REPORT(ch_consensus_count_table) + ch_versions = ch_versions.mix(PEAK_QC_CONSENSUS_REPORT.out.versions) + + CALCULATE_PEAK_REPROD.out.tsv + .map { meta, repro_file -> + def parts = repro_file.text.trim().split('\t') + def value = parts.size() > 1 ? parts[1] : '' + def group = meta.group ?: 'NA' + def condition = meta.condition ?: 'NA' + def caller = meta.caller ?: 'NA' + [meta.id, group, condition, 'NA', caller, value].join('\t') + } + .toList() + .ifEmpty([]) + .map { rows -> ['peak_reproducibility', "${sample_header_prefix}\tpeak_reproducibility_percent", rows] } + .set { ch_reprod_table } + + PEAK_QC_REPROD_REPORT(ch_reprod_table) + ch_versions = ch_versions.mix(PEAK_QC_REPROD_REPORT.out.versions) + /* * CHANNEL: Prep for upset input */ diff --git a/workflows/cutandrun.nf b/workflows/cutandrun.nf index d97aae21..923ecb76 100644 --- a/workflows/cutandrun.nf +++ b/workflows/cutandrun.nf @@ -539,31 +539,49 @@ workflow CUTANDRUN { ch_peaks_primary = ch_peaks_all.filter { it[0].caller == callers[0] } ch_peaks_secondary = ch_peaks_all.filter { it[0].caller != callers[0] } - if(callers[0] == 'seacr') { + /* + * CHANNEL: Build summit/peak inputs for heatmaps per caller + */ + ch_peaks_summits = Channel.empty() + if (callers.contains('seacr')) { /* * MODULE: Extract summits from seacr peak beds */ AWK_EXTRACT_SUMMITS ( - ch_peaks_primary + ch_peaks_all.filter { it[0].caller == 'seacr' } ) - ch_peaks_summits = AWK_EXTRACT_SUMMITS.out.file + ch_peaks_summits = ch_peaks_summits.mix(AWK_EXTRACT_SUMMITS.out.file) ch_software_versions = ch_software_versions.mix(AWK_EXTRACT_SUMMITS.out.versions) //AWK_EXTRACT_SUMMITS.out.file | view - } else if (callers[0].startsWith('macs2')) { - def use_macs2_summits = true - if (callers[0] == 'macs2_broad') { - use_macs2_summits = false - } else if (callers[0] == 'macs2' && !params.macs2_narrow_peak) { - use_macs2_summits = false - } - if (use_macs2_summits) { - ch_peaks_summits = ch_macs2_summits - .filter { it[0].caller == callers[0] } - } else { - ch_peaks_summits = ch_peaks_primary - } - } else { - ch_peaks_summits = ch_peaks_primary + } + + def macs2_summit_callers = [] + if (callers.contains('macs2_narrow')) { + macs2_summit_callers.add('macs2_narrow') + } + if (callers.contains('macs2') && params.macs2_narrow_peak) { + macs2_summit_callers.add('macs2') + } + + if (macs2_summit_callers) { + ch_peaks_summits = ch_peaks_summits.mix( + ch_macs2_summits.filter { macs2_summit_callers.contains(it[0].caller) } + ) + } + + def macs2_no_summit_callers = callers.findAll { it.startsWith('macs2') && !macs2_summit_callers.contains(it) } + if (macs2_no_summit_callers) { + ch_peaks_summits = ch_peaks_summits.mix( + ch_peaks_all.filter { macs2_no_summit_callers.contains(it[0].caller) } + ) + } + + def summit_override_callers = ['seacr'] + callers.findAll { it.startsWith('macs2') } + def other_callers = callers.findAll { !summit_override_callers.contains(it) } + if (other_callers) { + ch_peaks_summits = ch_peaks_summits.mix( + ch_peaks_all.filter { other_callers.contains(it[0].caller) } + ) } /* @@ -709,7 +727,7 @@ workflow CUTANDRUN { * CHANNEL: Structure output for join on id */ ch_peaks_summits - .map { row -> [row[0].id, row ].flatten()} + .map { meta, bed -> [meta.id, meta, bed] } .set { ch_peaks_summits_id } //ch_peaks_bed_id | view @@ -717,19 +735,25 @@ workflow CUTANDRUN { * CHANNEL: Join beds and bigwigs on id */ ch_bigwig_no_igg - .map { row -> [row[0].id, row ].flatten()} + .map { meta, bigwig -> [meta.id, bigwig] } .join ( ch_peaks_summits_id ) - .filter ( it -> it[-1].size() > 1) + .map { row -> + def peak_meta = row[2] + def bed = row[3] + def heatmap_meta = peak_meta + [id: "${peak_meta.id}_${peak_meta.caller}"] + [ heatmap_meta, row[1], bed ] + } + .filter ( it -> it[2].size() > 1) .set { ch_dt_bigwig_summits } //ch_dt_peaks | view ch_dt_bigwig_summits - .map { row -> row[1,2] } + .map { row -> [row[0], row[1]] } .set { ch_ordered_bigwig } //ch_ordered_bigwig | view ch_dt_bigwig_summits - .map { row -> row[-1] } + .map { row -> row[2] } .set { ch_ordered_peaks_max } //ch_ordered_peaks_max | view @@ -817,22 +841,19 @@ workflow CUTANDRUN { * SUBWORKFLOW: Run suite of peak QC on peaks */ AWK_NAME_PEAK_BED.out.file - .filter { it[0].caller == callers[0] } - .set { ch_peaks_with_ids_primary } + .set { ch_peaks_with_ids_all } ch_consensus_peaks - .filter { it[0].caller == callers[0] } - .set { ch_consensus_peaks_primary } + .set { ch_consensus_peaks_all } ch_consensus_peaks_unfilt - .filter { it[0].caller == callers[0] } - .set { ch_consensus_peaks_unfilt_primary } + .set { ch_consensus_peaks_unfilt_all } PEAK_QC( - ch_peaks_primary, - ch_peaks_with_ids_primary, - ch_consensus_peaks_primary, - ch_consensus_peaks_unfilt_primary, + ch_peaks_all, + ch_peaks_with_ids_all, + ch_consensus_peaks_all, + ch_consensus_peaks_unfilt_all, EXTRACT_FRAGMENTS.out.bed, ch_flagstat_target, params.min_frip_overlap, From 019dd4aefa66dd28b02dcfa592c6c1c4599e32ee Mon Sep 17 00:00:00 2001 From: dhusmann Date: Sun, 4 Jan 2026 23:28:12 -0800 Subject: [PATCH 04/19] Align peak outputs to biological sample IDs --- conf/modules.config | 26 +++++----- subworkflows/local/peak_calling_extended.nf | 32 ++++++------- subworkflows/local/peak_qc.nf | 12 +++-- subworkflows/local/prepare_peakcalling.nf | 2 +- tests/test_04_bam_scaling.yml | 14 +++--- tests/test_05_peak_callers.yml | 48 +++++++++---------- ...est_verify_output_05_only_peak_calling.yml | 16 +++---- tests/test_verify_output_06_skip_heatmaps.yml | 8 ++-- .../test_verify_output_06_skip_reporting.yml | 4 +- workflows/cutandrun.nf | 16 +++++-- 10 files changed, 96 insertions(+), 82 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index caf6021c..93784de3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -617,7 +617,7 @@ if(params.run_peak_calling && (params.normalisation_mode != "Spikein" && params. "--normalizeUsing ${params.normalisation_mode}", params.extend_fragments ? '--extendReads' : '', ].join(' ').trim() - ext.prefix = { "${meta.id}.bedgraph" } + ext.prefix = { "${meta.sample_id ?: meta.id}.bedgraph" } publishDir = [ enabled: false ] @@ -648,7 +648,7 @@ if(params.run_peak_calling) { } withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:BEDTOOLS_SORT' { - ext.prefix = { "${meta.id}.sorted" } + ext.prefix = { "${meta.sample_id ?: meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/03_peak_calling/01_bam_to_bedgraph" }, mode: "${params.publish_dir_mode}", @@ -658,7 +658,7 @@ if(params.run_peak_calling) { } withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:UCSC_BEDCLIP' { - ext.prefix = { "${meta.id}.clipped" } + ext.prefix = { "${meta.sample_id ?: meta.id}.clipped" } publishDir = [ path: { "${params.outdir}/03_peak_calling/02_clip_bed" }, mode: "${params.publish_dir_mode}", @@ -668,6 +668,7 @@ if(params.run_peak_calling) { } withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:UCSC_BEDGRAPHTOBIGWIG' { + ext.prefix = { "${meta.sample_id ?: meta.id}" } publishDir = [ path: { "${params.outdir}/03_peak_calling/03_bed_to_bigwig" }, mode: "${params.publish_dir_mode}", @@ -682,7 +683,7 @@ if(params.run_peak_calling && 'seacr' in params.callers) { process { withName: '.*:CUTANDRUN:.*SEACR_.*' { ext.args = "${params.seacr_norm} ${params.seacr_stringent}" - ext.prefix = { "${meta.id}_${meta.caller}.seacr.peaks" } + ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}.seacr.peaks" } publishDir = [ path: { "${params.outdir}/03_peak_calling/04_called_peaks/seacr" }, mode: "${params.publish_dir_mode}", @@ -701,7 +702,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) { params.macs2_pvalue ? "-p ${params.macs2_pvalue}" : "-q ${params.macs2_qvalue}", params.macs2_narrow_peak ? '' : "--broad --broad-cutoff ${params.macs2_broad_cutoff}" ].join(' ').trim() - ext.prefix = { "${meta.id}_${meta.caller}" } + ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" } publishDir = [ path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" }, mode: "${params.publish_dir_mode}", @@ -712,7 +713,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) { withName: '.*:CUTANDRUN:.*MACS2_CALLPEAK_NARROW' { ext.args = "--nomodel --shift -75 --extsize 150 --keep-dup all -q 0.05 --format BAMPE" - ext.prefix = { "${meta.id}_${meta.caller}" } + ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" } publishDir = [ path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" }, mode: "${params.publish_dir_mode}", @@ -723,7 +724,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) { withName: '.*:CUTANDRUN:.*MACS2_CALLPEAK_BROAD' { ext.args = "--nomodel --shift -75 --extsize 150 --keep-dup all -q 0.05 --format BAMPE --broad --broad-cutoff 0.1" - ext.prefix = { "${meta.id}_${meta.caller}" } + ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" } publishDir = [ path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" }, mode: "${params.publish_dir_mode}", @@ -737,7 +738,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) { if(params.run_peak_calling && params.callers.any { it.startsWith('gopeaks') }) { process { withName: '.*:CUTANDRUN:.*GOPEAKS_CALLPEAK.*' { - ext.prefix = { "${meta.id}_${meta.caller}" } + ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" } publishDir = [ path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" }, mode: "${params.publish_dir_mode}", @@ -751,7 +752,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('gopeaks') }) { if(params.run_peak_calling && params.callers.any { it.startsWith('epic2') }) { process { withName: '.*:CUTANDRUN:.*EPIC2_CALLPEAK.*' { - ext.prefix = { "${meta.id}_${meta.caller}" } + ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" } publishDir = [ path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" }, mode: "${params.publish_dir_mode}", @@ -765,7 +766,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('epic2') }) { if(params.run_peak_calling && params.callers.any { it.startsWith('span') }) { process { withName: '.*:CUTANDRUN:.*SPAN_OMNIPEAKS.*' { - ext.prefix = { "${meta.id}_${meta.caller}" } + ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" } publishDir = [ path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" }, mode: "${params.publish_dir_mode}", @@ -810,6 +811,7 @@ if(params.run_peak_calling && params.callers.any { it.startsWith('macs2') }) { ext.args = "-f 1-6" ext.ext = "bed" ext.suffix = { ".${meta.caller}.peaks" } + ext.prefix = { "${meta.sample_id ?: meta.id}" } publishDir = [ path: { "${params.outdir}/03_peak_calling/04_called_peaks/${meta.caller}" }, mode: "${params.publish_dir_mode}", @@ -1072,14 +1074,14 @@ if (params.run_reporting && params.run_peak_qc) { } withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PEAK_FRIP' { - ext.prefix = { "${meta.id}_${meta.caller}" } + ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" } publishDir = [ enabled: false ] } withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:PRIMARY_PEAK_COUNTS' { - ext.prefix = { "${meta.id}_${meta.caller}" } + ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" } publishDir = [ enabled: false ] diff --git a/subworkflows/local/peak_calling_extended.nf b/subworkflows/local/peak_calling_extended.nf index fe52eb65..f15304c1 100644 --- a/subworkflows/local/peak_calling_extended.nf +++ b/subworkflows/local/peak_calling_extended.nf @@ -116,7 +116,7 @@ workflow PEAK_CALLING_EXTENDED { .filter { meta, bam -> meta.control_condition && meta.control_condition != meta.condition } .map { meta, bam -> [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, control_group: meta.control_group, @@ -328,7 +328,7 @@ workflow PEAK_CALLING_EXTENDED { .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' } .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, caller_id: 'epic2_200bp', @@ -344,9 +344,9 @@ workflow PEAK_CALLING_EXTENDED { ch_control_fallbacks = ch_control_fallbacks.mix( ch_epic2_200_branch.skip .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> - log.warn("Skipping epic2_200bp for sample ${meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") + log.warn("Skipping epic2_200bp for sample ${meta.sample_id ?: meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, caller_id: 'epic2_200bp', @@ -386,7 +386,7 @@ workflow PEAK_CALLING_EXTENDED { .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' } .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, caller_id: 'epic2_150bp', @@ -402,9 +402,9 @@ workflow PEAK_CALLING_EXTENDED { ch_control_fallbacks = ch_control_fallbacks.mix( ch_epic2_150_branch.skip .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> - log.warn("Skipping epic2_150bp for sample ${meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") + log.warn("Skipping epic2_150bp for sample ${meta.sample_id ?: meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, caller_id: 'epic2_150bp', @@ -444,7 +444,7 @@ workflow PEAK_CALLING_EXTENDED { .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' } .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, caller_id: 'epic2_25bp', @@ -460,9 +460,9 @@ workflow PEAK_CALLING_EXTENDED { ch_control_fallbacks = ch_control_fallbacks.mix( ch_epic2_25_branch.skip .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> - log.warn("Skipping epic2_25bp for sample ${meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") + log.warn("Skipping epic2_25bp for sample ${meta.sample_id ?: meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, caller_id: 'epic2_25bp', @@ -506,7 +506,7 @@ workflow PEAK_CALLING_EXTENDED { .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' } .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, caller_id: 'span_default', @@ -522,9 +522,9 @@ workflow PEAK_CALLING_EXTENDED { ch_control_fallbacks = ch_control_fallbacks.mix( ch_span_default_branch.skip .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> - log.warn("Skipping span_default for sample ${meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") + log.warn("Skipping span_default for sample ${meta.sample_id ?: meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, caller_id: 'span_default', @@ -565,7 +565,7 @@ workflow PEAK_CALLING_EXTENDED { .filter { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> status != 'exact_match' } .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, caller_id: 'span_stringent', @@ -581,9 +581,9 @@ workflow PEAK_CALLING_EXTENDED { ch_control_fallbacks = ch_control_fallbacks.mix( ch_span_stringent_branch.skip .map { meta, bam, control_bam, used_condition, status, action, reason, pooled_path -> - log.warn("Skipping span_stringent for sample ${meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") + log.warn("Skipping span_stringent for sample ${meta.sample_id ?: meta.id} (control_group=${meta.control_group}, condition=${meta.condition}) - no pooled control available.") [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, caller_id: 'span_stringent', diff --git a/subworkflows/local/peak_qc.nf b/subworkflows/local/peak_qc.nf index 63adf6e0..6ba27391 100644 --- a/subworkflows/local/peak_qc.nf +++ b/subworkflows/local/peak_qc.nf @@ -155,7 +155,8 @@ workflow PEAK_QC { def group = meta.group ?: 'NA' def condition = meta.condition ?: 'NA' def caller = meta.caller ?: 'NA' - [meta.id, group, condition, replicate, caller, score].join('\t') + def sample_id = meta.sample_id ?: meta.id + [sample_id, group, condition, replicate, caller, score].join('\t') } .toList() .ifEmpty([]) @@ -172,7 +173,8 @@ workflow PEAK_QC { def group = meta.group ?: 'NA' def condition = meta.condition ?: 'NA' def caller = meta.caller ?: 'NA' - [meta.id, group, condition, replicate, caller, count].join('\t') + def sample_id = meta.sample_id ?: meta.id + [sample_id, group, condition, replicate, caller, count].join('\t') } .toList() .ifEmpty([]) @@ -188,7 +190,8 @@ workflow PEAK_QC { def group = meta.group ?: 'NA' def condition = meta.condition ?: 'NA' def caller = meta.caller ?: 'NA' - [meta.id, group, condition, 'NA', caller, count].join('\t') + def sample_id = meta.sample_id ?: meta.id + [sample_id, group, condition, 'NA', caller, count].join('\t') } .toList() .ifEmpty([]) @@ -205,7 +208,8 @@ workflow PEAK_QC { def group = meta.group ?: 'NA' def condition = meta.condition ?: 'NA' def caller = meta.caller ?: 'NA' - [meta.id, group, condition, 'NA', caller, value].join('\t') + def sample_id = meta.sample_id ?: meta.id + [sample_id, group, condition, 'NA', caller, value].join('\t') } .toList() .ifEmpty([]) diff --git a/subworkflows/local/prepare_peakcalling.nf b/subworkflows/local/prepare_peakcalling.nf index f65bd022..37aa892c 100644 --- a/subworkflows/local/prepare_peakcalling.nf +++ b/subworkflows/local/prepare_peakcalling.nf @@ -96,7 +96,7 @@ workflow PREPARE_PEAKCALLING { ch_bam_scale_factor_report .map { meta, bam, scale, reads, scope_id -> [ - sample_id: meta.id, + sample_id: meta.sample_id ?: meta.id, group: meta.group, condition: meta.condition, replicate: meta.replicate, diff --git a/tests/test_04_bam_scaling.yml b/tests/test_04_bam_scaling.yml index 2b2f68f7..e1b50b61 100644 --- a/tests/test_04_bam_scaling.yml +++ b/tests/test_04_bam_scaling.yml @@ -4,7 +4,7 @@ - test_bam_scale - test_bam_scale_none files: - - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph + - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph - name: test_bam_scale_spikein command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode Spikein -c tests/config/nextflow.config @@ -12,7 +12,7 @@ - test_bam_scale - test_bam_scale_spikein files: - - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph + - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph - name: test_bam_scale_cpm command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode CPM -c tests/config/nextflow.config @@ -20,7 +20,7 @@ - test_bam_scale - test_bam_scale_cpm files: - - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph + - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph - name: test_bam_scale_rpkm command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode RPKM -c tests/config/nextflow.config @@ -28,7 +28,7 @@ - test_bam_scale - test_bam_scale_rpkm files: - - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph + - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph - name: test_bam_scale_bpm command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode BPM -c tests/config/nextflow.config @@ -36,7 +36,7 @@ - test_bam_scale - test_bam_scale_bpm files: - - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph + - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph - name: test_bam_scale_cpm_iggscale command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode CPM --igg_scale_factor 0.1 -c tests/config/nextflow.config @@ -44,8 +44,8 @@ - test_bam_scale - test_bam_scale_cpm_iggscale files: - - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph - - path: results/03_peak_calling/01_bam_to_bedgraph/igg_ctrl_NA_rep1_T1.sorted.bedGraph + - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph + - path: results/03_peak_calling/01_bam_to_bedgraph/igg_ctrl_NA_rep1.sorted.bedGraph - name: test_normalisation_mode_invalid command: nextflow run main.nf -profile docker,test --only_peak_calling --normalisation_mode test -c tests/config/nextflow.config diff --git a/tests/test_05_peak_callers.yml b/tests/test_05_peak_callers.yml index 70219dac..13fe2348 100644 --- a/tests/test_05_peak_callers.yml +++ b/tests/test_05_peak_callers.yml @@ -4,7 +4,7 @@ - test_peak_callers - test_peak_callers_seacr files: - - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed - name: test_peak_callers_macs2 command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller macs2 -c tests/config/nextflow.config @@ -12,7 +12,7 @@ - test_peak_callers - test_peak_callers_macs2 files: - - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_T1_macs2_peaks.narrowPeak + - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_macs2_peaks.narrowPeak - name: test_peak_callers_macs2_broad_peak command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller macs2 --macs2_narrow_peak false -c tests/config/nextflow.config @@ -20,7 +20,7 @@ - test_peak_callers - test_peak_callers_macs2 files: - - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_T1_macs2_peaks.broadPeak + - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_macs2_peaks.broadPeak - name: test_peak_callers_invalid_name command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller test -c tests/config/nextflow.config @@ -35,8 +35,8 @@ - test_peak_callers - test_peak_callers_seacr_macs2 files: - - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed - - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_T1_macs2_peaks.narrowPeak + - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_macs2_peaks.narrowPeak - name: test_peak_callers_macs2_seacr command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller macs2,seacr -c tests/config/nextflow.config @@ -44,8 +44,8 @@ - test_peak_callers - test_peak_callers_macs2_seacr files: - - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed - - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_T1_macs2_peaks.narrowPeak + - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_macs2_peaks.narrowPeak - name: test_peak_callers_seacr_macs2_noigg command: nextflow run main.nf -profile docker,test_no_control --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr,macs2 -c tests/config/nextflow.config @@ -53,8 +53,8 @@ - test_peak_callers - test_peak_callers_seacr_macs2_noigg files: - - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed - - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_T1_macs2_peaks.narrowPeak + - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/macs2/h3k27me3_NA_rep1_macs2_peaks.narrowPeak - name: test_peak_callers_seacr_single_ctrl_multi_rep command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-single-ctrl.csv -c tests/config/nextflow.config @@ -62,10 +62,10 @@ - test_peak_callers - test_peak_callers_ctrl_tests files: - - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed - - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep2_T1_seacr.seacr.peaks.stringent.bed - - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed - - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep2_T1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep2_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep2_seacr.seacr.peaks.stringent.bed - name: test_peak_callers_seacr_multi_single_ctrl_multi_rep command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller seacr --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-all-multi-rep-multi-single-ctrl.csv -c tests/config/nextflow.config @@ -73,10 +73,10 @@ - test_peak_callers - test_peak_callers_ctrl_tests files: - - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed - - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep2_T1_seacr.seacr.peaks.stringent.bed - - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed - - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep2_T1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/h3k4me3_NA_rep2_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep2_seacr.seacr.peaks.stringent.bed - name: test_peak_callers_gopeaks_condition command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller gopeaks_narrow --input tests/assets/samplesheet_condition.csv -c tests/config/nextflow.config @@ -84,8 +84,8 @@ - test_peak_callers - test_peak_callers_gopeaks files: - - path: results/03_peak_calling/04_called_peaks/gopeaks_narrow/h3k27me3_Control_rep1_T1_gopeaks_narrow.bed - - path: results/03_peak_calling/04_called_peaks/gopeaks_narrow/h3k27me3_Control_rep1_T1_gopeaks_narrow_gopeaks.json + - path: results/03_peak_calling/04_called_peaks/gopeaks_narrow/h3k27me3_Control_rep1_gopeaks_narrow.bed + - path: results/03_peak_calling/04_called_peaks/gopeaks_narrow/h3k27me3_Control_rep1_gopeaks_narrow_gopeaks.json - path: results/03_peak_calling/05_consensus_peaks/h3k27me3/Control/h3k27me3_Control_gopeaks_narrow.consensus.peak_counts.bed - name: test_peak_callers_epic2_condition @@ -98,7 +98,7 @@ - path: results/03_peak_calling/01_pooled_controls/igg_ctrl_Control.bam.bai - path: results/03_peak_calling/01_pooled_controls/igg_ctrl_Treatment.bam - path: results/03_peak_calling/01_pooled_controls/igg_ctrl_Treatment.bam.bai - - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_T1_epic2_200bp.peaks + - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_epic2_200bp.peaks - name: test_peak_callers_preset_extended command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller_preset extended --epic2_genome hg38 --omnipeaks_jar tests/assets/omnipeaks_stub.jar -c tests/config/nextflow.config @@ -107,7 +107,7 @@ - test_peak_callers_preset_extended files: - path: results/03_peak_calling/01_pooled_controls/igg_ctrl_NA.bam - - path: results/03_peak_calling/04_called_peaks/macs2_narrow/h3k27me3_NA_rep1_T1_macs2_narrow_peaks.narrowPeak - - path: results/03_peak_calling/04_called_peaks/gopeaks_broad/h3k27me3_NA_rep1_T1_gopeaks_broad.bed - - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_NA_rep1_T1_epic2_200bp.peaks - - path: results/03_peak_calling/04_called_peaks/span_default/h3k27me3_NA_rep1_T1_span_default.peak + - path: results/03_peak_calling/04_called_peaks/macs2_narrow/h3k27me3_NA_rep1_macs2_narrow_peaks.narrowPeak + - path: results/03_peak_calling/04_called_peaks/gopeaks_broad/h3k27me3_NA_rep1_gopeaks_broad.bed + - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_NA_rep1_epic2_200bp.peaks + - path: results/03_peak_calling/04_called_peaks/span_default/h3k27me3_NA_rep1_span_default.peak diff --git a/tests/test_verify_output_05_only_peak_calling.yml b/tests/test_verify_output_05_only_peak_calling.yml index 6d4247ab..d993a28d 100644 --- a/tests/test_verify_output_05_only_peak_calling.yml +++ b/tests/test_verify_output_05_only_peak_calling.yml @@ -3,14 +3,14 @@ tags: - verify_output_peak_calling_only_peak_calling files: - - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1_T1.sorted.bedGraph - - path: results/03_peak_calling/01_bam_to_bedgraph/igg_ctrl_NA_rep1_T1.sorted.bedGraph - - path: results/03_peak_calling/02_clip_bed/h3k27me3_NA_rep1_T1.clipped.bedGraph - - path: results/03_peak_calling/02_clip_bed/igg_ctrl_NA_rep1_T1.clipped.bedGraph - - path: results/03_peak_calling/03_bed_to_bigwig/h3k27me3_NA_rep1_T1.bigWig - - path: results/03_peak_calling/03_bed_to_bigwig/igg_ctrl_NA_rep1_T1.bigWig - - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_T1_seacr.seacr.peaks.stringent.bed - - path: results/03_peak_calling/04_called_peaks/seacr/igg_ctrl_NA_rep1_T1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph + - path: results/03_peak_calling/01_bam_to_bedgraph/igg_ctrl_NA_rep1.sorted.bedGraph + - path: results/03_peak_calling/02_clip_bed/h3k27me3_NA_rep1.clipped.bedGraph + - path: results/03_peak_calling/02_clip_bed/igg_ctrl_NA_rep1.clipped.bedGraph + - path: results/03_peak_calling/03_bed_to_bigwig/h3k27me3_NA_rep1.bigWig + - path: results/03_peak_calling/03_bed_to_bigwig/igg_ctrl_NA_rep1.bigWig + - path: results/03_peak_calling/04_called_peaks/seacr/h3k27me3_NA_rep1_seacr.seacr.peaks.stringent.bed + - path: results/03_peak_calling/04_called_peaks/seacr/igg_ctrl_NA_rep1_seacr.seacr.peaks.stringent.bed should_exist: false - path: results/03_peak_calling/05_consensus_peaks/h3k27me3/NA/h3k27me3_NA_seacr_consensus.awk.bed - path: results/03_peak_calling/05_consensus_peaks/h3k27me3/NA/h3k27me3_NA_seacr.consensus.peak_counts.bed diff --git a/tests/test_verify_output_06_skip_heatmaps.yml b/tests/test_verify_output_06_skip_heatmaps.yml index 40d59ea7..0df553a2 100644 --- a/tests/test_verify_output_06_skip_heatmaps.yml +++ b/tests/test_verify_output_06_skip_heatmaps.yml @@ -4,8 +4,8 @@ - verify_output_reporting_skip_heatmaps - verify_output_reporting_skip_heatmaps_false files: - - path: results/04_reporting/deeptools_heatmaps/gene/h3k27me3_NA_rep1_T1.computeMatrix.mat.gz - - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_NA_rep1_T1.computeMatrix.mat.gz + - path: results/04_reporting/deeptools_heatmaps/gene/h3k27me3_NA_rep1.computeMatrix.mat.gz + - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_NA_rep1_seacr.computeMatrix.mat.gz - name: verify_output_reporting_skip_heatmaps_true command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_multiqc --skip_preseq --skip_heatmaps -c tests/config/nextflow.config @@ -13,9 +13,9 @@ - verify_output_reporting_skip_heatmaps - verify_output_reporting_skip_heatmaps_true files: - - path: results/04_reporting/deeptools_heatmaps/gene/h3k27me3_NA_rep1_T1.computeMatrix.mat.gz + - path: results/04_reporting/deeptools_heatmaps/gene/h3k27me3_NA_rep1.computeMatrix.mat.gz should_exist: false - - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_NA_rep1_T1.computeMatrix.mat.gz + - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_NA_rep1_seacr.computeMatrix.mat.gz should_exist: false - name: verify_output_reporting_skip_heatmaps_grouped diff --git a/tests/test_verify_output_06_skip_reporting.yml b/tests/test_verify_output_06_skip_reporting.yml index 37c2e6bd..e8fd3605 100644 --- a/tests/test_verify_output_06_skip_reporting.yml +++ b/tests/test_verify_output_06_skip_reporting.yml @@ -7,7 +7,7 @@ should_exist: false - path: results/04_reporting/deeptools_qc/all_target_bams.plotCorrelation.pdf should_exist: false - - path: results/04_reporting/heatmaps/peaks/h3k27me3_NA_rep1_T1.plotHeatmap.pdf + - path: results/04_reporting/deeptools_heatmaps/peaks/h3k27me3_NA_rep1_seacr.plotHeatmap.pdf should_exist: false - - path: results/04_reporting/deeptools_heatmaps/gene/h3k4me3_NA_rep1_T1.computeMatrix.mat.gz + - path: results/04_reporting/deeptools_heatmaps/gene/h3k4me3_NA_rep1.computeMatrix.mat.gz should_exist: false diff --git a/workflows/cutandrun.nf b/workflows/cutandrun.nf index 923ecb76..c163effc 100644 --- a/workflows/cutandrun.nf +++ b/workflows/cutandrun.nf @@ -706,11 +706,18 @@ workflow CUTANDRUN { .set { ch_bigwig_no_igg } // ch_bigwig_no_igg | view + /* + * CHANNEL: Use spec-facing sample ids for heatmap outputs + */ + ch_bigwig_no_igg + .map { meta, bigwig -> [meta + [id: (meta.sample_id ?: meta.id)], bigwig] } + .set { ch_bigwig_no_igg_heatmap } + /* * MODULE: Compute DeepTools matrix used in heatmap plotting for Genes */ DEEPTOOLS_COMPUTEMATRIX_GENE ( - ch_bigwig_no_igg, + ch_bigwig_no_igg_heatmap, PREPARE_GENOME.out.bed.collect() ) ch_software_versions = ch_software_versions.mix(DEEPTOOLS_COMPUTEMATRIX_GENE.out.versions) @@ -727,20 +734,21 @@ workflow CUTANDRUN { * CHANNEL: Structure output for join on id */ ch_peaks_summits - .map { meta, bed -> [meta.id, meta, bed] } + .map { meta, bed -> [meta.sample_id ?: meta.id, meta, bed] } .set { ch_peaks_summits_id } //ch_peaks_bed_id | view /* * CHANNEL: Join beds and bigwigs on id */ - ch_bigwig_no_igg + ch_bigwig_no_igg_heatmap .map { meta, bigwig -> [meta.id, bigwig] } .join ( ch_peaks_summits_id ) .map { row -> def peak_meta = row[2] def bed = row[3] - def heatmap_meta = peak_meta + [id: "${peak_meta.id}_${peak_meta.caller}"] + def sample_id = peak_meta.sample_id ?: peak_meta.id + def heatmap_meta = peak_meta + [id: "${sample_id}_${peak_meta.caller}"] [ heatmap_meta, row[1], bed ] } .filter ( it -> it[2].size() > 1) From 069fc0e1fef33afd5af71e5101d4e957dcd24039 Mon Sep 17 00:00:00 2001 From: dhusmann Date: Sun, 4 Jan 2026 23:30:51 -0800 Subject: [PATCH 05/19] Support consensus_grouping all --- conf/flowswitch.config | 2 +- docs/output.md | 3 +++ docs/usage.md | 2 +- nextflow_schema.json | 4 ++-- tests/test_06_consensus_peaks.yml | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/conf/flowswitch.config b/conf/flowswitch.config index a79202c8..8b0df7d8 100644 --- a/conf/flowswitch.config +++ b/conf/flowswitch.config @@ -53,7 +53,7 @@ if (params.peakcaller) { params.callers = caller_preset_map.standard } -if(params.consensus_peak_mode == 'all') { params.run_consensus_all = true } +if(params.consensus_peak_mode == 'all' || params.consensus_grouping == 'all') { params.run_consensus_all = true } if(params.remove_linear_duplicates) { params.run_remove_linear_dups = true } if(params.skip_removeduplicates || !params.run_mark_dups) { params.run_remove_dups = false } diff --git a/docs/output.md b/docs/output.md index 0c8cb76f..09176fe6 100644 --- a/docs/output.md +++ b/docs/output.md @@ -436,6 +436,9 @@ SPAN/OmniPeaks callers require pooled controls and the `--omnipeaks_jar` paramet - `03_peak_calling/05_consensus_peaks///` - `__.consensus.peak_counts.bed`: merged consensus peaks with replicate counts. - `___consensus.awk.bed`: filtered consensus peaks after applying `--replicate_threshold`. +- `03_peak_calling/05_consensus_peaks/all/all/` (when `--consensus_grouping all`) + - `all_samples__consensus.awk.bed`: filtered consensus peaks across all samples. + - `all_all_.consensus.peak_counts.bed`: merged consensus peaks with replicate counts across all samples. diff --git a/docs/usage.md b/docs/usage.md index e3ca9375..51fa8476 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -165,7 +165,7 @@ If control samples are provided in the sample sheet, they will be used to normal ### Consensus Peaks -After peak calling, consensus peaks are calculated by merging peaks within the same grouping key. Use `--consensus_grouping` to choose `group` or `group_condition`. By default, if the samplesheet includes a `condition` column, grouping uses `group_condition`; otherwise it falls back to `group`. The number of replicates required for a valid peak can be changed using `replicate_threshold`. To call consensus peaks across all samples, set `--consensus_peak_mode all`. +After peak calling, consensus peaks are calculated by merging peaks within the same grouping key. Use `--consensus_grouping` to choose `group`, `group_condition`, or `all`. By default, if the samplesheet includes a `condition` column, grouping uses `group_condition`; otherwise it falls back to `group`. Set `--consensus_grouping all` to call consensus peaks across all samples. The number of replicates required for a valid peak can be changed using `replicate_threshold`. The legacy `--consensus_peak_mode all` still maps to the same all-samples behavior. ### Reproducibility diff --git a/nextflow_schema.json b/nextflow_schema.json index eafef6b2..4efc799d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -474,8 +474,8 @@ "consensus_grouping": { "type": "string", "fa_icon": "fas fa-align-justify", - "description": "Grouping key for consensus peaks when consensus_peak_mode is group. Options are [group, group_condition]. Defaults to group_condition when the samplesheet has a condition column, otherwise group.", - "enum": ["group", "group_condition"] + "description": "Grouping key for consensus peaks when consensus_peak_mode is group. Options are [group, group_condition, all]. Defaults to group_condition when the samplesheet has a condition column, otherwise group. Set to all to compute consensus across all samples.", + "enum": ["group", "group_condition", "all"] }, "replicate_threshold": { "type": "number", diff --git a/tests/test_06_consensus_peaks.yml b/tests/test_06_consensus_peaks.yml index 093fe3e3..29c5ca10 100644 --- a/tests/test_06_consensus_peaks.yml +++ b/tests/test_06_consensus_peaks.yml @@ -10,7 +10,7 @@ - path: results/03_peak_calling/05_consensus_peaks/h3k27me3/NA/h3k27me3_NA_seacr_consensus.awk.bed - name: test_consensus_peaks_all - command: nextflow run main.nf -profile docker,test_full_small --only_peak_calling --skip_fastqc --skip_multiqc --skip_preseq --consensus_peak_mode all -c tests/config/nextflow.config + command: nextflow run main.nf -profile docker,test_full_small --only_peak_calling --skip_fastqc --skip_multiqc --skip_preseq --consensus_grouping all -c tests/config/nextflow.config tags: - test_consensus_peaks - test_consensus_peaks_all From 061930c9ae7f187a2a011f2861ad0639f211553b Mon Sep 17 00:00:00 2001 From: dhusmann Date: Sun, 4 Jan 2026 23:36:57 -0800 Subject: [PATCH 06/19] Add CI tests for control validation and QC --- .github/workflows/ci.yml | 1 + subworkflows/local/peak_calling_extended.nf | 14 +++++++------- .../samplesheet_condition_missing_control.csv | 4 ++++ tests/test_02_samplesheet_check.yml | 18 ++++++++++++++++++ tests/test_04_bam_scaling.yml | 1 + tests/test_05_peak_callers.yml | 17 +++++++++++++++++ tests/test_07_peak_qc_multi_caller.yml | 15 +++++++++++++++ 7 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 tests/assets/samplesheet_condition_missing_control.csv create mode 100644 tests/test_07_peak_qc_multi_caller.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2c402453..c0e05d22 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -174,6 +174,7 @@ jobs: - test_peak_callers_gopeaks - test_peak_callers_epic2 - test_peak_callers_preset_extended + - test_peak_qc_multi_caller - test_consensus_peaks_group - test_consensus_peaks_all - test_consensus_peaks_invalid diff --git a/subworkflows/local/peak_calling_extended.nf b/subworkflows/local/peak_calling_extended.nf index f15304c1..66b5469c 100644 --- a/subworkflows/local/peak_calling_extended.nf +++ b/subworkflows/local/peak_calling_extended.nf @@ -279,13 +279,13 @@ workflow PEAK_CALLING_EXTENDED { return [meta, bam, null, null, 'missing_control', 'skipped', 'no_control_for_group', null] } def exact = entries.find { it[0] == meta.control_condition } - def chosen = exact ?: entries[0] - def used_condition = chosen[0] - def control_bam = chosen[1] - def status = exact ? 'exact_match' : 'fallback_other_condition' - def action = 'used' - def reason = exact ? 'exact_condition' : 'condition_fallback' - [meta, bam, control_bam, used_condition, status, action, reason, control_bam] + if (exact) { + def used_condition = exact[0] + def control_bam = exact[1] + return [meta, bam, control_bam, used_condition, 'exact_match', 'used', 'exact_condition', control_bam] + } + def fallback_condition = entries[0][0] + return [meta, bam, null, fallback_condition, 'fallback_other_condition', 'skipped', 'no_exact_condition_match', null] } } diff --git a/tests/assets/samplesheet_condition_missing_control.csv b/tests/assets/samplesheet_condition_missing_control.csv new file mode 100644 index 00000000..e00945ee --- /dev/null +++ b/tests/assets/samplesheet_condition_missing_control.csv @@ -0,0 +1,4 @@ +group,condition,replicate,fastq_1,fastq_2,control +h3k27me3,Control,1,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r2.fastq.gz,igg_ctrl +h3k27me3,Treatment,1,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r2.fastq.gz,igg_ctrl +igg_ctrl,Control,1,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/igg_rep1_r1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/igg_rep1_r2.fastq.gz, diff --git a/tests/test_02_samplesheet_check.yml b/tests/test_02_samplesheet_check.yml index c3a85c52..806261f3 100644 --- a/tests/test_02_samplesheet_check.yml +++ b/tests/test_02_samplesheet_check.yml @@ -136,6 +136,24 @@ files: - path: results/pipeline_info/samplesheet.valid.csv +# Test condition samplesheet missing control (fail-fast) +- name: test_samplesheet_condition_missing_control_failfast + command: nextflow run main.nf -profile docker,test --only_input --input tests/assets/samplesheet_condition_missing_control.csv -c tests/config/nextflow.config + tags: + - test_samplesheet_condition + exit_code: 1 + stdout: + contains: + - "Missing condition-matched controls" + +# Test condition samplesheet missing control with override +- name: test_samplesheet_condition_missing_control_allow_cross + command: nextflow run main.nf -profile docker,test --only_input --allow_cross_condition_controls --input tests/assets/samplesheet_condition_missing_control.csv -c tests/config/nextflow.config + tags: + - test_samplesheet_condition + files: + - path: results/pipeline_info/samplesheet.valid.csv + # Test small tech reps - name: test_samplesheet_small_tech_reps command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-small-tech-reps.csv -c tests/config/nextflow.config diff --git a/tests/test_04_bam_scaling.yml b/tests/test_04_bam_scaling.yml index e1b50b61..beaea72b 100644 --- a/tests/test_04_bam_scaling.yml +++ b/tests/test_04_bam_scaling.yml @@ -13,6 +13,7 @@ - test_bam_scale_spikein files: - path: results/03_peak_calling/01_bam_to_bedgraph/h3k27me3_NA_rep1.sorted.bedGraph + - path: results/03_peak_calling/00_normalisation_factors/all.tsv - name: test_bam_scale_cpm command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_preseq --skip_multiqc --normalisation_mode CPM -c tests/config/nextflow.config diff --git a/tests/test_05_peak_callers.yml b/tests/test_05_peak_callers.yml index 13fe2348..053c4af4 100644 --- a/tests/test_05_peak_callers.yml +++ b/tests/test_05_peak_callers.yml @@ -100,6 +100,23 @@ - path: results/03_peak_calling/01_pooled_controls/igg_ctrl_Treatment.bam.bai - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_epic2_200bp.peaks +- name: test_peak_callers_epic2_missing_control_skip + command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller epic2_200bp --epic2_genome hg38 --allow_cross_condition_controls --input tests/assets/samplesheet_condition_missing_control.csv -c tests/config/nextflow.config + tags: + - test_peak_callers + - test_peak_callers_epic2 + files: + - path: results/03_peak_calling/07_qc_tables/control_pooling_fallbacks.tsv + contains: + - "h3k27me3_Treatment_rep1" + - "epic2_200bp" + - "fallback_other_condition" + - "skipped" + - "no_exact_condition_match" + - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_epic2_200bp.peaks + - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Treatment_rep1_epic2_200bp.peaks + should_exist: false + - name: test_peak_callers_preset_extended command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller_preset extended --epic2_genome hg38 --omnipeaks_jar tests/assets/omnipeaks_stub.jar -c tests/config/nextflow.config tags: diff --git a/tests/test_07_peak_qc_multi_caller.yml b/tests/test_07_peak_qc_multi_caller.yml new file mode 100644 index 00000000..f5e5b029 --- /dev/null +++ b/tests/test_07_peak_qc_multi_caller.yml @@ -0,0 +1,15 @@ +- name: test_peak_qc_multi_caller + command: nextflow run main.nf -profile docker,test --skip_fastqc --skip_preseq --skip_multiqc --peakcaller seacr,macs2_narrow -c tests/config/nextflow.config + tags: + - test_peak_qc_multi_caller + files: + - path: results/03_peak_calling/07_qc_tables/peak_counts.tsv + contains: + - "seacr" + - "macs2_narrow" + must_not_contain: + - "_T" + - path: results/03_peak_calling/07_qc_tables/peak_frip_scores.tsv + contains: + - "seacr" + - "macs2_narrow" From 981eedb4ab90be814f3af7448e136852676da1ab Mon Sep 17 00:00:00 2001 From: dhusmann Date: Mon, 5 Jan 2026 01:40:03 -0800 Subject: [PATCH 07/19] Fix control fallback handling and sample IDs --- bin/check_samplesheet.py | 44 +++++-- lib/WorkflowCutandrun.groovy | 4 +- subworkflows/local/peak_calling_extended.nf | 56 +++++---- .../samplesheet_condition_no_controls.csv | 3 + tests/test_02_samplesheet_check.yml | 11 ++ tests/test_05_peak_callers.yml | 22 +++- tests/test_verify_output_01_save_merged.yml | 4 +- tests/test_verify_output_01_save_trimmed.yml | 2 +- tests/test_verify_output_01_skip_fastqc.yml | 8 +- tests/test_verify_output_01_skip_trimming.yml | 2 +- tests/test_verify_output_03_only_align.yml | 24 ++-- ...t_verify_output_03_save_align_intermed.yml | 10 +- ...st_verify_output_03_save_spikein_align.yml | 12 +- .../test_verify_output_03_save_unaligned.yml | 8 +- tests/test_verify_output_04_duplicates.yml | 118 +++++++++--------- .../test_verify_output_04_only_filtering.yml | 12 +- tests/test_verify_output_06_skip_dt_qc.yml | 4 +- tests/test_verify_output_06_skip_preseq.yml | 6 +- 18 files changed, 205 insertions(+), 145 deletions(-) create mode 100644 tests/assets/samplesheet_condition_no_controls.csv diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index b6448982..d286bc43 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -89,6 +89,7 @@ def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_cont sample_run_dict = {} control_condition_map = {} missing_control_errors = [] + missing_control_warnings = [] cross_condition_warnings = [] legacy_na_warnings = [] @@ -247,6 +248,12 @@ def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_cont ## Check control group exists for ctrl in control_names_list: if ctrl != "" and ctrl not in sample_names_list: + if allow_cross_condition_controls: + print( + "WARNING: Control entry '{}' does not match any group entry; proceeding because " + "--allow_cross_condition_controls was set. Control-required callers will be skipped.".format(ctrl) + ) + continue print_error( "Each control entry must match at least one group entry! Unmatched control entry: {}.".format(ctrl) ) @@ -298,16 +305,18 @@ def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_cont ctrl_conditions = control_condition_map.get(info[3], set()) sample_id = "{}_{}_rep{}".format(info[0], info[1], info[2]) if not ctrl_conditions: - missing_control_errors.append( - { - "sample_id": sample_id, - "group": info[0], - "condition": info[1], - "replicate": info[2], - "control_group": info[3], - "control_conditions": "NONE", - } - ) + entry = { + "sample_id": sample_id, + "group": info[0], + "condition": info[1], + "replicate": info[2], + "control_group": info[3], + "control_conditions": "NONE", + } + if allow_cross_condition_controls: + missing_control_warnings.append(entry) + continue + missing_control_errors.append(entry) continue if info[1] in ctrl_conditions: continue @@ -360,6 +369,17 @@ def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_cont ) sys.exit(1) + if missing_control_warnings: + print( + "WARNING: No control rows exist for one or more control groups; proceeding because " + "--allow_cross_condition_controls was set. Control-required callers will be skipped for these samples." + ) + for entry in missing_control_warnings: + print( + " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | " + "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry) + ) + if legacy_na_warnings: print("WARNING: Control rows with condition=NA used for targets with explicit conditions (legacy mode).") for entry in legacy_na_warnings: @@ -409,8 +429,8 @@ def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_cont # print_error("Control group must match within technical replicates", tech_rep[2]) ## Write to file - for idx, sample_info in enumerate(sample_run_dict[sample_key][replicate]): - sample_id = "{}_{}_rep{}_T{}".format(sample_info[0], sample_info[1], replicate, idx + 1) + for sample_info in sample_run_dict[sample_key][replicate]: + sample_id = "{}_{}_rep{}".format(sample_info[0], sample_info[1], replicate) fout.write(",".join([sample_id] + sample_info[:7] + [sample_info[-1]]) + "\n") diff --git a/lib/WorkflowCutandrun.groovy b/lib/WorkflowCutandrun.groovy index e9e94093..213d5015 100755 --- a/lib/WorkflowCutandrun.groovy +++ b/lib/WorkflowCutandrun.groovy @@ -86,8 +86,8 @@ class WorkflowCutandrun { Nextflow.error "Invalid --igg_scale_scope value '${params.igg_scale_scope}'. Valid options: legacy, group_condition, sample." } - if (params.consensus_grouping && !['group','group_condition'].contains(params.consensus_grouping)) { - Nextflow.error "Invalid --consensus_grouping value '${params.consensus_grouping}'. Valid options: group, group_condition." + if (params.consensus_grouping && !['group','group_condition','all'].contains(params.consensus_grouping)) { + Nextflow.error "Invalid --consensus_grouping value '${params.consensus_grouping}'. Valid options: group, group_condition, all." } if (params.peakcaller_preset && !['standard','extended'].contains(params.peakcaller_preset.toLowerCase())) { diff --git a/subworkflows/local/peak_calling_extended.nf b/subworkflows/local/peak_calling_extended.nf index 66b5469c..0761736e 100644 --- a/subworkflows/local/peak_calling_extended.nf +++ b/subworkflows/local/peak_calling_extended.nf @@ -111,22 +111,6 @@ workflow PEAK_CALLING_EXTENDED { ch_bedgraph_target_cc = add_control_condition(bedgraph_target, bedgraph_control) ch_bam_target_cc = add_control_condition(bam_target, bam_control) - if (params.use_control) { - ch_control_fallbacks = ch_bam_target_cc - .filter { meta, bam -> meta.control_condition && meta.control_condition != meta.condition } - .map { meta, bam -> - [ - sample_id: meta.sample_id ?: meta.id, - group: meta.group, - condition: meta.condition, - control_group: meta.control_group, - control_condition: meta.control_condition, - caller: 'pooled_control', - reason: 'condition_mismatch' - ] - } - } - /* * SEACR */ @@ -276,16 +260,40 @@ workflow PEAK_CALLING_EXTENDED { .map { meta, bam, pooled_map -> def entries = pooled_map.get(meta.control_group, []) if (!entries) { - return [meta, bam, null, null, 'missing_control', 'skipped', 'no_control_for_group', null] + return [meta, bam, null, '', 'missing_control', 'skipped', 'no_control_for_group', null] + } + def desired_condition = meta.control_condition ?: meta.condition + def selected_entry = entries.find { it[0] == desired_condition } + def used_condition = null + def control_bam = null + def status = null + def reason = null + if (selected_entry) { + used_condition = selected_entry[0] + control_bam = selected_entry[1] + } else { + def na_entry = entries.find { it[0] == 'NA' } + if (na_entry) { + used_condition = na_entry[0] + control_bam = na_entry[1] + } else { + def fallback = entries.sort { it[0] }[0] + used_condition = fallback[0] + control_bam = fallback[1] + } } - def exact = entries.find { it[0] == meta.control_condition } - if (exact) { - def used_condition = exact[0] - def control_bam = exact[1] - return [meta, bam, control_bam, used_condition, 'exact_match', 'used', 'exact_condition', control_bam] + def exact_match = (used_condition == meta.condition) + if (exact_match) { + status = 'exact_match' + reason = 'exact_condition' + } else if (used_condition == 'NA') { + status = 'fallback_other_condition' + reason = 'legacy_na_control' + } else { + status = 'fallback_other_condition' + reason = 'condition_fallback' } - def fallback_condition = entries[0][0] - return [meta, bam, null, fallback_condition, 'fallback_other_condition', 'skipped', 'no_exact_condition_match', null] + return [meta, bam, control_bam, used_condition, status, 'used', reason, control_bam] } } diff --git a/tests/assets/samplesheet_condition_no_controls.csv b/tests/assets/samplesheet_condition_no_controls.csv new file mode 100644 index 00000000..0f1496c7 --- /dev/null +++ b/tests/assets/samplesheet_condition_no_controls.csv @@ -0,0 +1,3 @@ +group,condition,replicate,fastq_1,fastq_2,control +h3k27me3,Control,1,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r2.fastq.gz,igg_ctrl +h3k27me3,Treatment,1,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/testdata/GSE145187_10000/h3k27me3_rep1_r2.fastq.gz,igg_ctrl diff --git a/tests/test_02_samplesheet_check.yml b/tests/test_02_samplesheet_check.yml index 806261f3..2a463535 100644 --- a/tests/test_02_samplesheet_check.yml +++ b/tests/test_02_samplesheet_check.yml @@ -154,6 +154,17 @@ files: - path: results/pipeline_info/samplesheet.valid.csv +# Test condition samplesheet with no control rows and override +- name: test_samplesheet_condition_no_controls_allow_cross + command: nextflow run main.nf -profile docker,test --only_input --allow_cross_condition_controls --input tests/assets/samplesheet_condition_no_controls.csv -c tests/config/nextflow.config + tags: + - test_samplesheet_condition + files: + - path: results/pipeline_info/samplesheet.valid.csv + stdout: + contains: + - "No control rows exist for one or more control groups" + # Test small tech reps - name: test_samplesheet_small_tech_reps command: nextflow run main.nf -profile docker,test --only_input --input https://raw.githubusercontent.com/nf-core/test-datasets/cutandrun/samplesheet_2_0/test-GSE145187-small-tech-reps.csv -c tests/config/nextflow.config diff --git a/tests/test_05_peak_callers.yml b/tests/test_05_peak_callers.yml index 053c4af4..c10e23da 100644 --- a/tests/test_05_peak_callers.yml +++ b/tests/test_05_peak_callers.yml @@ -100,7 +100,7 @@ - path: results/03_peak_calling/01_pooled_controls/igg_ctrl_Treatment.bam.bai - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_epic2_200bp.peaks -- name: test_peak_callers_epic2_missing_control_skip +- name: test_peak_callers_epic2_missing_control_fallback command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller epic2_200bp --epic2_genome hg38 --allow_cross_condition_controls --input tests/assets/samplesheet_condition_missing_control.csv -c tests/config/nextflow.config tags: - test_peak_callers @@ -111,9 +111,27 @@ - "h3k27me3_Treatment_rep1" - "epic2_200bp" - "fallback_other_condition" + - "used" + - "condition_fallback" + - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_epic2_200bp.peaks + - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Treatment_rep1_epic2_200bp.peaks + should_exist: true + +- name: test_peak_callers_epic2_no_controls_skip + command: nextflow run main.nf -profile docker,test --only_peak_calling --skip_fastqc --skip_removeduplicates --skip_multiqc --skip_preseq --peakcaller epic2_200bp --epic2_genome hg38 --allow_cross_condition_controls --input tests/assets/samplesheet_condition_no_controls.csv -c tests/config/nextflow.config + tags: + - test_peak_callers + - test_peak_callers_epic2 + files: + - path: results/03_peak_calling/07_qc_tables/control_pooling_fallbacks.tsv + contains: + - "h3k27me3_Control_rep1" + - "epic2_200bp" + - "missing_control" - "skipped" - - "no_exact_condition_match" + - "no_control_for_group" - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Control_rep1_epic2_200bp.peaks + should_exist: false - path: results/03_peak_calling/04_called_peaks/epic2_200bp/h3k27me3_Treatment_rep1_epic2_200bp.peaks should_exist: false diff --git a/tests/test_verify_output_01_save_merged.yml b/tests/test_verify_output_01_save_merged.yml index cd6920dc..54ee4042 100644 --- a/tests/test_verify_output_01_save_merged.yml +++ b/tests/test_verify_output_01_save_merged.yml @@ -3,5 +3,5 @@ tags: - verify_output_save_merged files: - - path: results/01_prealign/merged_fastq/h3k27me3_NA_rep1_T1_1.merged.fastq.gz - - path: results/01_prealign/merged_fastq/h3k27me3_NA_rep1_T1_2.merged.fastq.gz + - path: results/01_prealign/merged_fastq/h3k27me3_NA_rep1_1.merged.fastq.gz + - path: results/01_prealign/merged_fastq/h3k27me3_NA_rep1_2.merged.fastq.gz diff --git a/tests/test_verify_output_01_save_trimmed.yml b/tests/test_verify_output_01_save_trimmed.yml index 5eed8467..1caff730 100644 --- a/tests/test_verify_output_01_save_trimmed.yml +++ b/tests/test_verify_output_01_save_trimmed.yml @@ -3,4 +3,4 @@ tags: - verify_output_save_trimmed files: - - path: results/01_prealign/trimgalore/h3k27me3_NA_rep1_T1_1.trimmed.fastq.gz + - path: results/01_prealign/trimgalore/h3k27me3_NA_rep1_1.trimmed.fastq.gz diff --git a/tests/test_verify_output_01_skip_fastqc.yml b/tests/test_verify_output_01_skip_fastqc.yml index 9db57056..db4a5f26 100644 --- a/tests/test_verify_output_01_skip_fastqc.yml +++ b/tests/test_verify_output_01_skip_fastqc.yml @@ -4,9 +4,9 @@ - verify_output_skip_fastqc - verify_output_skip_fastqc_true files: - - path: results/01_prealign/pretrim_fastqc/h3k27me3_NA_rep1_T1_1_fastqc.html + - path: results/01_prealign/pretrim_fastqc/h3k27me3_NA_rep1_1_fastqc.html should_exist: false - - path: results/01_prealign/trimgalore/fastqc/h3k27me3_NA_rep1_T1_1_fastqc.trimmed.html + - path: results/01_prealign/trimgalore/fastqc/h3k27me3_NA_rep1_1_fastqc.trimmed.html should_exist: false # - name: test_verify_output_skip_fastqc_false # command: nextflow run main.nf -profile docker,test --only_preqc --skip_fastqc false @@ -14,5 +14,5 @@ # - verify_output_skip_fastqc # - verify_output_skip_fastqc_false # files: -# - path: results/01_prealign/pretrim_fastqc/h3k27me3_NA_rep1_T1_1_fastqc.html -# - path: results/01_prealign/trimgalore/fastqc/h3k27me3_NA_rep1_T1_1.trimmed_fastqc.html +# - path: results/01_prealign/pretrim_fastqc/h3k27me3_NA_rep1_1_fastqc.html +# - path: results/01_prealign/trimgalore/fastqc/h3k27me3_NA_rep1_1.trimmed_fastqc.html diff --git a/tests/test_verify_output_01_skip_trimming.yml b/tests/test_verify_output_01_skip_trimming.yml index 438558c7..e9e302b2 100644 --- a/tests/test_verify_output_01_skip_trimming.yml +++ b/tests/test_verify_output_01_skip_trimming.yml @@ -3,5 +3,5 @@ tags: - verify_output_skip_trimming files: - - path: results/01_prealign/trimgalore/h3k27me3_NA_rep1_T1_1.trimmed.fastq.gz + - path: results/01_prealign/trimgalore/h3k27me3_NA_rep1_1.trimmed.fastq.gz should_exist: false diff --git a/tests/test_verify_output_03_only_align.yml b/tests/test_verify_output_03_only_align.yml index a7cd7252..2d4adf50 100644 --- a/tests/test_verify_output_03_only_align.yml +++ b/tests/test_verify_output_03_only_align.yml @@ -4,12 +4,12 @@ - verify_output_align_only_align - verify_output_align_only_align_end_to_end files: - - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.sorted.bam - - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.target.sorted.bam - - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.target.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.flagstat + - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.sorted.bam + - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.target.sorted.bam + - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.target.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.flagstat - name: test_verify_output_only_align_local command: nextflow run main.nf -profile docker,test --only_alignment --skip_fastqc -c tests/config/nextflow.config --end_to_end false @@ -17,9 +17,9 @@ - verify_output_align_only_align - verify_output_align_only_align_local files: - - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.sorted.bam - - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.target.sorted.bam - - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.target.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1_T1.flagstat + - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.sorted.bam + - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.target.sorted.bam + - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.target.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/igg_ctrl_NA_rep1.flagstat diff --git a/tests/test_verify_output_03_save_align_intermed.yml b/tests/test_verify_output_03_save_align_intermed.yml index 79de55d2..f5541b61 100644 --- a/tests/test_verify_output_03_save_align_intermed.yml +++ b/tests/test_verify_output_03_save_align_intermed.yml @@ -3,8 +3,8 @@ tags: - verify_output_align_intermed files: - - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.bam - - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.filtered.bam - - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1_T1.target.sorted.bam - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.bam + - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.filtered.bam + - path: results/02_alignment/bowtie2/target/h3k27me3_NA_rep1.target.sorted.bam + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam.bai diff --git a/tests/test_verify_output_03_save_spikein_align.yml b/tests/test_verify_output_03_save_spikein_align.yml index 4c8cca19..f6d77c14 100644 --- a/tests/test_verify_output_03_save_spikein_align.yml +++ b/tests/test_verify_output_03_save_spikein_align.yml @@ -3,9 +3,9 @@ tags: - verify_output_align_save_spikein_align files: - - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1_T1.spikein.sorted.bam - - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1_T1.spikein.sorted.bam - - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1_T1.spikein.sorted.bam.bai - - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1_T1.spikein.sorted.bam.bai - - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1_T1.flagstat + - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1.spikein.sorted.bam + - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1.spikein.sorted.bam + - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1.spikein.sorted.bam.bai + - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1.spikein.sorted.bam.bai + - path: results/02_alignment/bowtie2/spikein/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/spikein/igg_ctrl_NA_rep1.flagstat diff --git a/tests/test_verify_output_03_save_unaligned.yml b/tests/test_verify_output_03_save_unaligned.yml index 9a6da92f..beeb7b0d 100644 --- a/tests/test_verify_output_03_save_unaligned.yml +++ b/tests/test_verify_output_03_save_unaligned.yml @@ -3,7 +3,7 @@ tags: - verify_output_align_save_unaligned files: - - path: results/02_alignment/bowtie2/target/unmapped/h3k27me3_NA_rep1_T1.unmapped_1.fastq.gz - - path: results/02_alignment/bowtie2/target/unmapped/igg_ctrl_NA_rep1_T1.unmapped_2.fastq.gz - - path: results/02_alignment/bowtie2/target/unmapped/h3k27me3_NA_rep1_T1.unmapped_1.fastq.gz - - path: results/02_alignment/bowtie2/target/unmapped/igg_ctrl_NA_rep1_T1.unmapped_2.fastq.gz + - path: results/02_alignment/bowtie2/target/unmapped/h3k27me3_NA_rep1.unmapped_1.fastq.gz + - path: results/02_alignment/bowtie2/target/unmapped/igg_ctrl_NA_rep1.unmapped_2.fastq.gz + - path: results/02_alignment/bowtie2/target/unmapped/h3k27me3_NA_rep1.unmapped_1.fastq.gz + - path: results/02_alignment/bowtie2/target/unmapped/igg_ctrl_NA_rep1.unmapped_2.fastq.gz diff --git a/tests/test_verify_output_04_duplicates.yml b/tests/test_verify_output_04_duplicates.yml index 4f0c8713..60cf8a9e 100644 --- a/tests/test_verify_output_04_duplicates.yml +++ b/tests/test_verify_output_04_duplicates.yml @@ -4,12 +4,12 @@ - verify_output_align_duplicates - verify_output_align_duplicates_mark files: - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.flagstat + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.flagstat - name: test_verify_output_duplicates_remove command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_preseq --dedup_target_reads false -c tests/config/nextflow.config @@ -17,14 +17,14 @@ - verify_output_align_duplicates - verify_output_align_duplicates_remove files: - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.flagstat + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.flagstat should_exist: false - name: test_verify_output_duplicates_remove_target @@ -33,18 +33,18 @@ - verify_output_align_duplicates - verify_output_align_duplicates_remove_target files: - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.target.dedup.sorted.bam - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam - - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.target.dedup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.flagstat + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.target.dedup.sorted.bam + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam + - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.target.dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.flagstat - name: test_verify_output_linear_duplicates_remove command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_preseq --dedup_target_reads false -c tests/config/nextflow.config --remove_linear_duplicates true @@ -52,19 +52,19 @@ - verify_output_align_linear_duplicates - verify_output_align_linear_duplicates_remove files: - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup.sorted.bam - - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup_metrics.txt + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup.sorted.bam + - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup_metrics.txt - name: test_verify_output_linear_duplicates_remove_target command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_preseq --dedup_target_reads true -c tests/config/nextflow.config --remove_linear_duplicates true @@ -72,23 +72,23 @@ - verify_output_align_linear_duplicates - verify_output_align_linear_duplicates_remove_target files: - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.target.dedup.sorted.bam - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam - - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.target.dedup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.target.dedup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1_T1.target.linear_dedup.sorted.bam - - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup.sorted.bam - - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1_T1.target.linear_dedup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1_T1.target.linear_dedup_metrics.txt - - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1_T1.target.linear_dedup_metrics.txt + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.target.dedup.sorted.bam + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam + - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.target.dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.target.dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/dedup/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/dedup/igg_ctrl_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1.target.linear_dedup.sorted.bam + - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup.sorted.bam + - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1.target.linear_dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/linear_dedup/h3k27me3_NA_rep1.target.linear_dedup_metrics.txt + - path: results/02_alignment/bowtie2/target/linear_dedup/igg_ctrl_NA_rep1.target.linear_dedup_metrics.txt diff --git a/tests/test_verify_output_04_only_filtering.yml b/tests/test_verify_output_04_only_filtering.yml index 291b1207..55e9971d 100644 --- a/tests/test_verify_output_04_only_filtering.yml +++ b/tests/test_verify_output_04_only_filtering.yml @@ -4,12 +4,12 @@ - verify_output_only_filtering - verify_output_only_filtering_without_mitochondrial_reads files: - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1_T1.flagstat - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.target.markdup.sorted.bam.bai - - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1_T1.flagstat + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/h3k27me3_NA_rep1.flagstat + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.target.markdup.sorted.bam.bai + - path: results/02_alignment/bowtie2/target/markdup/igg_ctrl_NA_rep1.flagstat - name: test_output_only_filtering_with_mitochondrial_reads command: nextflow run main.nf -profile docker,test --only_filtering --skip_fastqc --skip_removeduplicates --skip_preseq --remove_mitochondrial_reads false -c tests/config/nextflow.config diff --git a/tests/test_verify_output_06_skip_dt_qc.yml b/tests/test_verify_output_06_skip_dt_qc.yml index ff8ff037..d4cb5b1e 100644 --- a/tests/test_verify_output_06_skip_dt_qc.yml +++ b/tests/test_verify_output_06_skip_dt_qc.yml @@ -6,7 +6,7 @@ files: - path: results/04_reporting/deeptools_qc/all_target_bams.plotCorrelation.pdf - path: results/04_reporting/deeptools_qc/all_target_bams.plotPCA.pdf - - path: results/04_reporting/deeptools_qc/h3k4me3_NA_rep1_T1.plotFingerprint.pdf + - path: results/04_reporting/deeptools_qc/h3k4me3_NA_rep1.plotFingerprint.pdf - name: verify_output_reporting_skip_dtqc_true command: nextflow run main.nf -profile docker,test_full_small --skip_fastqc --skip_multiqc --skip_preseq --skip_dt_qc -c tests/config/nextflow.config @@ -18,5 +18,5 @@ should_exist: false - path: results/04_reporting/deeptools_qc/all_target_bams.plotPCA.pdf should_exist: false - - path: results/04_reporting/deeptools_qc/h3k4me3_NA_rep1_T1.plotFingerprint.pdf + - path: results/04_reporting/deeptools_qc/h3k4me3_NA_rep1.plotFingerprint.pdf should_exist: false diff --git a/tests/test_verify_output_06_skip_preseq.yml b/tests/test_verify_output_06_skip_preseq.yml index 115b5ba5..4dfe42bb 100644 --- a/tests/test_verify_output_06_skip_preseq.yml +++ b/tests/test_verify_output_06_skip_preseq.yml @@ -10,9 +10,9 @@ - verify_output_reporting_skip_preseq - verify_output_reporting_skip_preseq_true files: - - path: results/04_reporting/preseq/h3k4me3_NA_rep1_T1.command.log + - path: results/04_reporting/preseq/h3k4me3_NA_rep1.command.log should_exist: false - - path: results/04_reporting/preseq/h3k4me3_NA_rep2_T1.command.log + - path: results/04_reporting/preseq/h3k4me3_NA_rep2.command.log should_exist: false - - path: results/04_reporting/preseq/igg_ctrl_NA_rep1_T1.command.log + - path: results/04_reporting/preseq/igg_ctrl_NA_rep1.command.log should_exist: false From 27005d391d1db266244e1b35dd321053559015e2 Mon Sep 17 00:00:00 2001 From: dhusmann Date: Mon, 5 Jan 2026 08:54:51 -0800 Subject: [PATCH 08/19] Fix samplesheet warnings and multi-caller FRiP --- bin/check_samplesheet.py | 25 ++++++++++++------ modules/local/python/samplesheet_check.nf | 5 ++-- subworkflows/local/input_check.nf | 5 ++++ subworkflows/local/peak_qc.nf | 17 ++++++++----- subworkflows/local/prepare_peakcalling.nf | 2 +- workflows/cutandrun.nf | 31 +++++++++++------------ 6 files changed, 52 insertions(+), 33 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index d286bc43..f8600ad8 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -92,6 +92,11 @@ def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_cont missing_control_warnings = [] cross_condition_warnings = [] legacy_na_warnings = [] + warning_lines = [] + + def record_warning(message): + print(message) + warning_lines.append(message) with open(file_in, "r") as fin: ## Check header @@ -249,7 +254,7 @@ def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_cont for ctrl in control_names_list: if ctrl != "" and ctrl not in sample_names_list: if allow_cross_condition_controls: - print( + record_warning( "WARNING: Control entry '{}' does not match any group entry; proceeding because " "--allow_cross_condition_controls was set. Control-required callers will be skipped.".format(ctrl) ) @@ -284,7 +289,7 @@ def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_cont ) if use_control == "false" and control_present: - print( + record_warning( "WARNING: Parameter --use_control was set to false, but an control group was found in " + str(file_in) + "." ) @@ -370,35 +375,39 @@ def check_samplesheet(file_in, file_out, use_control, allow_cross_condition_cont sys.exit(1) if missing_control_warnings: - print( + record_warning( "WARNING: No control rows exist for one or more control groups; proceeding because " "--allow_cross_condition_controls was set. Control-required callers will be skipped for these samples." ) for entry in missing_control_warnings: - print( + record_warning( " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | " "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry) ) if legacy_na_warnings: - print("WARNING: Control rows with condition=NA used for targets with explicit conditions (legacy mode).") + record_warning("WARNING: Control rows with condition=NA used for targets with explicit conditions (legacy mode).") for entry in legacy_na_warnings: - print( + record_warning( " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | " "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry) ) if cross_condition_warnings: - print( + record_warning( "WARNING: No exact condition-matched controls found; proceeding with cross-condition controls " "because --allow_cross_condition_controls was set." ) for entry in cross_condition_warnings: - print( + record_warning( " - sample_id: {sample_id} | group: {group} | condition: {condition} | replicate: {replicate} | " "control_group: {control_group} | control_conditions_found: {control_conditions}".format(**entry) ) + with open("samplesheet.warnings.txt", "w") as warn: + if warning_lines: + warn.write("\n".join(warning_lines) + "\n") + ## Write validated samplesheet with appropriate columns if len(sample_run_dict) > 0: out_dir = os.path.dirname(file_out) diff --git a/modules/local/python/samplesheet_check.nf b/modules/local/python/samplesheet_check.nf index 4972e07e..a5e50400 100644 --- a/modules/local/python/samplesheet_check.nf +++ b/modules/local/python/samplesheet_check.nf @@ -9,8 +9,9 @@ process SAMPLESHEET_CHECK { path samplesheet output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions + path '*.csv' , emit: csv + path "samplesheet.warnings.txt", emit: warnings + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index f122b506..6748ded1 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -11,6 +11,11 @@ workflow INPUT_CHECK { main: SAMPLESHEET_CHECK ( samplesheet ) + SAMPLESHEET_CHECK.out.warnings + .map { it.text?.trim() } + .filter { it } + .view { it } + SAMPLESHEET_CHECK.out.csv .splitCsv ( header:true, sep:"," ) .map { get_samplesheet_paths(it) } diff --git a/subworkflows/local/peak_qc.nf b/subworkflows/local/peak_qc.nf index 6ba27391..c2b49fc3 100644 --- a/subworkflows/local/peak_qc.nf +++ b/subworkflows/local/peak_qc.nf @@ -35,12 +35,17 @@ workflow PEAK_QC { /* * CHANNEL: Combine channel together for frip calculation */ - peaks - .map { row -> [row[0].id, row ].flatten()} - .join ( fragments_bed.map { row -> [row[0].id, row ].flatten()} ) - .join ( flagstat.map { row -> [row[0].id, row ].flatten()} ) - .map { row -> [ row[1], row[2], row[4], row[6] ]} - .set { ch_frip } + def ch_peaks_by_id = peaks + .map { row -> [row[0].id, row] } + .groupTuple(by: [0]) + + ch_peaks_by_id + .join ( fragments_bed.map { row -> [row[0].id, row[1]] } ) + .join ( flagstat.map { row -> [row[0].id, row[1]] } ) + .flatMap { id, peak_rows, fragments, flagstat_file -> + peak_rows.collect { peak_row -> [ peak_row[0], peak_row[1], fragments, flagstat_file ] } + } + .set { ch_frip } /* * MODULE: Calculate frip scores for sample peaks diff --git a/subworkflows/local/prepare_peakcalling.nf b/subworkflows/local/prepare_peakcalling.nf index 37aa892c..da56bf24 100644 --- a/subworkflows/local/prepare_peakcalling.nf +++ b/subworkflows/local/prepare_peakcalling.nf @@ -25,6 +25,7 @@ workflow PREPARE_PEAKCALLING { main: ch_versions = Channel.empty() ch_bedgraph = Channel.empty() + ch_scope_reference = Channel.empty() def norm_scope = normalisation_scope ?: 'all' def igg_scope = igg_scale_scope ?: 'legacy' def median = { List values -> @@ -114,7 +115,6 @@ workflow PREPARE_PEAKCALLING { ch_versions = ch_versions.mix(NORMALISATION_FACTORS_REPORT.out.versions) if (params.dump_scale_factors) { - def ch_scope_reference = Channel.empty() if (norm_scope == 'all') { ch_scope_reference = Channel.of([scope_id: 'all', reference_reads: params.normalisation_c]) } else { diff --git a/workflows/cutandrun.nf b/workflows/cutandrun.nf index c163effc..d8b6e7f5 100644 --- a/workflows/cutandrun.nf +++ b/workflows/cutandrun.nf @@ -102,11 +102,6 @@ def caller_list = [ 'span_default', 'span_stringent' ] -callers = params.callers ?: ['seacr'] -if ((caller_list + callers).unique().size() != caller_list.size()) { - exit 1, "Invalid variant calller option: ${params.peakcaller ?: params.peakcaller_preset}. Valid options: ${caller_list.join(', ')}" -} - /* ======================================================================================== IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -187,6 +182,10 @@ workflow CUTANDRUN { // Init ch_software_versions = Channel.empty() + def caller_ids = params.callers ?: ['seacr'] + if ((caller_list + caller_ids).unique().size() != caller_list.size()) { + exit 1, "Invalid variant calller option: ${params.peakcaller ?: params.peakcaller_preset}. Valid options: ${caller_list.join(', ')}" + } /* * SUBWORKFLOW: Uncompress and prepare reference genome files @@ -501,7 +500,7 @@ workflow CUTANDRUN { //ch_bam_control | view /* - * SUBWORKFLOW: Call peaks using extended callers + * SUBWORKFLOW: Call peaks using extended caller_ids */ PEAK_CALLING_EXTENDED ( ch_bedgraph_target, @@ -509,7 +508,7 @@ workflow CUTANDRUN { ch_bam_target, ch_bam_control, PREPARE_GENOME.out.chrom_sizes, - callers + caller_ids ) ch_peaks_all = PEAK_CALLING_EXTENDED.out.peaks ch_macs2_summits = PEAK_CALLING_EXTENDED.out.macs2_summits @@ -519,7 +518,7 @@ workflow CUTANDRUN { ch_gopeaks_json = PEAK_CALLING_EXTENDED.out.gopeaks_json ch_software_versions = ch_software_versions.mix(PEAK_CALLING_EXTENDED.out.versions) - if (callers.find { it.startsWith('macs2') }) { + if (caller_ids.find { it.startsWith('macs2') }) { /* * MODULE: Convert MACS2 outputs to BED */ @@ -536,14 +535,14 @@ workflow CUTANDRUN { } // Identify the primary peak data stream for downstream analysis - ch_peaks_primary = ch_peaks_all.filter { it[0].caller == callers[0] } - ch_peaks_secondary = ch_peaks_all.filter { it[0].caller != callers[0] } + ch_peaks_primary = ch_peaks_all.filter { it[0].caller == caller_ids[0] } + ch_peaks_secondary = ch_peaks_all.filter { it[0].caller != caller_ids[0] } /* * CHANNEL: Build summit/peak inputs for heatmaps per caller */ ch_peaks_summits = Channel.empty() - if (callers.contains('seacr')) { + if (caller_ids.contains('seacr')) { /* * MODULE: Extract summits from seacr peak beds */ @@ -556,10 +555,10 @@ workflow CUTANDRUN { } def macs2_summit_callers = [] - if (callers.contains('macs2_narrow')) { + if (caller_ids.contains('macs2_narrow')) { macs2_summit_callers.add('macs2_narrow') } - if (callers.contains('macs2') && params.macs2_narrow_peak) { + if (caller_ids.contains('macs2') && params.macs2_narrow_peak) { macs2_summit_callers.add('macs2') } @@ -569,15 +568,15 @@ workflow CUTANDRUN { ) } - def macs2_no_summit_callers = callers.findAll { it.startsWith('macs2') && !macs2_summit_callers.contains(it) } + def macs2_no_summit_callers = caller_ids.findAll { it.startsWith('macs2') && !macs2_summit_callers.contains(it) } if (macs2_no_summit_callers) { ch_peaks_summits = ch_peaks_summits.mix( ch_peaks_all.filter { macs2_no_summit_callers.contains(it[0].caller) } ) } - def summit_override_callers = ['seacr'] + callers.findAll { it.startsWith('macs2') } - def other_callers = callers.findAll { !summit_override_callers.contains(it) } + def summit_override_callers = ['seacr'] + caller_ids.findAll { it.startsWith('macs2') } + def other_callers = caller_ids.findAll { !summit_override_callers.contains(it) } if (other_callers) { ch_peaks_summits = ch_peaks_summits.mix( ch_peaks_all.filter { other_callers.contains(it[0].caller) } From e8e2b6f8936fb36f518c8335a15733a2719fdc3c Mon Sep 17 00:00:00 2001 From: dhusmann Date: Thu, 8 Jan 2026 21:37:25 -0800 Subject: [PATCH 09/19] Avoid dropping targets when control group missing --- subworkflows/local/peak_calling_extended.nf | 69 +++++++++++++++------ 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/subworkflows/local/peak_calling_extended.nf b/subworkflows/local/peak_calling_extended.nf index 0761736e..9ad32305 100644 --- a/subworkflows/local/peak_calling_extended.nf +++ b/subworkflows/local/peak_calling_extended.nf @@ -78,32 +78,49 @@ workflow PEAK_CALLING_EXTENDED { } // Pair targets with controls by control_group + control_condition and replicate logic + // Never drop targets when controls are missing (return null control instead) def pair_targets_controls = { ch_target, ch_control -> - def ch_control_grouped = ch_control + def control_map_ch = ch_control .map { meta, file -> ["${meta.group}__${meta.condition}", [meta, file]] } .groupTuple(by: [0]) + .toList() + .map { list -> + def map = [:] + (list ?: []).each { entry -> + map[entry[0]] = entry[1] + } + map + } - def ch_target_grouped = ch_target - .map { meta, file -> ["${meta.control_group}__${meta.control_condition}", [meta, file]] } + def target_count_ch = ch_target + .map { meta, file -> ["${meta.control_group}__${meta.control_condition}", meta.replicate] } .groupTuple(by: [0]) + .map { key, reps -> [key, reps.size()] } + .toList() + .map { list -> + def map = [:] + (list ?: []).each { entry -> + map[entry[0]] = entry[1] + } + map + } - ch_control_grouped - .join(ch_target_grouped) - .flatMap { key, control_list, target_list -> + ch_target + .combine(control_map_ch) + .combine(target_count_ch) + .flatMap { meta, file, control_map, target_count_map -> + def key = "${meta.control_group}__${meta.control_condition}" + def control_list = control_map.get(key) + if (!control_list) { + return [[meta + [control_missing: true], file, null]] + } def control_by_rep = control_list.collectEntries { [(it[0].replicate): it[1]] } def control_default = control_list.sort { it[0].replicate }[0][1] def control_reps_count = control_list.size() - def target_reps_count = target_list.size() - - def pairs = [] - target_list.each { target_item -> - def tmeta = target_item[0] - def tfile = target_item[1] - def control_file = (control_reps_count == target_reps_count && control_by_rep.containsKey(tmeta.replicate)) ? - control_by_rep[tmeta.replicate] : control_default - pairs.add([tmeta, tfile, control_file]) - } - pairs + def target_reps_count = target_count_map.get(key) ?: 1 + def control_file = (control_reps_count == target_reps_count && control_by_rep.containsKey(meta.replicate)) ? + control_by_rep[meta.replicate] : control_default + return [[meta + [control_missing: false], file, control_file]] } } @@ -120,8 +137,15 @@ workflow PEAK_CALLING_EXTENDED { ch_bedgraph_target_cc.map { meta, bed -> [meta + [caller: 'seacr'], bed] }, bedgraph_control ) + def ch_seacr_inputs = ch_seacr_pairs.map { meta, bed, control -> + if (!control) { + log.warn "No control found for group '${meta.control_group}' (condition '${meta.control_condition}') - running SEACR without control for ${meta.sample_id ?: meta.id}" + return [meta, bed, []] + } + [meta, bed, control] + } SEACR_CALLPEAK ( - ch_seacr_pairs, + ch_seacr_inputs, params.seacr_peak_threshold ) ch_peaks_all = ch_peaks_all.mix(SEACR_CALLPEAK.out.bed) @@ -147,8 +171,15 @@ workflow PEAK_CALLING_EXTENDED { ch_bam_target_cc.map { meta, bam -> [meta + [caller: 'macs2'], bam] }, bam_control ) + def ch_macs_inputs = ch_macs_pairs.map { meta, bam, control -> + if (!control) { + log.warn "No control found for group '${meta.control_group}' (condition '${meta.control_condition}') - running MACS2 without control for ${meta.sample_id ?: meta.id}" + return [meta, bam, []] + } + [meta, bam, control] + } MACS2_CALLPEAK_LEGACY ( - ch_macs_pairs, + ch_macs_inputs, params.macs_gsize ) ch_peaks_all = ch_peaks_all.mix(MACS2_CALLPEAK_LEGACY.out.peak) From 45831001083430dbbdec495691e894310ff55663 Mon Sep 17 00:00:00 2001 From: dhusmann Date: Thu, 8 Jan 2026 21:55:21 -0800 Subject: [PATCH 10/19] Clarify control-missing warnings --- bin/check_samplesheet.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index f8600ad8..2dba5f25 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -256,7 +256,8 @@ def record_warning(message): if allow_cross_condition_controls: record_warning( "WARNING: Control entry '{}' does not match any group entry; proceeding because " - "--allow_cross_condition_controls was set. Control-required callers will be skipped.".format(ctrl) + "--allow_cross_condition_controls was set. This may indicate a typo. " + "Control-required callers (epic2/span) will be skipped; SEACR/MACS2 will run without control.".format(ctrl) ) continue print_error( @@ -377,7 +378,8 @@ def record_warning(message): if missing_control_warnings: record_warning( "WARNING: No control rows exist for one or more control groups; proceeding because " - "--allow_cross_condition_controls was set. Control-required callers will be skipped for these samples." + "--allow_cross_condition_controls was set. Control-required callers (epic2/span) will be skipped " + "for these samples; SEACR/MACS2 will run without control." ) for entry in missing_control_warnings: record_warning( From 79880d9f2b188be8d59fc0d3231a7b61166ba865 Mon Sep 17 00:00:00 2001 From: dhusmann Date: Thu, 8 Jan 2026 22:12:58 -0800 Subject: [PATCH 11/19] Fix multi-caller heatmap joins --- workflows/cutandrun.nf | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/workflows/cutandrun.nf b/workflows/cutandrun.nf index d8b6e7f5..753cadae 100644 --- a/workflows/cutandrun.nf +++ b/workflows/cutandrun.nf @@ -733,7 +733,8 @@ workflow CUTANDRUN { * CHANNEL: Structure output for join on id */ ch_peaks_summits - .map { meta, bed -> [meta.sample_id ?: meta.id, meta, bed] } + .map { meta, bed -> [meta.sample_id ?: meta.id, [meta, bed]] } + .groupTuple(by: [0]) .set { ch_peaks_summits_id } //ch_peaks_bed_id | view @@ -743,12 +744,14 @@ workflow CUTANDRUN { ch_bigwig_no_igg_heatmap .map { meta, bigwig -> [meta.id, bigwig] } .join ( ch_peaks_summits_id ) - .map { row -> - def peak_meta = row[2] - def bed = row[3] - def sample_id = peak_meta.sample_id ?: peak_meta.id - def heatmap_meta = peak_meta + [id: "${sample_id}_${peak_meta.caller}"] - [ heatmap_meta, row[1], bed ] + .flatMap { row -> + def peaks_list = row[2] + def bigwig = row[1] + peaks_list.collect { peak_meta, bed -> + def sample_id = peak_meta.sample_id ?: peak_meta.id + def heatmap_meta = peak_meta + [id: "${sample_id}_${peak_meta.caller}"] + [ heatmap_meta, bigwig, bed ] + } } .filter ( it -> it[2].size() > 1) .set { ch_dt_bigwig_summits } From 1491b921429b9f85d58b9bcd8e3bfb8d72473640 Mon Sep 17 00:00:00 2001 From: dhusmann Date: Fri, 9 Jan 2026 20:48:01 -0800 Subject: [PATCH 12/19] Fix channel redefinition in peak_calling_extended --- subworkflows/local/peak_calling_extended.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/peak_calling_extended.nf b/subworkflows/local/peak_calling_extended.nf index 9ad32305..b03dfefd 100644 --- a/subworkflows/local/peak_calling_extended.nf +++ b/subworkflows/local/peak_calling_extended.nf @@ -137,7 +137,7 @@ workflow PEAK_CALLING_EXTENDED { ch_bedgraph_target_cc.map { meta, bed -> [meta + [caller: 'seacr'], bed] }, bedgraph_control ) - def ch_seacr_inputs = ch_seacr_pairs.map { meta, bed, control -> + ch_seacr_inputs = ch_seacr_pairs.map { meta, bed, control -> if (!control) { log.warn "No control found for group '${meta.control_group}' (condition '${meta.control_condition}') - running SEACR without control for ${meta.sample_id ?: meta.id}" return [meta, bed, []] @@ -171,7 +171,7 @@ workflow PEAK_CALLING_EXTENDED { ch_bam_target_cc.map { meta, bam -> [meta + [caller: 'macs2'], bam] }, bam_control ) - def ch_macs_inputs = ch_macs_pairs.map { meta, bam, control -> + ch_macs_inputs = ch_macs_pairs.map { meta, bam, control -> if (!control) { log.warn "No control found for group '${meta.control_group}' (condition '${meta.control_condition}') - running MACS2 without control for ${meta.sample_id ?: meta.id}" return [meta, bam, []] From 91f2ff4150d073c9bdbddf4235c7539384628cfc Mon Sep 17 00:00:00 2001 From: dhusmann Date: Fri, 9 Jan 2026 20:48:38 -0800 Subject: [PATCH 13/19] Record pytest-suite run 2026-01-09 --- docs/test_history/adding_new_peak_callers.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/test_history/adding_new_peak_callers.md diff --git a/docs/test_history/adding_new_peak_callers.md b/docs/test_history/adding_new_peak_callers.md new file mode 100644 index 00000000..7b784ffa --- /dev/null +++ b/docs/test_history/adding_new_peak_callers.md @@ -0,0 +1,5 @@ +## 2026-01-09 — pytest-suite (Slurm) +- JobID/run dir: 13170828 — /scratch/users/dhusmann/nextflow-work/runs/pytest-suite/20260109_183946 +- Command: pytest-suite (pytest-workflow) --maxfail=1 --kwdof --symlink --ga --color=yes (via pytest_suite.sh) +- Result: 323 passed, 0 failed (2:05:41), SLURM COMPLETED 0:0 +- Failures observed (prior attempt 13116667): Nextflow compile error "Variable ch_seacr_pairs/ch_macs_pairs already defined" in subworkflows/local/peak_calling_extended.nf; fix in 1491b92 (remove def on ch_*_inputs) From 50d0df8ccef74c420b8ba2b8cf0b71ecdb4fa51c Mon Sep 17 00:00:00 2001 From: dhusmann Date: Tue, 20 Jan 2026 14:28:39 -0800 Subject: [PATCH 14/19] Fix SPAN outputs and peak QC bedtools --- conf/modules.config | 3 ++- modules/local/span_omnipeaks_analyze.nf | 9 +++++---- subworkflows/local/peak_qc.nf | 23 +++++++++++++++++++++-- workflows/cutandrun.nf | 1 + 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 93784de3..9e923f37 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -843,7 +843,7 @@ if(params.run_peak_calling && params.callers && params.callers.contains('seacr') if(params.run_peak_calling) { process { withName: '.*:AWK_NAME_PEAK_BED' { - ext.command = "'{OFS = \"\\t\"} {print \$0, FILENAME}'" + ext.command = "'BEGIN{OFS = \"\\t\"} \$1 !~ /^#/ && \$1 != \"Chromosome\" && \$1 != \"chromosome\" && \$1 != \"track\" && \$1 != \"browser\" {print \$0, FILENAME}'" ext.ext = "bed" publishDir = [ enabled: false @@ -1105,6 +1105,7 @@ if (params.run_reporting && params.run_peak_qc) { withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:BEDTOOLS_INTERSECT' { ext.args = "-C -sorted -f ${params.min_peak_overlap}" + ext.prefix = { "${meta.id}.intersect" } publishDir = [ enabled: false ] diff --git a/modules/local/span_omnipeaks_analyze.nf b/modules/local/span_omnipeaks_analyze.nf index 1a01608a..86d9473d 100644 --- a/modules/local/span_omnipeaks_analyze.nf +++ b/modules/local/span_omnipeaks_analyze.nf @@ -4,8 +4,8 @@ process SPAN_OMNIPEAKS_ANALYZE { conda "conda-forge::openjdk=21.0.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://eclipse-temurin:21-jre' : - 'eclipse-temurin:21-jre' }" + 'docker://eclipse-temurin:21-jdk' : + 'eclipse-temurin:21-jdk' }" input: tuple val(meta), path(treatment_bam), path(control_bam) @@ -24,10 +24,11 @@ process SPAN_OMNIPEAKS_ANALYZE { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}_${meta.caller}" + def raw_prefix = task.ext.prefix ?: "${meta.id}_${meta.caller}" + def prefix = raw_prefix.endsWith('.peak') ? raw_prefix : "${raw_prefix}.peak" def fdr_arg = fdr ? "--fdr ${fdr}" : '' """ - java -Xmx${java_heap} -jar ${omnipeaks_jar} analyze \ + java --add-modules jdk.incubator.vector -Xmx${java_heap} -jar ${omnipeaks_jar} analyze \ -t ${treatment_bam} \ -c ${control_bam} \ --cs ${chrom_sizes} \ diff --git a/subworkflows/local/peak_qc.nf b/subworkflows/local/peak_qc.nf index c2b49fc3..cdd5ccd4 100644 --- a/subworkflows/local/peak_qc.nf +++ b/subworkflows/local/peak_qc.nf @@ -6,6 +6,7 @@ include { PEAK_FRIP } from "../../modules/local/peak_ include { PEAK_COUNTS as PRIMARY_PEAK_COUNTS } from "../../modules/local/peak_counts" include { PEAK_COUNTS as CONSENSUS_PEAK_COUNTS } from "../../modules/local/peak_counts" include { CUT as CUT_CALC_REPROD } from "../../modules/local/linux/cut" +include { BEDTOOLS_SORT as PEAKQC_BEDTOOLS_SORT } from "../../modules/local/for_patch/bedtools/sort/main" include { BEDTOOLS_INTERSECT } from "../../modules/nf-core/bedtools/intersect/main.nf" include { CALCULATE_PEAK_REPROD } from "../../modules/local/python/peak_reprod" include { PLOT_CONSENSUS_PEAKS } from '../../modules/local/python/plot_consensus_peaks' @@ -20,6 +21,7 @@ workflow PEAK_QC { peaks_with_ids // channel: [ val(meta), [ bed ] ] consensus_peaks // channel: [ val(meta), [ bed ] ] consensus_peaks_unfiltered // channel: [ val(meta), [ bed ] ] + chrom_sizes // channel: [ path ] fragments_bed // channel: [ val(meta), [ bed ] ] flagstat // channel: [ val(meta), [ flagstat ] ] min_frip_overlap // val @@ -86,10 +88,27 @@ workflow PEAK_QC { ) ch_versions = ch_versions.mix(CUT_CALC_REPROD.out.versions) + /* + * CHANNEL: Normalize chrom_sizes to a single value for bedtools -g + */ + ch_chrom_sizes_single = chrom_sizes + .collect() + .map { it instanceof List ? it[0] : it } + + /* + * MODULE: Sort repro beds with genome order to satisfy -sorted intersect + */ + PEAKQC_BEDTOOLS_SORT( + CUT_CALC_REPROD.out.file, + "bed", + ch_chrom_sizes_single + ) + ch_versions = ch_versions.mix(PEAKQC_BEDTOOLS_SORT.out.versions) + /* * CHANNEL: Group samples based on group and filter for groups that have more than one file */ - CUT_CALC_REPROD.out.file + PEAKQC_BEDTOOLS_SORT.out.sorted .map { row -> def group_key = consensus_grouping == 'group_condition' ? row[0].group_condition : row[0].group [ "${group_key}__${row[0].caller}", row[1], row[0] ] @@ -131,7 +150,7 @@ workflow PEAK_QC { */ BEDTOOLS_INTERSECT ( ch_beds_intersect, - [[:],[]] + ch_chrom_sizes_single.map { [[:], it] } ) ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions) //EXAMPLE CHANNEL STRUCT: [[META], BED] diff --git a/workflows/cutandrun.nf b/workflows/cutandrun.nf index 753cadae..6a9009c2 100644 --- a/workflows/cutandrun.nf +++ b/workflows/cutandrun.nf @@ -864,6 +864,7 @@ workflow CUTANDRUN { ch_peaks_with_ids_all, ch_consensus_peaks_all, ch_consensus_peaks_unfilt_all, + PREPARE_GENOME.out.chrom_sizes, EXTRACT_FRAGMENTS.out.bed, ch_flagstat_target, params.min_frip_overlap, From 5cc7de00a0921cfd853ee77b00e76333e54c08d2 Mon Sep 17 00:00:00 2001 From: dhusmann Date: Tue, 20 Jan 2026 15:08:24 -0800 Subject: [PATCH 15/19] Fix peak QC repro intersect format --- conf/modules.config | 3 ++- subworkflows/local/peak_qc.nf | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9e923f37..6b9f3a13 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1095,7 +1095,8 @@ if (params.run_reporting && params.run_peak_qc) { } withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:CUT_CALC_REPROD' { - ext.args = "-f 1,2,3,6" + ext.args = "-f 1-3" + ext.command = '| awk -v OFS="\\t" \'{print $1, $2, $3, $1":"$2"-"$3}\'' ext.suffix = ".repro" ext.ext = "bed" publishDir = [ diff --git a/subworkflows/local/peak_qc.nf b/subworkflows/local/peak_qc.nf index cdd5ccd4..a54fbeac 100644 --- a/subworkflows/local/peak_qc.nf +++ b/subworkflows/local/peak_qc.nf @@ -137,7 +137,9 @@ workflow PEAK_QC { row[1].each{ file -> def files_copy = row[1].collect() files_copy.remove(files_copy.indexOf(file)) - new_output.add([[id: file.name.split("\\.")[0]], file, files_copy]) + def file_id = file.name.split("\\.")[0] + def meta_out = row[0] + [id: file_id, sample_id: file_id] + new_output.add([meta_out, file, files_copy]) } new_output } From 3f05d4ac992748ae331e689403fbb15d22b958ba Mon Sep 17 00:00:00 2001 From: dhusmann Date: Tue, 20 Jan 2026 18:13:48 -0800 Subject: [PATCH 16/19] Harden dumpsoftwareversions + bump frag len mem --- conf/resources.config | 2 +- modules/local/custom_dumpsoftwareversions.nf | 29 +++++++++++++++++++- modules/local/epic2_callpeak.nf | 2 +- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/conf/resources.config b/conf/resources.config index d6cb5942..d87d6ac2 100644 --- a/conf/resources.config +++ b/conf/resources.config @@ -40,7 +40,7 @@ if(params.run_reporting) { process { withName: 'NFCORE_CUTANDRUN:CUTANDRUN:FRAG_LEN_HIST' { cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 32.GB * task.attempt, 'memory' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } } } diff --git a/modules/local/custom_dumpsoftwareversions.nf b/modules/local/custom_dumpsoftwareversions.nf index 0fe4d309..4ccd1aa7 100644 --- a/modules/local/custom_dumpsoftwareversions.nf +++ b/modules/local/custom_dumpsoftwareversions.nf @@ -25,6 +25,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { import yaml import platform + import json from textwrap import dedent def _make_versions_html(versions): @@ -119,7 +120,33 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { } with open("$versions") as f: - workflow_versions = yaml.load(f, Loader=yaml.BaseLoader) | module_versions + raw_lines = f.read().splitlines() + + # Sanitize potentially noisy version strings (eg. warnings) into valid YAML + clean_lines = [] + for line in raw_lines: + if not line.strip(): + continue + if line.startswith('"'): + clean_lines.append(line) + continue + stripped = line.lstrip(' ') + indent = line[:len(line) - len(stripped)] + if ':' not in stripped: + # Drop stray lines (eg. warning continuations) + continue + key_part, value = stripped.split(':', 1) + key = f"{indent}{key_part}:" + value = value.strip() + if value == "": + clean_lines.append(line) + continue + if not (value.startswith('"') or value.startswith("'")): + clean_lines.append(f"{key} {json.dumps(value)}") + else: + clean_lines.append(line) + + workflow_versions = yaml.load("\\n".join(clean_lines) + "\\n", Loader=yaml.BaseLoader) | module_versions workflow_versions["Workflow"] = { "Nextflow": "$workflow.nextflow.version", diff --git a/modules/local/epic2_callpeak.nf b/modules/local/epic2_callpeak.nf index a6e5a79b..45a46607 100644 --- a/modules/local/epic2_callpeak.nf +++ b/modules/local/epic2_callpeak.nf @@ -37,7 +37,7 @@ process EPIC2_CALLPEAK { cat <<-END_VERSIONS > versions.yml "${task.process}": - epic2: \$(epic2 --version 2>&1 | head -n 1 | sed -e 's/.*epic2 //g' || echo "unknown") + epic2: \$(epic2 --version 2>&1 | grep -Eo '[0-9]+(\\.[0-9]+)+' | head -n 1 || echo "unknown") END_VERSIONS """ } From 4cbb959d179d3b0c954dd8ef04a9ac21c38eb590 Mon Sep 17 00:00:00 2001 From: dhusmann Date: Tue, 20 Jan 2026 18:19:50 -0800 Subject: [PATCH 17/19] Fix FRiP awk computation --- modules/local/peak_frip.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/peak_frip.nf b/modules/local/peak_frip.nf index 24f9a49d..2a88bd09 100644 --- a/modules/local/peak_frip.nf +++ b/modules/local/peak_frip.nf @@ -25,7 +25,7 @@ process PEAK_FRIP { """ READS_IN_PEAKS=\$(bedtools intersect -a ${fragments_bed} -b ${peaks_bed} -bed -c -f $min_frip_overlap | awk -F '\t' '{sum += \$NF} END {print sum * 2}') MAPPED_READS=\$(grep -m 1 'mapped (' ${flagstat} | awk '{print \$1}') - FRIP_SCORE=\$(awk -v a="\$READS_IN_PEAKS" -v b="\$MAPPED_READS" 'BEGIN {OFS="\\t"} {if (b > 0) {print a / b} else {print 0}}') + FRIP_SCORE=\$(awk -v a="\$READS_IN_PEAKS" -v b="\$MAPPED_READS" 'BEGIN {OFS="\\t"; if (b > 0) {print a / b} else {print 0}}') printf "%s\\n" "\$FRIP_SCORE" > ${prefix}_frip_score.txt printf "Peak FRiP Score\\t%s\\n" "\$FRIP_SCORE" | cat $frip_score_header - > ${prefix}_mqc.tsv From adef1f0a43f82e4304af1e264d75be07aa202d7b Mon Sep 17 00:00:00 2001 From: dhusmann Date: Tue, 20 Jan 2026 21:41:29 -0800 Subject: [PATCH 18/19] Fix callers init and pre-dedup stats for peak prep --- lib/WorkflowCutandrun.groovy | 44 ++++++++++++++++++++++++++++++++++++ workflows/cutandrun.nf | 17 +++++++++++--- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/lib/WorkflowCutandrun.groovy b/lib/WorkflowCutandrun.groovy index 213d5015..62edea95 100755 --- a/lib/WorkflowCutandrun.groovy +++ b/lib/WorkflowCutandrun.groovy @@ -18,6 +18,9 @@ class WorkflowCutandrun { params.consensus_grouping = has_condition == false ? 'group' : 'group_condition' } + // Resolve peak callers after params are fully loaded (e.g. from -params-file) + resolveCallers(params) + genomeExistsError(params, log) validatePeakCallerParams(params, log) @@ -95,6 +98,47 @@ class WorkflowCutandrun { } } + // + // Resolve caller list with correct precedence + // + private static void resolveCallers(params) { + def caller_preset_map = [ + standard: ['seacr'], + extended: [ + 'macs2_narrow', + 'macs2_broad', + 'gopeaks_narrow', + 'gopeaks_broad', + 'epic2_200bp', + 'epic2_150bp', + 'epic2_25bp', + 'span_default', + 'span_stringent' + ] + ] + + def callers = [] + if (params.callers) { + if (params.callers instanceof String) { + callers = params.callers.split(',').collect { it.trim().toLowerCase() }.findAll { it } + } else if (params.callers instanceof List) { + callers = params.callers.collect { it.toString().trim().toLowerCase() }.findAll { it } + } + } + + if (params.peakcaller) { + callers = params.peakcaller.split(',').collect { it.trim().toLowerCase() }.findAll { it } + } else if (!callers) { + if (params.peakcaller_preset && params.peakcaller_preset.toLowerCase() == 'extended') { + callers = caller_preset_map.extended + } else { + callers = caller_preset_map.standard + } + } + + params.callers = callers + } + // // Detect whether the input samplesheet includes a condition column // diff --git a/workflows/cutandrun.nf b/workflows/cutandrun.nf index 6a9009c2..e701247d 100644 --- a/workflows/cutandrun.nf +++ b/workflows/cutandrun.nf @@ -390,6 +390,11 @@ workflow CUTANDRUN { * SUBWORKFLOW: Remove duplicates - default is on IgG controls only */ if (params.run_remove_dups) { + // Preserve pre-dedup stats for targets when only controls are deduplicated + ch_stats_pre = ch_samtools_stats + ch_flagstat_pre = ch_samtools_flagstat + ch_idxstats_pre = ch_samtools_idxstats + DEDUPLICATE_PICARD ( ch_samtools_bam, ch_samtools_bai, @@ -399,9 +404,15 @@ workflow CUTANDRUN { ) ch_samtools_bam = DEDUPLICATE_PICARD.out.bam ch_samtools_bai = DEDUPLICATE_PICARD.out.bai - ch_samtools_stats = DEDUPLICATE_PICARD.out.stats - ch_samtools_flagstat = DEDUPLICATE_PICARD.out.flagstat - ch_samtools_idxstats = DEDUPLICATE_PICARD.out.idxstats + if (params.dedup_target_reads) { + ch_samtools_stats = DEDUPLICATE_PICARD.out.stats + ch_samtools_flagstat = DEDUPLICATE_PICARD.out.flagstat + ch_samtools_idxstats = DEDUPLICATE_PICARD.out.idxstats + } else { + ch_samtools_stats = DEDUPLICATE_PICARD.out.stats.mix(ch_stats_pre.filter { it[0].is_control == false }) + ch_samtools_flagstat = DEDUPLICATE_PICARD.out.flagstat.mix(ch_flagstat_pre.filter { it[0].is_control == false }) + ch_samtools_idxstats = DEDUPLICATE_PICARD.out.idxstats.mix(ch_idxstats_pre.filter { it[0].is_control == false }) + } ch_software_versions = ch_software_versions.mix(DEDUPLICATE_PICARD.out.versions) } //EXAMPLE CHANNEL STRUCT: [[id:h3k27me3_R1, group:h3k27me3, replicate:1, single_end:false, is_control:false], [BAM]] From f345f8e2254148aa71e2e493e87743ae8b33c311 Mon Sep 17 00:00:00 2001 From: dhusmann Date: Tue, 20 Jan 2026 23:59:59 -0800 Subject: [PATCH 19/19] Avoid MultiQC collisions for peak reproducibility --- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules.config b/conf/modules.config index 6b9f3a13..43693c7b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1113,6 +1113,7 @@ if (params.run_reporting && params.run_peak_qc) { } withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PEAK_QC:CALCULATE_PEAK_REPROD' { + ext.prefix = { "${meta.sample_id ?: meta.id}_${meta.caller}" } publishDir = [ enabled: false ]