Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
7611f07
Add differential peak calling extension
dhusmann Jan 4, 2026
d7bddb2
Apply diffbind extra params
dhusmann Jan 4, 2026
32264bc
Gate input check for differential-only entry
dhusmann Jan 4, 2026
528d3f9
Fix differential summary inputs
dhusmann Jan 4, 2026
14e761e
Handle missing Java for SPAN native mode
dhusmann Jan 4, 2026
315dcc6
Fix differential-only init and annotation
dhusmann Jan 4, 2026
17fa79c
Deterministic SPAN peak selection
dhusmann Jan 4, 2026
3f6f828
Fix differential-only entry detection
dhusmann Jan 5, 2026
da6d95a
Guard chrom sizes for Chipbinner
dhusmann Jan 5, 2026
ccaab64
Use first() for gene bed
dhusmann Jan 5, 2026
475cb83
Filter samples by contrast labels
dhusmann Jan 5, 2026
cf3d463
Fail fast on missing manifests
dhusmann Jan 5, 2026
ee4715b
Mark manifest-only summaries as skipped
dhusmann Jan 5, 2026
40072bc
Fallback entry detection on differential_from_run
dhusmann Jan 5, 2026
0da6f43
Implement real ChIPBinner workflow
dhusmann Jan 5, 2026
093a5e5
Fix DiffBind error handling and plots
dhusmann Jan 5, 2026
a4b9f34
Fix differential annotation gene bed
dhusmann Jan 5, 2026
5147d64
Implement SPAN/OmniPeak differential native+fallback
dhusmann Jan 5, 2026
88a3a83
Add fast differential tests and fix status publishing
dhusmann Jan 5, 2026
96e3516
Tighten SPAN jar checks and native orientation
dhusmann Jan 5, 2026
8e019bc
Add chipbinner input manifest support
dhusmann Jan 5, 2026
b55661e
Improve ChIPBinner normalization and caching
dhusmann Jan 5, 2026
012488b
Add tests for chipbinner allow-partial and caching
dhusmann Jan 5, 2026
7e3da87
Expand differential analysis documentation
dhusmann Jan 5, 2026
189eb42
Fix differential status semantics and SPAN normalization
dhusmann Jan 5, 2026
3a6f365
Use biocontainers base image for chipbinner/span
dhusmann Jan 9, 2026
89d0c0a
Gate differential-only annotation inputs for manifest-only
dhusmann Jan 9, 2026
7e1b620
Mark chipbinner/span disabled in summary
dhusmann Jan 9, 2026
87fd3ec
Handle small log2FC in headerless SPAN output
dhusmann Jan 9, 2026
6ceb3c2
Validate genome_id when reusing cached windows
dhusmann Jan 9, 2026
c0b900b
Allow SPAN auto without fasta in differential-only
dhusmann Jan 9, 2026
f8a6b37
Fix differential-only detection and spike-in validation
dhusmann Jan 9, 2026
a3e2ed0
Align ChIPBinner log2FC pseudocount
dhusmann Jan 9, 2026
b8d24df
Apply ChIPBinner pseudocount after CPM
dhusmann Jan 9, 2026
ce0ce4e
Require chrom sizes for SPAN auto and fix log2FC
dhusmann Jan 9, 2026
674cdb0
Pass chrom sizes path correctly
dhusmann Jan 9, 2026
1dc541f
Default posthoc chipbinner windows cache
dhusmann Jan 9, 2026
2a00b69
Gunzip FASTA before getchromsizes in posthoc
dhusmann Jan 9, 2026
c10dc6b
Clarify spike-in scaling direction
dhusmann Jan 9, 2026
e6739df
Preserve spike-in/MS scaling through CPM
dhusmann Jan 9, 2026
1d9d18a
Fix manifest paths and cache fallback
dhusmann Jan 9, 2026
c86079c
Handle remote gene_bed in differential-only
dhusmann Jan 9, 2026
18ae889
Clamp HDBSCAN grid to dataset size
dhusmann Jan 9, 2026
bc423f2
Skip CPM when scaling applied
dhusmann Jan 9, 2026
687d16d
Relax cached windows blacklist matching
dhusmann Jan 9, 2026
d3d4c9e
Require spike-in factors when enabled
dhusmann Jan 9, 2026
dd781b6
Allow partial on windows lookup
dhusmann Jan 9, 2026
4725d67
Derive spike-in usage from manifests
dhusmann Jan 9, 2026
865bd9b
Fix differential summary normalization and SPAN mode validation
dhusmann Jan 10, 2026
b3341a6
Update differential workflow outputs and tests
dhusmann Jan 12, 2026
a64a69c
Record pytest suite history for differential_peak_calling_2
dhusmann Jan 12, 2026
2b09152
Guard spike-in broadcast for scalar values
dhusmann Jan 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions assets/chrom_sizes_stub.sizes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
chr1 1
1 change: 1 addition & 0 deletions assets/differential_summary_stub.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions assets/ms_coeffs_stub.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sample_id ms_coeff
10 changes: 10 additions & 0 deletions assets/multiqc/chipbinner_summary_header.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#id: 'chipbinner_summary'
#parent_id: 'differential_analysis'
#parent_name: 'Differential analysis'
#parent_description: 'Differential peak calling outputs'
#section_name: 'ChIPBinner summary'
#description: 'Per-group ChIPBinner summary metrics.'
#plot_type: 'table'
#pconfig:
# id: 'chipbinner_summary_table'
# title: 'ChIPBinner summary'
10 changes: 10 additions & 0 deletions assets/multiqc/differential_design_header.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#id: 'differential_design'
#parent_id: 'differential_analysis'
#parent_name: 'Differential analysis'
#parent_description: 'Differential peak calling outputs'
#section_name: 'Differential analysis design'
#description: 'Eligibility and design status for differential comparisons.'
#plot_type: 'table'
#pconfig:
# id: 'differential_design_table'
# title: 'Differential design'
10 changes: 10 additions & 0 deletions assets/multiqc/differential_summary_header.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#id: 'differential_summary'
#parent_id: 'differential_analysis'
#parent_name: 'Differential analysis'
#parent_description: 'Differential peak calling outputs'
#section_name: 'Differential analysis summary'
#description: 'Summary of differential analysis across methods.'
#plot_type: 'table'
#pconfig:
# id: 'differential_summary_table'
# title: 'Differential summary'
10 changes: 10 additions & 0 deletions assets/multiqc/span_summary_header.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#id: 'span_summary'
#parent_id: 'differential_analysis'
#parent_name: 'Differential analysis'
#parent_description: 'Differential peak calling outputs'
#section_name: 'SPAN summary'
#description: 'Per-group SPAN summary metrics.'
#plot_type: 'table'
#pconfig:
# id: 'span_summary_table'
# title: 'SPAN summary'
5 changes: 5 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ report_section_order:

custom_content:
order:
- differential_design
- differential_summary
- primary_peak_counts
- consensus_peak_counts
- primary_frip_score
Expand Down Expand Up @@ -94,6 +96,9 @@ section_comments:

# Customise the module search patterns to speed up execution time
sp:
custom_content:
fn: "*_mqc.tsv"

cutadapt:
fn: "*trimming_report.txt"

Expand Down
18 changes: 18 additions & 0 deletions assets/multiqc_differential_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# MultiQC configuration for the standalone differential analysis report.
report_comment: >
Differential analysis report generated by nf-core/cutandrun. The summary table
includes method-specific metrics, with dedicated ChIPBinner/SPAN sections for
detailed per-method summaries.

# Only the custom_content module is listed, so the report is built solely from
# custom content files.
run_modules:
- custom_content

# Section order for the report; each entry matches the `#id:` declared in the
# corresponding assets/multiqc/*_header.txt file.
custom_content:
order:
- differential_design
- differential_summary
- chipbinner_summary
- span_summary

# Search pattern: custom content is picked up from files named *_mqc.tsv.
sp:
custom_content:
fn: "*_mqc.tsv"
103 changes: 103 additions & 0 deletions bin/annotate_regions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/usr/bin/env python3
import argparse
import csv
import os
import subprocess
import tempfile


def read_tsv(path):
    """Load a tab-delimited file and return every record as a list of strings.

    Blank lines are kept and come back as empty lists, exactly as
    ``csv.reader`` yields them.
    """
    with open(path, "r", newline="") as tsv_file:
        return list(csv.reader(tsv_file, delimiter="\t"))


def is_header(row):
    """Return True when ``row`` does not look like a BED-style data record.

    A row counts as data only if it has at least three fields and the second
    field parses as an integer; anything else is treated as a header.
    """
    if len(row) >= 3:
        try:
            int(row[1])
        except Exception:
            return True
        return False
    return True


def main():
    """Annotate each region of a tab-separated table with its nearest gene.

    Command-line arguments:
        --regions   TSV whose first three columns are chrom, start, end; an
                    optional header row is detected with ``is_header``.
        --gene-bed  BED file handed to ``bedtools closest`` as the ``-b`` input.
        --out       Output TSV path; parent directories are created if needed.

    The output repeats every input row and appends three columns:
    ``nearest_feature_id``, ``nearest_gene_name`` and ``distance_to_feature``.
    Values that are missing (no hit, ``.`` or empty fields) are written as
    ``NA``.

    Raises:
        ValueError: a data row has fewer than three columns.
        subprocess.CalledProcessError: ``bedtools closest`` exited non-zero
            (its stderr is relayed to this process's stderr first).
    """
    import sys  # local import: only needed to relay bedtools' stderr

    parser = argparse.ArgumentParser(description="Annotate regions with nearest gene")
    parser.add_argument("--regions", required=True)
    parser.add_argument("--gene-bed", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()

    out_dir = os.path.dirname(args.out)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # csv.reader yields [] for blank lines; drop them so a stray empty line
    # neither masquerades as the header nor crashes the BED export below.
    rows = [row for row in read_tsv(args.regions) if row]
    if not rows:
        with open(args.out, "w") as handle:
            handle.write("")
        return

    header = None
    data_rows = rows
    if is_header(rows[0]):
        header = rows[0]
        data_rows = rows[1:]

    annotation_cols = ["nearest_feature_id", "nearest_gene_name", "distance_to_feature"]

    if not data_rows:
        with open(args.out, "w") as handle:
            if header:
                handle.write("\t".join(header + annotation_cols) + "\n")
        return

    # Fail with a readable message instead of a bare IndexError.
    for idx, row in enumerate(data_rows):
        if len(row) < 3:
            raise ValueError(
                f"Data row {idx + 1} in {args.regions} has {len(row)} column(s); "
                "expected at least chrom, start, end"
            )

    with tempfile.TemporaryDirectory() as tmpdir:
        # Export a 4-column BED; the 4th column carries the row index so the
        # bedtools output can be mapped back onto the original rows.
        bed_path = os.path.join(tmpdir, "regions.bed")
        with open(bed_path, "w") as handle:
            for idx, row in enumerate(data_rows):
                handle.write(f"{row[0]}\t{row[1]}\t{row[2]}\t{idx}\n")

        cmd = [
            "bedtools",
            "closest",
            "-d",
            "-a",
            bed_path,
            "-b",
            args.gene_bed,
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError as exc:
            # stderr was captured, so surface it before propagating the error.
            if exc.stderr:
                sys.stderr.write(exc.stderr)
            raise

        a_cols = 4  # chrom, start, end, idx written above
        annotations = {}
        for line in result.stdout.splitlines():
            if not line:
                continue
            parts = line.split("\t")
            idx = int(parts[3])
            distance = parts[-1]
            # Everything between the A columns and the trailing distance
            # column belongs to the matched gene-bed record.
            b_cols = len(parts) - a_cols - 1
            gene_id = "NA"
            gene_name = "NA"
            if b_cols >= 4:
                gene_id = parts[a_cols + 3]
            if b_cols >= 5:
                gene_name = parts[a_cols + 4]
            if gene_id in [".", ""]:
                gene_id = "NA"
            if gene_name in [".", ""]:
                gene_name = "NA"
            # bedtools marks "no feature found" with a "." chromosome and a
            # distance of -1; report that as missing, not as a real distance.
            if b_cols >= 1 and parts[a_cols] == "." and distance == "-1":
                distance = "NA"
            # When bedtools reports several ties for one region, later lines
            # overwrite earlier ones, so the last reported hit is kept.
            annotations[idx] = (gene_id, gene_name, distance)

    with open(args.out, "w") as handle:
        if header:
            handle.write("\t".join(header + annotation_cols) + "\n")
        for idx, row in enumerate(data_rows):
            gene_id, gene_name, distance = annotations.get(idx, ("NA", "NA", "NA"))
            handle.write("\t".join(row + [gene_id, gene_name, distance]) + "\n")


if __name__ == "__main__":
main()
89 changes: 89 additions & 0 deletions bin/chipbinner_rots.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env Rscript

# Run a ROTS differential test on a bin-by-sample matrix for one contrast and
# write per-bin p-values and FDR values as a TSV.
#
# Command-line arguments (all given as `--key value` pairs):
#   --matrix       tab-separated matrix with a header row; the first column
#                  supplies the row names (written out as `bin_id`)
#   --samplesheet  CSV with `sample_id` and `condition` columns
#   --treated      condition label assigned to ROTS group 1
#   --control      condition label assigned to ROTS group 2
#   --bootstrap    optional integer forwarded to ROTS as `B`
#   --k-value      optional integer forwarded to ROTS as `K`
#   --out          output TSV path (columns: bin_id, pval, FDR)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) %% 2 != 0) {
  stop("Arguments must be provided as --key value pairs")
}
arg_map <- list()
# seq_len() is empty when no arguments are given, whereas
# seq(1, 0, by = 2) would abort with "wrong sign in 'by' argument".
for (i in seq_len(length(args) %/% 2)) {
  key <- sub("^--", "", args[2 * i - 1])
  arg_map[[key]] <- args[2 * i]
}

# Convert an optional CLI value to an integer; NULL stays NULL so the caller
# can tell "not supplied" apart from a real value.
parse_int_arg <- function(value, name) {
  if (is.null(value)) {
    return(NULL)
  }
  parsed <- suppressWarnings(as.integer(value))
  if (is.na(parsed)) {
    stop(paste0("--", name, " must be an integer, got: ", value))
  }
  parsed
}

matrix_path <- arg_map[["matrix"]]
samplesheet_path <- arg_map[["samplesheet"]]
treated_label <- arg_map[["treated"]]
control_label <- arg_map[["control"]]
bootstrap <- parse_int_arg(arg_map[["bootstrap"]], "bootstrap")
k_value <- parse_int_arg(arg_map[["k-value"]], "k-value")
out_path <- arg_map[["out"]]

if (is.null(matrix_path) || is.null(samplesheet_path) || is.null(out_path)) {
  stop("Missing required arguments")
}
if (is.null(treated_label) || is.null(control_label)) {
  stop("Missing required arguments: --treated and --control")
}

suppressMessages(library(ROTS))

mat <- read.table(
  matrix_path,
  header = TRUE,
  sep = "\t",
  check.names = FALSE,
  row.names = 1
)
mat <- as.matrix(mat)

samples <- read.csv(samplesheet_path, header = TRUE, stringsAsFactors = FALSE)
missing_cols <- setdiff(c("sample_id", "condition"), colnames(samples))
if (length(missing_cols) > 0) {
  stop(paste(
    "Samplesheet is missing columns:",
    paste(missing_cols, collapse = ",")
  ))
}

# Keep only the samples that take part in this contrast.
contrast_samples <- samples[
  samples$condition %in% c(treated_label, control_label),
  ,
  drop = FALSE
]
if (nrow(contrast_samples) == 0) {
  stop("No samples match contrast labels")
}

sample_ids <- contrast_samples$sample_id
missing <- setdiff(sample_ids, colnames(mat))
if (length(missing) > 0) {
  stop(paste("Missing samples in matrix:", paste(missing, collapse = ",")))
}

# Reorder matrix columns to match the samplesheet so `classes` lines up.
mat <- mat[, sample_ids, drop = FALSE]

# Group 1 = treated, group 2 = control.
classes <- ifelse(contrast_samples$condition == treated_label, 1, 2)
if (!any(classes == 1) || !any(classes == 2)) {
  stop("Contrast requires at least one treated and one control sample")
}

# Only forward B when it was supplied; K = NULL is passed through as-is.
if (is.null(bootstrap)) {
  rot <- ROTS(mat, groups = classes, K = k_value)
} else {
  rot <- ROTS(mat, groups = classes, B = bootstrap, K = k_value)
}

pvals <- NULL
fdrs <- NULL

# Try the known locations for p-values in turn: list elements first, then an
# S4 slot, then an accessor function if the package exports one.
if (!is.null(rot$pvalue)) {
  pvals <- rot$pvalue
} else if (!is.null(rot$pval)) {
  pvals <- rot$pval
}

if (is.null(pvals) && "pvalue" %in% slotNames(rot)) {
  pvals <- slot(rot, "pvalue")
}

if (is.null(pvals) &&
  exists("pvalue", where = asNamespace("ROTS"), mode = "function")) {
  pvals <- ROTS::pvalue(rot)
}

if (is.null(pvals)) {
  stop("Unable to extract p-values from ROTS output")
}

if (length(pvals) != nrow(mat)) {
  stop(paste0(
    "ROTS returned ", length(pvals), " p-values for ", nrow(mat), " bins"
  ))
}

if (!is.null(rot$FDR)) {
  fdrs <- rot$FDR
} else if (!is.null(rot$fdr)) {
  fdrs <- rot$fdr
}

# Fall back to Benjamini-Hochberg when ROTS did not provide FDR values.
if (is.null(fdrs)) {
  fdrs <- p.adjust(pvals, method = "BH")
}

out <- data.frame(
  bin_id = rownames(mat),
  pval = as.numeric(pvals),
  FDR = as.numeric(fdrs)
)

write.table(
  out,
  file = out_path,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
Loading
Loading