dhusmann · dhusmann · Jan 4, 2026 · Jan 5, 2026 · Jan 5, 2026 · Jan 5, 2026
diff --git a/assets/multiqc/differential_skipped_header.txt b/assets/multiqc/differential_skipped_header.txt
@@ -0,0 +1,11 @@
+#id: 'differential_skipped'
+#parent_id: 'peak_qc'
+#parent_name: 'Peak QC'
+#parent_description: 'Differential peak calling skipped comparisons'
+#section_name: 'Differential Peak Calling Skipped'
+#description: 'Comparisons skipped due to missing conditions or replicate constraints'
+#plot_type: 'table'
+#anchor: 'differential_skipped'
+#pconfig:
+#    id: 'differential_skipped_table'
+#    title: 'Differential Peak Calling Skipped'
diff --git a/assets/multiqc/differential_summary_header.txt b/assets/multiqc/differential_summary_header.txt
@@ -0,0 +1,11 @@
+#id: 'differential_summary'
+#parent_id: 'peak_qc'
+#parent_name: 'Peak QC'
+#parent_description: 'Differential peak calling summary'
+#section_name: 'Differential Peak Calling Summary'
+#description: 'Counts of significant differential regions per method, group, and caller'
+#plot_type: 'table'
+#anchor: 'differential_summary'
+#pconfig:
+#    id: 'differential_summary_table'
+#    title: 'Differential Peak Calling Summary'
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
@@ -68,6 +68,8 @@ custom_content:
     - consensus_peak_counts
     - primary_frip_score
     - peak_reprod_perc
+    - differential_summary
+    - differential_skipped
     - software-versions-by-process
     - software-versions-unique
 

diff --git a/bin/annotate_regions.py b/bin/annotate_regions.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+import argparse
+import csv
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+
+def detect_cols(header):
+    """Locate the chromosome, start and end columns in a table header.
+
+    Matching is case-insensitive; the original spelling of the matched header
+    entry is returned. Chromosome aliases are tried in the order
+    chr, chrom, seqnames, seqname.
+
+    Returns a (chr_col, start_col, end_col) tuple in which any element is
+    None when no matching column exists.
+    """
+    # Map every lower-cased name to its first occurrence in the header.
+    by_lower = {}
+    for column in header:
+        by_lower.setdefault(column.lower(), column)
+
+    def first_match(candidates):
+        return next((by_lower[c] for c in candidates if c in by_lower), None)
+
+    return (
+        first_match(["chr", "chrom", "seqnames", "seqname"]),
+        first_match(["start"]),
+        first_match(["end"]),
+    )
+
+
+def parse_gtf_attributes(attr_str):
+    """Parse the attribute column of a GTF/GFF record into a dict.
+
+    Items are separated by ";". Two item shapes are understood:
+
+    * GTF style ``key "value"`` (split on the first space), and
+    * GFF3 style ``key=value`` (split on the first "=", used when the "="
+      comes before any space), so that lookups such as ``ID`` can succeed.
+
+    Surrounding whitespace and double quotes are removed from values. Items
+    matching neither shape are ignored. When a key repeats, the last value
+    wins.
+    """
+    attrs = {}
+    for item in attr_str.strip().split(";"):
+        item = item.strip()
+        if not item:
+            continue
+        space_at = item.find(" ")
+        equals_at = item.find("=")
+        if equals_at > 0 and (space_at == -1 or equals_at < space_at):
+            # GFF3 style: key=value
+            key, value = item.split("=", 1)
+        elif space_at != -1:
+            # GTF style: key "value"
+            key, value = item.split(" ", 1)
+        else:
+            continue
+        attrs[key.strip()] = value.strip().strip('"')
+    return attrs
+
+
+def gtf_to_tss_bed(gtf_path, out_path, feature_types=("gene", "transcript")):
+    """Write a BED4 file of single-base TSS positions derived from a GTF.
+
+    Parameters
+    ----------
+    gtf_path : pathlib.Path
+        Annotation file with nine tab-separated columns per record.
+    out_path : pathlib.Path
+        Destination BED file (chrom, start, end, name); overwritten.
+    feature_types : iterable of str or None
+        Values of the third GTF column that are turned into TSS records.
+        Restricting this avoids emitting a "TSS" for every exon/CDS/UTR
+        record. If no record of these types is found (or the argument is
+        empty/None), all records are used.
+
+    The TSS is the 0-based record start on the "+" strand and the last base
+    of the record on the "-" strand; records with any other strand, fewer
+    than nine columns, or non-integer coordinates are skipped. The name is
+    the first available of gene_name, gene_id, transcript_id, ID, else "NA".
+    """
+    def iter_tss(wanted):
+        with gtf_path.open() as in_handle:
+            for line in in_handle:
+                if not line.strip() or line.startswith("#"):
+                    continue
+                # maxsplit keeps stray tabs inside the attribute column from
+                # breaking the nine-way unpack below.
+                fields = line.rstrip().split("\t", 8)
+                if len(fields) < 9:
+                    continue
+                chrom, _, feature, start, end, _, strand, _, attrs = fields
+                if wanted is not None and feature not in wanted:
+                    continue
+                try:
+                    # GTF is 1-based inclusive; BED is 0-based half-open.
+                    start_i = max(int(start) - 1, 0)
+                    end_i = int(end)
+                except ValueError:
+                    continue
+                if strand == "+":
+                    tss_start = start_i
+                elif strand == "-":
+                    tss_start = max(end_i - 1, 0)
+                else:
+                    continue
+                attr_map = parse_gtf_attributes(attrs)
+                name = (
+                    attr_map.get("gene_name")
+                    or attr_map.get("gene_id")
+                    or attr_map.get("transcript_id")
+                    or attr_map.get("ID")
+                    or "NA"
+                )
+                yield chrom, tss_start, name
+
+    def write_records(records, out_handle):
+        count = 0
+        for chrom, tss_start, name in records:
+            out_handle.write("\t".join([chrom, str(tss_start), str(tss_start + 1), name]) + "\n")
+            count += 1
+        return count
+
+    wanted = set(feature_types) if feature_types else None
+    with out_path.open("w") as out_handle:
+        written = write_records(iter_tss(wanted), out_handle)
+        if written == 0 and wanted is not None:
+            # Annotation has no gene/transcript records: use every record.
+            write_records(iter_tss(None), out_handle)
+
+
+def run(cmd, stdout=None):
+    """Execute an external command and fail loudly on a non-zero exit.
+
+    cmd is the argument list handed to subprocess.run; stdout is passed
+    through unchanged (for example an open file handle). stderr is captured
+    as text and included in the RuntimeError raised on failure.
+    """
+    completed = subprocess.run(cmd, stdout=stdout, stderr=subprocess.PIPE, text=True)
+    if completed.returncode == 0:
+        return
+    raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{completed.stderr}")
+
+
+def main():
+    """Annotate each region of a tab-separated table with its nearest feature.
+
+    Command-line options:
+      --input     table with a header containing chr/start/end columns
+      --output    destination table; the input plus the columns
+                  nearest_feature_id and distance_to_feature
+      --features  optional BED file of features (used if it exists)
+      --gtf       optional GTF; converted to a TSS BED when --features is
+                  not usable
+
+    When neither a features file nor a GTF is available the input is copied
+    to the output unchanged. Otherwise ``bedtools closest -d`` is run and its
+    result is joined back onto the input rows by row index. Rows without a
+    reported feature get "NA" in both new columns.
+
+    Intermediate files are written to the current working directory.
+    Exits via SystemExit when the chr/start/end columns cannot be found.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input", required=True)
+    parser.add_argument("--output", required=True)
+    parser.add_argument("--features", default=None)
+    parser.add_argument("--gtf", default=None)
+    args = parser.parse_args()
+
+    input_path = Path(args.input)
+    output_path = Path(args.output)
+
+    with input_path.open() as handle:
+        # An empty file yields an empty header, which is reported below as
+        # undetectable columns instead of an uncaught StopIteration.
+        header = next(csv.reader(handle, delimiter='\t'), [])
+
+    chr_col, start_col, end_col = detect_cols(header)
+    if not chr_col or not start_col or not end_col:
+        raise SystemExit("Could not detect chr/start/end columns for annotation")
+
+    features_bed = None
+    if args.features and Path(args.features).exists():
+        features_bed = Path(args.features)
+    elif args.gtf and Path(args.gtf).exists():
+        tss_bed = Path("features.tss.bed")
+        gtf_to_tss_bed(Path(args.gtf), tss_bed)
+        features_bed = tss_bed
+
+    if not features_bed or not features_bed.exists():
+        # no annotation possible; copy input to output
+        output_path.write_text(input_path.read_text())
+        return
+
+    # bedtools closest prints the 4 region columns followed by the feature
+    # columns, so the feature name (4th BED column) sits at index 4 + 3.
+    feature_name_idx = None
+    with features_bed.open() as handle:
+        for line in handle:
+            if not line.strip() or line.startswith("#"):
+                continue
+            if len(line.rstrip().split("\t")) >= 4:
+                feature_name_idx = 4 + 3
+            break
+
+    # The 4th column carries the input row index so results can be joined
+    # back after sorting.
+    regions_bed = Path("regions.bed")
+    with input_path.open() as handle, regions_bed.open("w") as out_handle:
+        reader = csv.DictReader(handle, delimiter='\t')
+        for idx, row in enumerate(reader):
+            out_handle.write("\t".join([
+                row[chr_col],
+                str(row[start_col]),
+                str(row[end_col]),
+                str(idx)
+            ]) + "\n")
+
+    # bedtools closest requires both inputs sorted by chromosome and start;
+    # sorting both with the same tool keeps the chromosome order consistent.
+    sorted_regions = Path("annotate_regions.regions.sorted.bed")
+    sorted_features = Path("annotate_regions.features.sorted.bed")
+    for unsorted_path, sorted_path in (
+        (regions_bed, sorted_regions),
+        (features_bed, sorted_features),
+    ):
+        with sorted_path.open("w") as sorted_handle:
+            run(["bedtools", "sort", "-i", str(unsorted_path)], stdout=sorted_handle)
+
+    closest_out = Path("closest.tsv")
+    # The context manager closes the handle before the file is read back.
+    with closest_out.open("w") as closest_handle:
+        run(
+            ["bedtools", "closest", "-d", "-a", str(sorted_regions), "-b", str(sorted_features)],
+            stdout=closest_handle,
+        )
+
+    annotations = {}
+    with closest_out.open() as handle:
+        for line in handle:
+            if not line.strip():
+                continue
+            fields = line.rstrip().split("\t")
+            if len(fields) < 5:
+                continue
+            row_id = int(fields[3])
+            feature_id = "NA"
+            if feature_name_idx is not None and len(fields) > feature_name_idx:
+                feature_id = fields[feature_name_idx]
+            distance = fields[-1]
+            if distance == "-1":
+                # bedtools reports -1 (and "." fields) when no feature exists
+                # on the region's chromosome; use the table's NA marker.
+                feature_id, distance = "NA", "NA"
+            # On ties bedtools prints several lines; the last one is kept.
+            annotations[row_id] = (feature_id, distance)
+
+    with input_path.open() as handle, output_path.open("w") as out_handle:
+        reader = csv.DictReader(handle, delimiter='\t')
+        fieldnames = reader.fieldnames + ["nearest_feature_id", "distance_to_feature"]
+        writer = csv.DictWriter(out_handle, delimiter='\t', fieldnames=fieldnames, lineterminator='\n')
+        writer.writeheader()
+        for idx, row in enumerate(reader):
+            feature_id, distance = annotations.get(idx, ("NA", "NA"))
+            row["nearest_feature_id"] = feature_id
+            row["distance_to_feature"] = distance
+            writer.writerow(row)
+
+
+# Run the annotation only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/bin/chipbinner_lola.R b/bin/chipbinner_lola.R
@@ -0,0 +1,68 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages({
+    library(optparse)
+    library(LOLA)
+    library(GenomicRanges)
+    library(rtracklayer)
+    library(ggplot2)
+})
+
+# Command-line interface. Every option except --out_pdf is required.
+option_list <- list(
+    make_option("--bed", type = "character",
+        help = "BED file of query regions (imported as the LOLA user set)"),
+    make_option("--universe", type = "character",
+        help = "BED file of background regions passed to runLOLA as the universe"),
+    make_option("--db", type = "character",
+        help = "Directory of the LOLA region database (loaded with loadRegionDB)"),
+    make_option("--label", type = "character",
+        help = "Name given to the user set; also used in the plot title"),
+    make_option("--out_tsv", type = "character",
+        help = "Path of the tab-separated enrichment table to write"),
+    make_option("--out_pdf", type = "character", default = "",
+        help = "Optional path of a PDF bar plot; empty string disables plotting")
+)
+
+opt <- parse_args(OptionParser(option_list = option_list))
+
+# Options without a default are absent from `opt` when not supplied; report
+# them here rather than failing later with an unrelated error message.
+required_opts <- c("bed", "universe", "db", "label", "out_tsv")
+is_missing <- vapply(required_opts, function(name) is.null(opt[[name]]), logical(1))
+if (any(is_missing)) {
+    stop(
+        "Missing required option(s): ",
+        paste0("--", required_opts[is_missing], collapse = ", "),
+        call. = FALSE
+    )
+}
+
+# The region database must be an existing directory.
+if (!dir.exists(opt$db)) {
+    stop("LOLA database not found: ", opt$db, call. = FALSE)
+}
+
+# Count the lines that can hold a region: comments, blank lines and
+# track/browser header lines do not count.
+bed_lines <- readLines(opt$bed, warn = FALSE)
+is_region_line <- nzchar(trimws(bed_lines)) &
+    !grepl("^(#|track([[:space:]]|$)|browser([[:space:]]|$))", bed_lines)
+bed_lines <- bed_lines[is_region_line]
+if (length(bed_lines) == 0) {
+    # Nothing to test: write an empty table (and a placeholder plot when a
+    # PDF was requested) and finish successfully.
+    write.table(data.frame(), opt$out_tsv, sep = "\t", quote = FALSE, row.names = FALSE)
+    if (opt$out_pdf != "") {
+        pdf(opt$out_pdf)
+        plot.new()
+        text(0.5, 0.5, "No regions available for enrichment")
+        dev.off()
+    }
+    quit(status = 0)
+}
+
+# Load the query regions, the background universe and the region database.
+query_regions <- rtracklayer::import(opt$bed, format = "bed")
+background <- rtracklayer::import(opt$universe, format = "bed")
+lola_db <- LOLA::loadRegionDB(opt$db)
+
+# The single query set is stored under its label in a GRangesList before
+# being handed to runLOLA.
+query_sets <- GRangesList()
+query_sets[[opt$label]] <- query_regions
+
+res <- LOLA::runLOLA(query_sets, background, lola_db)
+
+# Keep only the rows reported for this label.
+has_results <- !is.null(res) && nrow(res) > 0
+if (has_results) {
+    res <- res[res$userSet == opt$label, , drop = FALSE]
+}
+
+write.table(res, opt$out_tsv, sep = "\t", quote = FALSE, row.names = FALSE)
+
+# Optional bar plot of the 20 most significant region sets.
+if (opt$out_pdf != "") {
+    if (!is.null(res) && nrow(res) > 0) {
+        # Plain data.frame so that base subsetting and `$<-` behave the same
+        # whatever table class runLOLA returned.
+        top <- as.data.frame(res)
+        # LOLA documents significance as `pValueLog` (-log10 p-value); a raw
+        # `pValue` column is still accepted when that is what is present.
+        if ("pValueLog" %in% colnames(top)) {
+            top$neglog10p <- top$pValueLog
+        } else if ("pValue" %in% colnames(top)) {
+            top$neglog10p <- -log10(top$pValue)
+        } else {
+            stop("LOLA results contain neither 'pValueLog' nor 'pValue'", call. = FALSE)
+        }
+        # Most significant first; head() is safe for fewer than 20 rows.
+        top <- head(top[order(-top$neglog10p), , drop = FALSE], 20)
+        top$label <- if ("description" %in% colnames(top)) top$description else top$filename
+        enrichment_plot <- ggplot(top, aes(x = reorder(label, neglog10p), y = neglog10p)) +
+            geom_col(fill = "#4C72B0") +
+            coord_flip() +
+            xlab("Region set") +
+            ylab("-log10(p-value)") +
+            ggtitle(paste("LOLA enrichment:", opt$label))
+        pdf(opt$out_pdf)
+        # A ggplot object built inside a block is not auto-printed; without
+        # print() nothing is drawn on the device.
+        print(enrichment_plot)
+        dev.off()
+    } else {
+        # Mirror the empty-input case: still produce the requested PDF.
+        pdf(opt$out_pdf)
+        plot.new()
+        text(0.5, 0.5, "No enrichment results to plot")
+        dev.off()
+    }
+}