Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
2aebdff
Add differential peak calling specifications
dhusmann Jan 4, 2026
f123e6e
Add differential peak calling layer
dhusmann Jan 5, 2026
dcff1ff
Avoid gtf2bed dependency in annotate_regions
dhusmann Jan 5, 2026
5db3109
Refine chipbinner clustering inputs
dhusmann Jan 5, 2026
20fd9f0
Handle empty DiffBind results
dhusmann Jan 5, 2026
e58d2a6
Handle unnamed annotation BEDs
dhusmann Jan 5, 2026
853dd7d
Ensure SPAN compare has samtools
dhusmann Jan 5, 2026
f2b0338
Report full DiffBind results
dhusmann Jan 5, 2026
790ee23
Prevent summary file collisions
dhusmann Jan 5, 2026
bc6ae24
Pass summary file list to merge
dhusmann Jan 5, 2026
a734875
Include SPAN down peaks in overlaps
dhusmann Jan 5, 2026
ac47e30
Fix SPAN compare inputs and capability probe
dhusmann Jan 5, 2026
1783331
Stage BAM indexes for chipbinner counts
dhusmann Jan 5, 2026
d045ec4
Create differential peak calling specification document
dhusmann Jan 5, 2026
def547c
Fix SPAN differential native/fallback handling
dhusmann Jan 5, 2026
3463f2d
Fix differential overlap and spike-in reporting
dhusmann Jan 5, 2026
194f2de
Address SPAN diff review feedback
dhusmann Jan 5, 2026
59fce07
Address differential review feedback
dhusmann Jan 5, 2026
52e138e
Add chipbinner differential regression test
dhusmann Jan 5, 2026
8befbdf
Align chipbinner overlaps and hdbscan ranking
dhusmann Jan 5, 2026
4119fdf
Complete chipbinner clustering outputs
dhusmann Jan 5, 2026
2766d08
Refine chipbinner ranking and enrichment outputs
dhusmann Jan 5, 2026
19a7a1e
Fix pooled control BAI lookup and rots fallback
dhusmann Jan 9, 2026
88b127f
Handle NA control_group in differential-only
dhusmann Jan 9, 2026
d3f1cdd
Use known conditions for pooled control parsing
dhusmann Jan 9, 2026
ae4c3e5
Disambiguate differential summary staging
dhusmann Jan 9, 2026
efbab9d
Fix differential pytest failures and update history
dhusmann Jan 10, 2026
a2d3e85
Filter NA bins in chipbinner significant beds
dhusmann Jan 14, 2026
6f3101c
Mark unassigned clusters as NA
dhusmann Jan 14, 2026
0903658
Preserve empty control map for chipbinner
dhusmann Jan 14, 2026
1ef69b2
Broadcast SPAN capability values
dhusmann Jan 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions assets/multiqc/differential_skipped_header.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#id: 'differential_skipped'
#parent_id: 'peak_qc'
#parent_name: 'Peak QC'
#parent_description: 'Differential peak calling skipped comparisons'
#section_name: 'Differential Peak Calling Skipped'
#description: 'Comparisons skipped due to missing conditions or replicate constraints'
#plot_type: 'table'
#anchor: 'differential_skipped'
#pconfig:
# id: 'differential_skipped_table'
# title: 'Differential Peak Calling Skipped'
11 changes: 11 additions & 0 deletions assets/multiqc/differential_summary_header.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#id: 'differential_summary'
#parent_id: 'peak_qc'
#parent_name: 'Peak QC'
#parent_description: 'Differential peak calling summary'
#section_name: 'Differential Peak Calling Summary'
#description: 'Counts of significant differential regions per method, group, and caller'
#plot_type: 'table'
#anchor: 'differential_summary'
#pconfig:
# id: 'differential_summary_table'
# title: 'Differential Peak Calling Summary'
2 changes: 2 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ custom_content:
- consensus_peak_counts
- primary_frip_score
- peak_reprod_perc
- differential_summary
- differential_skipped
- software-versions-by-process
- software-versions-unique

Expand Down
163 changes: 163 additions & 0 deletions bin/annotate_regions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#!/usr/bin/env python3
import argparse
import csv
import os
import subprocess
import sys
from pathlib import Path


def detect_cols(header):
header_lower = [h.lower() for h in header]
def find(names):
for name in names:
if name in header_lower:
return header[header_lower.index(name)]
return None
chr_col = find(["chr", "chrom", "seqnames", "seqname"])
start_col = find(["start"])
end_col = find(["end"])
return chr_col, start_col, end_col


def parse_gtf_attributes(attr_str):
attrs = {}
for item in attr_str.strip().split(";"):
item = item.strip()
if not item:
continue
if " " not in item:
continue
key, value = item.split(" ", 1)
value = value.strip().strip('"')
attrs[key] = value
return attrs


def gtf_to_tss_bed(gtf_path, out_path):
with gtf_path.open() as in_handle, out_path.open("w") as out_handle:
for line in in_handle:
if not line.strip() or line.startswith("#"):
continue
fields = line.rstrip().split("\t")
if len(fields) < 9:
continue
chrom, _, _, start, end, _, strand, _, attrs = fields
try:
start_i = int(start) - 1
end_i = int(end)
except ValueError:
continue
if start_i < 0:
start_i = 0
if strand == "+":
tss_start = start_i
elif strand == "-":
tss_start = max(end_i - 1, 0)
else:
continue
tss_end = tss_start + 1
attr_map = parse_gtf_attributes(attrs)
name = (
attr_map.get("gene_name")
or attr_map.get("gene_id")
or attr_map.get("transcript_id")
or attr_map.get("ID")
or "NA"
)
out_handle.write("\t".join([chrom, str(tss_start), str(tss_end), name]) + "\n")


def run(cmd, stdout=None):
result = subprocess.run(cmd, stdout=stdout, stderr=subprocess.PIPE, text=True)
if result.returncode != 0:
raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{result.stderr}")


def main():
parser = argparse.ArgumentParser()
parser.add_argument("--input", required=True)
parser.add_argument("--output", required=True)
parser.add_argument("--features", default=None)
parser.add_argument("--gtf", default=None)
args = parser.parse_args()

input_path = Path(args.input)
output_path = Path(args.output)

with input_path.open() as handle:
reader = csv.reader(handle, delimiter='\t')
header = next(reader)

chr_col, start_col, end_col = detect_cols(header)
if not chr_col or not start_col or not end_col:
raise SystemExit("Could not detect chr/start/end columns for annotation")

features_bed = None
if args.features and Path(args.features).exists():
features_bed = Path(args.features)
elif args.gtf and Path(args.gtf).exists():
tss_bed = Path("features.tss.bed")
gtf_to_tss_bed(Path(args.gtf), tss_bed)
features_bed = tss_bed

if not features_bed or not features_bed.exists():
# no annotation possible; copy input to output
output_path.write_text(input_path.read_text())
return

features_has_name = False
feature_name_idx = None
with features_bed.open() as handle:
for line in handle:
if not line.strip() or line.startswith("#"):
continue
features_cols = len(line.rstrip().split("\t"))
features_has_name = features_cols >= 4
if features_has_name:
feature_name_idx = 4 + 3
break

regions_bed = Path("regions.bed")
with input_path.open() as handle, regions_bed.open("w") as out_handle:
reader = csv.DictReader(handle, delimiter='\t')
for idx, row in enumerate(reader):
out_handle.write("\t".join([
row[chr_col],
str(row[start_col]),
str(row[end_col]),
str(idx)
]) + "\n")

closest_out = Path("closest.tsv")
run(["bedtools", "closest", "-d", "-a", str(regions_bed), "-b", str(features_bed)], stdout=closest_out.open("w"))

annotations = {}
with closest_out.open() as handle:
for line in handle:
if not line.strip():
continue
fields = line.rstrip().split("\t")
if len(fields) < 5:
continue
row_id = int(fields[3])
feature_id = "NA"
if feature_name_idx is not None and len(fields) > feature_name_idx:
feature_id = fields[feature_name_idx]
distance = fields[-1]
Comment thread
dhusmann marked this conversation as resolved.
annotations[row_id] = (feature_id, distance)

with input_path.open() as handle, output_path.open("w") as out_handle:
reader = csv.DictReader(handle, delimiter='\t')
fieldnames = reader.fieldnames + ["nearest_feature_id", "distance_to_feature"]
writer = csv.DictWriter(out_handle, delimiter='\t', fieldnames=fieldnames, lineterminator='\n')
writer.writeheader()
for idx, row in enumerate(reader):
feature_id, distance = annotations.get(idx, ("NA", "NA"))
row["nearest_feature_id"] = feature_id
row["distance_to_feature"] = distance
writer.writerow(row)


if __name__ == "__main__":
main()
68 changes: 68 additions & 0 deletions bin/chipbinner_lola.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env Rscript

suppressPackageStartupMessages({
library(optparse)
library(LOLA)
library(GenomicRanges)
library(rtracklayer)
library(ggplot2)
})

option_list <- list(
make_option(c("--bed"), type = "character"),
make_option(c("--universe"), type = "character"),
make_option(c("--db"), type = "character"),
make_option(c("--label"), type = "character"),
make_option(c("--out_tsv"), type = "character"),
make_option(c("--out_pdf"), type = "character", default = "")
)

opt <- parse_args(OptionParser(option_list = option_list))

if (!dir.exists(opt$db)) {
stop(paste("LOLA database not found:", opt$db))
}

bed_lines <- readLines(opt$bed, warn = FALSE)
bed_lines <- bed_lines[!grepl("^#", bed_lines)]
if (length(bed_lines) == 0) {
write.table(data.frame(), opt$out_tsv, sep = "\t", quote = FALSE, row.names = FALSE)
if (opt$out_pdf != "") {
pdf(opt$out_pdf)
plot.new()
text(0.5, 0.5, "No regions available for enrichment")
dev.off()
}
quit(status = 0)
}

user_set <- rtracklayer::import(opt$bed, format = "bed")
universe <- rtracklayer::import(opt$universe, format = "bed")
region_db <- LOLA::loadRegionDB(opt$db)

user_sets <- GRangesList()
user_sets[[opt$label]] <- user_set

res <- LOLA::runLOLA(user_sets, universe, region_db)
if (!is.null(res) && nrow(res) > 0) {
res <- res[res$userSet == opt$label, , drop = FALSE]
}

write.table(res, opt$out_tsv, sep = "\t", quote = FALSE, row.names = FALSE)

if (opt$out_pdf != "" && !is.null(res) && nrow(res) > 0) {
top <- res[order(res$pValue), , drop = FALSE]
if (nrow(top) > 20) {
top <- top[1:20, , drop = FALSE]
}
top$neglog10p <- -log10(top$pValue)
top$label <- if ("description" %in% colnames(top)) top$description else top$filename
pdf(opt$out_pdf)
ggplot(top, aes(x = reorder(label, neglog10p), y = neglog10p)) +
geom_col(fill = "#4C72B0") +
coord_flip() +
xlab("Region set") +
ylab("-log10(p-value)") +
ggtitle(paste("LOLA enrichment:", opt$label))
dev.off()
}
Loading
Loading