Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: prefix-dev/setup-pixi@v0.8.1
- uses: prefix-dev/setup-pixi@v0.9.6
with:
pixi-version: v0.37.0
pixi-version: v0.66.0
cache: true
- run: pixi run test
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Change Log

All notable changes to this project will be documented in this file.
## v0.2.0
- Update fibertools-rs version (also samtools, htslib, and bedtools) in `workflow/envs/env.yaml`
- Fix Polars issue in #52


## v0.1.2

Expand Down
37 changes: 6 additions & 31 deletions pixi.toml
Original file line number Diff line number Diff line change
@@ -1,41 +1,16 @@
[project]
[workspace]
authors = ["Mitchell Robert Vollger <mrvollger@gmail.com>"]
channels = ["conda-forge", "bioconda"]
description = "Add a short description here"
description = "A Snakemake pipeline for calling FIRE peaks using fibertools-rs."
name = "FIRE"
platforms = ["osx-64", "linux-64"]
version = "0.1.2"
version = "0.2.0"

[tasks]
fmt = "ruff format . && taplo format pixi.toml && snakefmt workflow/"
test-data = { cmd = [
"cd",
"$INIT_CWD",
"&&",
"mkdir",
"-p",
"fire-test-data",
"&&",
"aws",
"s3",
"--no-sign-request",
"sync",
"--endpoint-url",
"https://s3.kopah.orci.washington.edu",
"s3://stergachis/public/FIRE/test-data",
"fire-test-data/",
] }
test = { cmd = [
"cd",
"$INIT_CWD/fire-test-data",
"&&",
"snakemake",
"-s",
"$PIXI_PROJECT_ROOT/workflow/Snakefile",
"--configfile",
"test.yaml",
"-k",
], depends-on = [
test-data = { cmd = '''bash -c 'if [ -f "$INIT_CWD/fire-test-data/test.cram" ]; then echo "test data already present, skipping download"; else mkdir -p "$INIT_CWD/fire-test-data" && aws s3 --no-sign-request sync --endpoint-url https://s3.kopah.orci.washington.edu s3://stergachis/public/FIRE/test-data "$INIT_CWD/fire-test-data/"; fi' ''' }
test-clean = { cmd = '''bash -c 'cd "$INIT_CWD/fire-test-data" && rm -rf results temp .snakemake' ''' }
test = { cmd = '''bash -c 'cd "$INIT_CWD/fire-test-data" && trap "rm -rf results temp .snakemake" EXIT && snakemake -s "$PIXI_PROJECT_ROOT/workflow/Snakefile" --configfile test.yaml -k' ''', depends-on = [
"test-data",
], clean-env = true }
fire = { cmd = [
Expand Down
8 changes: 4 additions & 4 deletions workflow/envs/env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ channels:
- bioconda
- defaults
dependencies:
- samtools==1.19.1
- htslib==1.19.1
- bedtools==2.31
- bioconda::fibertools-rs==0.6
- samtools>=1.19.1
- htslib>=1.19.1
- bedtools>=2.31
- bioconda::fibertools-rs==0.9
- hck>=0.9.2
- bioawk
- ripgrep
Expand Down
11 changes: 6 additions & 5 deletions workflow/rules/coverages.smk
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,14 @@ rule exclude_from_shuffle:
conda:
DEFAULT_ENV
params:
exclude=EXCLUDES,
exclude=lambda wc: " ".join(EXCLUDES) if EXCLUDES else "",
shell:
"""

( \
bedtools genomecov -bga -i {input.filtered} -g {input.fai} | awk '$4 == 0'; \
less {params.exclude} \
(
bedtools genomecov -bga -i {input.filtered} -g {input.fai} | awk '$4 == 0'
if [ -n "{params.exclude}" ]; then
zcat -f {params.exclude}
fi
) \
| cut -f 1-3 \
| bedtools sort \
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/fire-peaks.smk
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ rule shuffled_pileup_chromosome:
DEFAULT_ENV
shell:
"""
{FT_EXE} pileup {input.cram} {wildcards.chrom} -t {threads} \
{FT_EXE} pileup {input.cram} -r {wildcards.chrom} -t {threads} \
--fiber-coverage --shuffle {input.shuffled} \
--no-msp --no-nuc \
| bgzip -@ {threads} \
Expand Down Expand Up @@ -103,7 +103,7 @@ rule pileup_chromosome:
"""
{FT_EXE} pileup -t {threads} \
--haps --fiber-coverage \
{input.bam} {wildcards.chrom} \
{input.bam} -r {wildcards.chrom} \
| bgzip -@ {threads} \
> {output.bed}
"""
Expand Down
13 changes: 9 additions & 4 deletions workflow/scripts/fdr-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,16 @@ def read_pileup_file(infile, nrows):
return None

# add schema overrides for the score columns
# Build schema overrides keyed by positional column names (column_1, column_2, ...)
# because polars infers schema BEFORE new_columns is applied when has_header=False.
# Keying on '#chrom' / 'score' here would be silently ignored.
schema_overrides = {}
for n in ["score", "score_H1", "score_H2", "score_shuffled"]:
if n in header:
schema_overrides[n] = float

for col_idx, col_name in enumerate(header, start=1):
positional = f"column_{col_idx}"
if col_name in ("score", "score_H1", "score_H2", "score_shuffled"):
schema_overrides[positional] = pl.Float64
elif col_name == "#chrom":
schema_overrides[positional] = pl.Utf8
logging.info(f"Header of the pileup file:\n{header}")
logging.info(f"Schema overrides for the pileup file:\n{schema_overrides}")

Expand Down
2 changes: 1 addition & 1 deletion workflow/scripts/merge_fire_peaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def main(
logger.setLevel(log_level)

inf = io.StringIO(sys.stdin.read())
df = pl.read_csv(inf, separator="\t", null_values=".")
df = pl.read_csv(inf, separator="\t", null_values=".", schema_overrides={"#chrom": pl.Utf8},)
if df.shape[0] == 0:
logging.info("No peaks to merge")
return 0
Expand Down
Loading