From 0ca9e012f8e68d54ad2d2c30c2c310911e249285 Mon Sep 17 00:00:00 2001 From: Olga Botvinnik Date: Fri, 4 Nov 2016 12:02:02 -0700 Subject: [PATCH 1/8] Specify tab-delimited sequence output for validate --- outrigger/validate/check_splice_sites.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/outrigger/validate/check_splice_sites.py b/outrigger/validate/check_splice_sites.py index f9686f6..27721bd 100644 --- a/outrigger/validate/check_splice_sites.py +++ b/outrigger/validate/check_splice_sites.py @@ -1,10 +1,10 @@ from collections import OrderedDict -from Bio import SeqIO import pandas as pd import pybedtools + NT = 2 MAMMALIAN_SPLICE_SITES = 'GT/AG,AT/AC' @@ -46,6 +46,8 @@ def read_splice_sites(bed, genome, fasta, direction='upstream'): bed = pybedtools.BedTool(bed) genome = maybe_read_chromsizes(genome) + fasta_object = pyfasta.Fasta(fasta, key_fn=lambda key: key.split()[0]) + fasta_chroms = fasta_object.keys() if direction == 'upstream': left = NT @@ -55,11 +57,8 @@ def read_splice_sites(bed, genome, fasta, direction='upstream'): right = NT flanked = bed.flank(l=left, r=right, s=True, genome=genome) - seqs = flanked.sequence(fi=fasta, s=True) - - with open(seqs.seqfn) as f: - records = SeqIO.parse(f, 'fasta') - records = pd.Series([str(r.seq) for r in records], - index=[b.name for b in bed]) - # import pdb; pdb.set_trace() - return records + seqs = flanked.sequence(fi=fasta, s=True, name=True, tab=True) + + splice_sites = pd.read_table(seqs.seqfn, index_col=0, header=None, + squeeze=True) + return splice_sites From 6cbd6c2fd02bbb06e37c020bda94c48e694ac5c3 Mon Sep 17 00:00:00 2001 From: Olga Botvinnik Date: Fri, 4 Nov 2016 12:02:16 -0700 Subject: [PATCH 2/8] Extra whitespace --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index faf74ef..7f0a99f 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,6 @@ You'll want to add the [`bioconda`](https://bioconda.github.io/) channel to make ``` conda 
config --add channels r conda config --add channels bioconda - ``` Create an environment called `outrigger-env`. Python 2.7, Python 3.4, and Python 3.5 are supported. From c620325c4d7d4866cc1ba3a142f6cd97e9b67d16 Mon Sep 17 00:00:00 2001 From: Olga Botvinnik Date: Fri, 4 Nov 2016 12:02:43 -0700 Subject: [PATCH 3/8] Specify open source --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7f0a99f..75ffa44 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Outrigger is a program which uses junction reads from RNA seq data, and a graph database to create a *de novo* alternative splicing annotation with a graph database, and quantify percent spliced-in (Psi) of the events. -* Free software: BSD license +* Free and open source software: BSD license ## Features From 94e4434a2ade7c493c92d8ba50e28bf1de84d55c Mon Sep 17 00:00:00 2001 From: Olga Botvinnik Date: Fri, 4 Nov 2016 12:05:06 -0700 Subject: [PATCH 4/8] Version bump to 0.2.9dev and release notes --- docs/releases/v0.2.9.rst | 25 +++++++++++++++++++++++++ outrigger/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 docs/releases/v0.2.9.rst diff --git a/docs/releases/v0.2.9.rst b/docs/releases/v0.2.9.rst new file mode 100644 index 0000000..38cd99f --- /dev/null +++ b/docs/releases/v0.2.9.rst @@ -0,0 +1,25 @@ +v0.2.9 (...) +------------ + + + +New features +~~~~~~~~~~~~ + +Plotting functions +~~~~~~~~~~~~~~~~~~ + +API changes +~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + +- Fixed an issue in ``outrigger validate`` when fasta file and genome + annotation didn't have overlapping chromosomes and ``outrigger validate`` + would simply fail rather than gracefully skipping those events. 
+ +Miscellaneous +~~~~~~~~~~~~~ + diff --git a/outrigger/__init__.py b/outrigger/__init__.py index 0d6a52a..9fd93f8 100755 --- a/outrigger/__init__.py +++ b/outrigger/__init__.py @@ -2,7 +2,7 @@ __author__ = 'Olga Botvinnik' __email__ = 'olga.botvinnik@gmail.com' -__version__ = '0.2.8dev' +__version__ = '0.2.9dev' __all__ = ['psi', 'region', 'util', 'io', 'validate', 'index', 'common'] diff --git a/setup.py b/setup.py index 1e6ebf0..1c4dfce 100755 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name='outrigger', - version='0.2.8dev', + version='0.2.9dev', description="Outrigger is a tool to de novo annotate splice sites " "and exons", long_description=readme + '\n\n' + history, From 8a39c86a084e373f215193db013ba40a9b988b1c Mon Sep 17 00:00:00 2001 From: Olga Botvinnik Date: Fri, 4 Nov 2016 13:52:28 -0700 Subject: [PATCH 5/8] Remove pyfasta code --- outrigger/validate/check_splice_sites.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/outrigger/validate/check_splice_sites.py b/outrigger/validate/check_splice_sites.py index 27721bd..9d8f0c4 100644 --- a/outrigger/validate/check_splice_sites.py +++ b/outrigger/validate/check_splice_sites.py @@ -46,8 +46,6 @@ def read_splice_sites(bed, genome, fasta, direction='upstream'): bed = pybedtools.BedTool(bed) genome = maybe_read_chromsizes(genome) - fasta_object = pyfasta.Fasta(fasta, key_fn=lambda key: key.split()[0]) - fasta_chroms = fasta_object.keys() if direction == 'upstream': left = NT From 52067bfde9babe0c69d24a5460d7b59a21cbe7cb Mon Sep 17 00:00:00 2001 From: Olga Botvinnik Date: Fri, 4 Nov 2016 20:33:56 -0700 Subject: [PATCH 6/8] Get rid of additional stuff added by pybedtools to the event id --- outrigger/validate/check_splice_sites.py | 1 + 1 file changed, 1 insertion(+) diff --git a/outrigger/validate/check_splice_sites.py b/outrigger/validate/check_splice_sites.py index 9d8f0c4..45fbc85 100644 --- a/outrigger/validate/check_splice_sites.py +++ b/outrigger/validate/check_splice_sites.py @@ -59,4 +59,5 
@@ def read_splice_sites(bed, genome, fasta, direction='upstream'): splice_sites = pd.read_table(seqs.seqfn, index_col=0, header=None, squeeze=True) + splice_sites.index = [x.split('::')[0] for x in splice_sites.index] return splice_sites From 1133b79970f617a2b0202bf1078b1ea0bf5f079a Mon Sep 17 00:00:00 2001 From: Olga Botvinnik Date: Fri, 4 Nov 2016 20:34:38 -0700 Subject: [PATCH 7/8] Add fake chromsizes for simulated negative control for "outrigger validate" --- .../tests/data/simulated/validate_negative_control/chromsizes | 1 + .../outrigger_output/index/se/events.csv | 4 +++- .../outrigger_output/index/se/exon1.bed | 2 ++ .../outrigger_output/index/se/exon2.bed | 2 ++ .../outrigger_output/index/se/exon3.bed | 2 ++ 5 files changed, 10 insertions(+), 1 deletion(-) diff --git a/outrigger/tests/data/simulated/validate_negative_control/chromsizes b/outrigger/tests/data/simulated/validate_negative_control/chromsizes index 67d16b0..ecbcbaa 100644 --- a/outrigger/tests/data/simulated/validate_negative_control/chromsizes +++ b/outrigger/tests/data/simulated/validate_negative_control/chromsizes @@ -1 +1,2 @@ simulated 1000 +in_chromsizes 100 diff --git a/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/events.csv b/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/events.csv index f1f721b..f7f0ae4 100644 --- a/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/events.csv +++ b/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/events.csv @@ -1,2 +1,4 @@ isoform1=junction:simulated:150-500:+|isoform2=junction:simulated:150-300:+@exon:simulated:300-400:+@junction:simulated:400-500:+ -isoform1=junction:simulated:100-600:-|isoform2=junction:simulated:300-600:-@exon:simulated:300-400:-@junction:simulated:100-400:- \ No newline at end of file 
+isoform1=junction:simulated:100-600:-|isoform2=junction:simulated:300-600:-@exon:simulated:300-400:-@junction:simulated:100-400:- +isoform1=junction:nonexistent:100-600:-|isoform2=junction:nonexistent:300-600:-@exon:nonexistent:300-400:-@junction:nonexistent:100-400:- +isoform1=junction:in_chromsizes:100-600:-|isoform2=junction:in_chromsizes:300-600:-@exon:in_chromsizes:300-400:-@junction:in_chromsizes:100-400:- diff --git a/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon1.bed b/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon1.bed index 6786da9..b7806a6 100644 --- a/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon1.bed +++ b/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon1.bed @@ -1,2 +1,4 @@ simulated 100 150 isoform1=junction:simulated:150-500:+|isoform2=junction:simulated:150-300:+@exon:simulated:300-400:+@junction:simulated:400-500:+ . + simulated 500 600 isoform1=junction:simulated:100-600:-|isoform2=junction:simulated:300-600:-@exon:simulated:300-400:-@junction:simulated:100-400:- . - +nonexistent 500 600 isoform1=junction:nonexistent:100-600:-|isoform2=junction:nonexistent:300-600:-@exon:nonexistent:300-400:-@junction:nonexistent:100-400:- . - +in_chromsizes 500 600 isoform1=junction:in_chromsizes:100-600:-|isoform2=junction:in_chromsizes:300-600:-@exon:in_chromsizes:300-400:-@junction:in_chromsizes:100-400:- . 
- diff --git a/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon2.bed b/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon2.bed index 6d15854..862a665 100644 --- a/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon2.bed +++ b/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon2.bed @@ -1,2 +1,4 @@ simulated 300 400 isoform1=junction:simulated:150-500:+|isoform2=junction:simulated:150-300:+@exon:simulated:300-400:+@junction:simulated:400-500:+ . + simulated 300 400 isoform1=junction:simulated:100-600:-|isoform2=junction:simulated:300-600:-@exon:simulated:300-400:-@junction:simulated:100-400:- . - +nonexistent 300 400 isoform1=junction:nonexistent:100-600:-|isoform2=junction:nonexistent:300-600:-@exon:nonexistent:300-400:-@junction:nonexistent:100-400:- . - +in_chromsizes 300 400 isoform1=junction:in_chromsizes:100-600:-|isoform2=junction:in_chromsizes:300-600:-@exon:in_chromsizes:300-400:-@junction:in_chromsizes:100-400:- . - diff --git a/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon3.bed b/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon3.bed index 8fe9f89..eab2751 100644 --- a/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon3.bed +++ b/outrigger/tests/data/simulated/validate_negative_control/outrigger_output/index/se/exon3.bed @@ -1,2 +1,4 @@ simulated 500 600 isoform1=junction:simulated:150-500:+|isoform2=junction:simulated:150-300:+@exon:simulated:300-400:+@junction:simulated:400-500:+ . + simulated 100 150 isoform1=junction:simulated:100-600:-|isoform2=junction:simulated:300-600:-@exon:simulated:300-400:-@junction:simulated:100-400:- . - +nonexistent 100 150 isoform1=junction:nonexistent:100-600:-|isoform2=junction:nonexistent:300-600:-@exon:nonexistent:300-400:-@junction:nonexistent:100-400:- . 
- +in_chromsizes 100 150 isoform1=junction:in_chromsizes:100-600:-|isoform2=junction:in_chromsizes:300-600:-@exon:in_chromsizes:300-400:-@junction:in_chromsizes:100-400:- . - From c75134eb1601b33b09e587bdeefe328becabad97 Mon Sep 17 00:00:00 2001 From: Olga Botvinnik Date: Fri, 4 Nov 2016 20:37:11 -0700 Subject: [PATCH 8/8] Remove name of the splice site series --- outrigger/validate/check_splice_sites.py | 1 + 1 file changed, 1 insertion(+) diff --git a/outrigger/validate/check_splice_sites.py b/outrigger/validate/check_splice_sites.py index 45fbc85..c950c0d 100644 --- a/outrigger/validate/check_splice_sites.py +++ b/outrigger/validate/check_splice_sites.py @@ -60,4 +60,5 @@ def read_splice_sites(bed, genome, fasta, direction='upstream'): splice_sites = pd.read_table(seqs.seqfn, index_col=0, header=None, squeeze=True) splice_sites.index = [x.split('::')[0] for x in splice_sites.index] + splice_sites.name = None return splice_sites