From ae48f7ce9191c10e4d607697b62990aa197c1fca Mon Sep 17 00:00:00 2001 From: Rolf Schroeder Date: Wed, 15 Nov 2017 15:01:57 +0100 Subject: [PATCH 1/3] Fix typo --- pyhgvs/tests/test_hgvs_names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyhgvs/tests/test_hgvs_names.py b/pyhgvs/tests/test_hgvs_names.py index 0f12cd8..3237d5b 100644 --- a/pyhgvs/tests/test_hgvs_names.py +++ b/pyhgvs/tests/test_hgvs_names.py @@ -205,7 +205,7 @@ def test_invalid_coordinates(): ] -# Test examples of coverting coordinates. +# Test examples of converting coordinates. _convert_coords = [ # Positions near start codon. ('NM_000016.4', ('chr1', 76190473), CDNACoord(1)), From af7c5ce8c67edcbab32fac8d7a119dda1035eefa Mon Sep 17 00:00:00 2001 From: Rolf Schroeder Date: Thu, 16 Nov 2017 09:26:51 +0100 Subject: [PATCH 2/3] Add (some failing) unittests --- pyhgvs/tests/test_hgvs_names.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pyhgvs/tests/test_hgvs_names.py b/pyhgvs/tests/test_hgvs_names.py index 3237d5b..5389c57 100644 --- a/pyhgvs/tests/test_hgvs_names.py +++ b/pyhgvs/tests/test_hgvs_names.py @@ -256,6 +256,24 @@ def test_invalid_coordinates(): ('NM_000016.4', ('chr1', 76229354), CDNACoord(906, 0, CDNA_STOP_CODON)), ('NM_000016.4', ('chr1', 76229355), CDNACoord(907, 0, CDNA_STOP_CODON)), ('NM_000016.4', ('chr1', 76229356), CDNACoord(908, 0, CDNA_STOP_CODON)), + + # NR, pos strand + ('NR_002717.2', ('chr13', 70681609), CDNACoord(265, 0)), # exon 01 + ('NR_002717.2', ('chr13', 70713884), CDNACoord(1471, 0)), # exon 05 + ('NR_002717.2', ('chr13', 70682847), CDNACoord(529, 974)), # intron 01 + ('NR_002717.2', ('chr13', 70681344), CDNACoord(-1, 0)), # upstream + ('NR_002717.2', ('chr13', 70681018), CDNACoord(-327, 0)), # upstream + ('NR_002717.2', ('chr13', 70713886), CDNACoord(1, 0, CDNA_STOP_CODON)), # downstream + ('NR_002717.2', ('chr13', 70714013), CDNACoord(128, 0, CDNA_STOP_CODON)), # downstream + + # NR, neg strand + ('NR_026759.1', ('chr9', 43033105), CDNACoord(208, 0)), # exon 01 + ('NR_026759.1', ('chr9', 43031289), CDNACoord(1106, 0)), # exon 04 + ('NR_026759.1', ('chr9', 43032642), CDNACoord(515, 156)), # intron 01 + ('NR_026759.1', ('chr9', 43033313), CDNACoord(-1, 0)), # upstream + ('NR_026759.1', ('chr9', 43033387), CDNACoord(-75, 0)), # upstream + ('NR_026759.1', ('chr9', 43027662), CDNACoord(1, 0, CDNA_STOP_CODON)), # downstream + ('NR_026759.1', ('chr9', 43027589), CDNACoord(74, 0, CDNA_STOP_CODON)), # downstream ] @@ -784,6 +802,8 @@ def test_invalid_coordinates(): '1440 NM_000038.5 chr5 + 112073555 112181936 112090587 112179823 16 112073555,112090569,112102022,112102885,112111325,112116486,112128142,112136975,112151191,112154662,112157592,112162804,112163625,112164552,112170647,112173249, 112073622,112090722,112102107,112103087,112111434,112116600,112128226,112137080,112151290,112155041,112157688,112162944,112163703,112164669,112170862,112181936, 0 APC cmpl cmpl -1,0,0,1,2,0,0,0,0,0,1,1,0,0,0,2,', # nopep8 '184 NM_000492.3 chr7 + 117120016 117308718 117120148 117307162 27 117120016,117144306,117149087,117170952,117174329,117175301,117176601,117180153,117182069,117188694,117199517,117227792,117230406,117231987,117234983,117242879,117243585,117246727,117250572,117251634,117254666,117267575,117282491,117292895,117304741,117305512,117306961, 117120201,117144417,117149196,117171168,117174419,117175465,117176727,117180400,117182162,117188877,117199709,117227887,117230493,117232711,117235112,117242917,117243836,117246807,117250723,117251862,117254767,117267824,117282647,117292985,117304914,117305618,117308718, 0 CFTR cmpl cmpl 0,2,2,0,0,0,2,2,0,0,0,0,2,2,0,0,2,1,0,1,1,0,0,0,0,2,0,', # nopep8 '1 ENST00000357654 chr17 - 41196311 41277387 41197694 41276113 23 41196311,41199659,41201137,41203079,41209068,41215349,41215890,41219624,41222944,41226347,41228504,41234420,41242960,41243451,41247862,41249260,41251791,41256138,41256884,41258472,41267742,41276033,41277287, 41197819,41199720,41201211,41203134,41209152,41215390,41215968,41219712,41223255,41226538,41228631,41234592,41243049,41246877,41247939,41249306,41251897,41256278,41256973,41258550,41267796,41276132,41277387, 0 ENSG00000012048 cmpl cmpl 1,0,1,0,0,1,1,0,1,2,1,0,1,1,2,1,0,1,2,2,2,0,-1,', # nopep8 + '1124 NR_002717.2 chr13 + 70681344 70713885 70713885 70713885 5 70681344,70689272,70703915,70704809,70713372, 70681873,70689430,70704016,70704980,70713885, 0 ATXN8OS unk unk -1,-1,-1,-1,-1,', # nopep8 + '913 NR_026759.1 chr9 - 43027662 43033312 43033312 43033312 5 43027662,43031154,43031790,43032080,43032797, 43027965,43031423,43031994,43032332,43033312, 0 FAM95B1 unk unk -1,-1,-1,-1,-1,', # nopep8 ]) From 24f5a508b1dce7ee9dd69e58e6ca5f1c641b0aa5 Mon Sep 17 00:00:00 2001 From: Rolf Schroeder Date: Thu, 16 Nov 2017 09:28:34 +0100 Subject: [PATCH 3/3] Fix downstream/upstream cDNA errors when converting genomic coordinates for non coding transcripts. --- pyhgvs/__init__.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pyhgvs/__init__.py b/pyhgvs/__init__.py index c9b2c1b..599066c 100644 --- a/pyhgvs/__init__.py +++ b/pyhgvs/__init__.py @@ -527,6 +527,10 @@ def cdna_to_genomic_coord(transcript, coord): if coord.coord < 0: raise ValueError('CDNACoord cannot have a negative coord and ' 'landmark CDNA_STOP_CODON') + if not transcript.is_coding: + if not transcript_strand: + pos = exons[-1].tx_position.chrom_start + return pos - coord.coord + 1 pos = find_stop_codon(exons, transcript.cds_position) + coord.coord else: raise ValueError('unknown CDNACoord landmark "%s"' % coord.landmark) @@ -641,6 +645,25 @@ def genomic_to_cdna_coord(transcript, genomic_coord): cdna_coord.coord == stop_codon and cdna_coord.offset > 0): cdna_coord.coord -= stop_codon cdna_coord.landmark = CDNA_STOP_CODON + else: # non coding + if strand == "+": + # Detect if position is after last exon. + if genomic_coord > exons[-1].chrom_end: + cdna_coord.coord = genomic_coord - exons[-1].chrom_end + cdna_coord.landmark = CDNA_STOP_CODON + else: + # Detect if position is before first exon. + if genomic_coord <= exons[0].chrom_start: + cdna_coord.coord -= 1 + else: # neg strand + # Detect if position is after last exon. + if genomic_coord <= exons[-1].chrom_start: + cdna_coord.coord = exons[-1].chrom_start - genomic_coord + 1 + cdna_coord.landmark = CDNA_STOP_CODON + else: + # Detect if position is before first exon. + if genomic_coord >= exons[0].chrom_end: + cdna_coord.coord -= 1 return cdna_coord