This repository was archived by the owner on May 4, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathape2table.py
More file actions
112 lines (97 loc) · 3.48 KB
/
ape2table.py
File metadata and controls
112 lines (97 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python
####################################################################################
# xml2ape v0.1
# Copyright 2014, Stuart Archer
#
# xml2ape is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# xml2ape is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You can view the GNU General Public License at <http://www.gnu.org/licenses/>.
####################################################################################
####################################################################################
#
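# Instructions:
#   python ape2table.py -a apefile.ape [-o output_file.txt] [-i] [-h]
#     -a FILE  ApE file to read
#     -o FILE  tab-separated output table (default: ape2table_output.txt);
#              the script exits rather than overwrite an existing file
#     -i       also export features whose type starts with "blast_hit"
#     -h       print the usage line and exit
#   A "tm" column is added to the table when Biopython can be imported.
#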
####################################################################################
import sys
try:
import Bio
from Bio.SeqUtils import MeltingTemp
print "importing Biopython"
calc_tms =True
except:
print "Cannot detect Biopython. Cannot calculate primer Tms\n"
calc_tms = False
import re
import os
import getopt
usage = 'Usage: python ape2table.py -a apefile.ape <-o output_file.txt> -i'
tablefile = "ape2table_output.txt"
include_blast_hits = False
options, remainder = getopt.getopt(sys.argv[1:], 'a:o:hi')
for opt, arg in options:
if opt == '-a':
apefile = arg
if not os.path.isfile(apefile):
print 'Ape file '+apefile+' does not appear to exist in the current directory. Exiting.'
print usage
exit()
if opt == '-o':
tablefile = arg
if opt == '-h':
print usage
exit()
if opt == '-o':
tablefile = arg
if opt == '-i':
include_blast_hits = True
if os.path.isfile(tablefile):
print "Warning: file "+tablefile+" already exists in the current directory. Exiting to avoid overwrite of data."
print "Please rename or delete this file or nominate a different file name using the -o argument."
print usage
exit()
features = list()
query = str('')
with open(apefile, 'r') as ape_in:
in_features=False
in_sequence=False
for line in ape_in:
if line.startswith('FEATURES'):
in_features = True
continue
if line.startswith(' '):
continue
if line.startswith('ORIGIN'):
in_sequence = True
in_features = False
continue
if in_features:
mobj=re.match('\s+([\S]+)\s+([\d]+)\.\.([\d]+)$', line)
if mobj:
features.append([mobj.group(1), mobj.group(2), mobj.group(3)])
elif in_sequence:
line_seq = re.sub(r'[\d\s/]', '', line) # take out numbers, whitespace
query += line_seq.rstrip()
with open(tablefile, 'w') as tout:
header = "feat_type\tstart\tend\tsequence"
if calc_tms:
header += "\ttm"
print >> tout, header
for row in features:
if include_blast_hits or not row[0].startswith('blast_hit'):
seq = ( query[int(row[1])-1:int(row[2])] )
row.append(seq)
if calc_tms:
tm = MeltingTemp.Tm_staluc(seq)
row.append( tm )
row = list(str(r) for r in row)
print >> tout, '\t'.join(row)
print "Exported table, see: "+tablefile