-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtree_operon_struct.py
More file actions
executable file
·80 lines (56 loc) · 2.14 KB
/
tree_operon_struct.py
File metadata and controls
executable file
·80 lines (56 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from multiprocessing import Pool
import numpy
import time
import sys
import os
import Bio
from Bio import SeqIO,SeqFeature
from Bio.SeqRecord import SeqRecord
# The purpose of this code is to determine possible targets of HGT for the first paper. This is done by generating a summary:
# CSV files which hold the local operon structure. The results are organized in order of phylogenetic distance. We suspect
# that there is HGT if an operon is found in one organism whose branch does not contain the operon.
# This function returns all of the files found under a directory; os.walk performs the recursive traversal.
def returnRecursiveDirFiles(root_dir):
    """Collect the path of every regular file found beneath root_dir.

    The tree is traversed with os.walk, so files in nested sub-directories
    are included.  Each returned path is the walked directory joined with
    the file name; entries that fail os.path.isfile are left out.
    """
    candidates = (
        os.path.join(dirpath, entry)
        for dirpath, _subdirs, entries in os.walk(root_dir)
        for entry in entries
    )
    return [candidate for candidate in candidates if os.path.isfile(candidate)]
def return_filtered_information(fname):
    """Parse one result file into a mapping of identifier -> CSV row.

    Each line is stripped of surrounding whitespace and split on tabs:

    * lines whose first field is '@@', '$$' or '##' are ignored;
    * a line whose first field is '++' supplies (in its second field) the
      value recorded for the most recently seen header line;
    * any other line with more than one field is a header line; its first
      two fields are remembered as ``nc`` and ``org_name``;
    * all remaining lines (e.g. blank lines) are ignored.

    Args:
        fname: path of the file to parse.  The path is also printed to
            stdout as a progress message.

    Returns:
        dict mapping each ``nc`` to the string "nc,org_name,value".  If
        several '++' lines follow the same header, the last one wins.

    Raises:
        ValueError: if a '++' line appears before any header line, since
            there is then no identifier to file the value under.
    """
    print(fname)
    result = {}
    # No header seen yet; set explicitly so a stray '++' line is reported
    # clearly instead of failing with an unbound-variable error.
    nc = None
    org_name = None
    # The context manager guarantees the handle is closed, even on error.
    with open(fname) as handle:
        for raw_line in handle:
            tmp = raw_line.strip().split('\t')
            marker = tmp[0]
            if marker in ('@@', '$$', '##'):
                continue
            if marker == '++':
                if nc is None:
                    raise ValueError(
                        "'++' line found before any header line in %s" % fname
                    )
                result[nc] = "%s,%s,%s" % (nc, org_name, tmp[1])
            elif len(tmp) > 1:
                nc = tmp[0]
                org_name = tmp[1]
    return result
def main():
    """Write one CSV summary per result file found under the input folder.

    For every file under './optimized_results_proteobacteria' the operon
    name is taken from the file's base name (the text before the first '.').
    The file is parsed with return_filtered_information, and the rows whose
    identifiers appear in './phylo_order.txt' are written, in that file's
    order, to './operon_structure_result/<operon>.csv'.  The first line of
    each CSV is the operon name.  Progress is printed to stdout.
    """
    folder = './optimized_results_proteobacteria'
    result_folder = './operon_structure_result/'

    # Read the desired output ordering; the handle is closed on exit.
    with open('./phylo_order.txt') as order_handle:
        order_list = [line.strip() for line in order_handle]

    # Create the output directory up front so the first write cannot fail
    # just because the folder is missing.
    if not os.path.isdir(result_folder):
        os.makedirs(result_folder)

    for fname in returnRecursiveDirFiles(folder):
        print(fname)
        # basename copes with whatever separator os.path.join produced.
        operon = os.path.basename(fname).split('.')[0]
        print(operon)
        res = return_filtered_information(fname)
        result = [operon]
        # Direct dict membership is a hash lookup, not a scan of the keys.
        result.extend(res[nc] for nc in order_list if nc in res)
        print(result)
        with open(result_folder + operon + '.csv', 'w') as handle:
            handle.write('\n'.join(result))
# Run the summary generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()