Skip to content

Commit b6f0ce1

Browse files
committed
adding fasta nucleic sequence in outputs
1 parent 120ca72 commit b6f0ce1

File tree

3 files changed

+27
-18
lines changed

3 files changed

+27
-18
lines changed

orfmap/lib/fasta_parser.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
55
@author: nicolas
66
"""
7-
import random
7+
# import random
88
from orfmap.lib import logHandler
99

1010
logger = logHandler.Logger(name=__name__)
@@ -120,9 +120,9 @@ def sequence(self, start=1, end=5, phase=0, strand='+'):
120120
end_pos = start_pos - 1 + len_sequence
121121

122122
if strand == '+':
123-
return ''.join(lines)[start_pos - 1 + phase:end_pos]
123+
return ''.join(lines)[start_pos - 1 + phase:end_pos].upper()
124124
else:
125-
return self.reverse_complement(''.join(lines)[start_pos - 1:end_pos - phase])
125+
return self.reverse_complement(''.join(lines)[start_pos - 1:end_pos - phase]).upper()
126126

127127
def get_line_nucindex(self, index=1):
128128
"""

orfmap/lib/gff_parser.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,11 @@ def get_fastaline(self):
117117

118118
return fastaline
119119

120+
def get_fastanuc_line(self):
121+
fastaline = '>'+self.id_+'\n'+self.sequence()+'\n'
122+
123+
return fastaline
124+
120125
def get_gffline(self):
121126
# if self.gff_line and 'frag' not in self.type:
122127
# return '\t'.join(self.gff_line)

orfmap/lib/orfmap.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,31 @@ def mapping(gff_data, param):
99

1010
if os.path.exists(param.outfile + '.gff'):
1111
os.remove(param.outfile + '.gff')
12-
if os.path.exists(param.outfile + '.fa'):
13-
os.remove(param.outfile + '.fa')
12+
if os.path.exists(param.outfile + '.faa'):
13+
os.remove(param.outfile + '.faa')
14+
if os.path.exists(param.outfile + '.fna'):
15+
os.remove(param.outfile + '.fna')
1416

1517
with open(param.outfile + '.gff', "a+") as out_gff:
1618
header = '# Input genomic fasta file: {}\n'.format(os.path.basename(param.fasta_fname))
1719
header += '# Input gff file: {}\n'.format(os.path.basename(param.gff_fname))
1820
out_gff.write(header)
19-
with open(param.outfile + '.fa', "a+") as out_fasta:
21+
with open(param.outfile + '.faa', "a+") as out_fasta:
22+
with open(param.outfile + '.fna', "a+") as out_nucleic:
23+
for chr_id in sorted(gff_data):
24+
logger.info('Reading chromosome {} ...'.format(chr_id))
25+
gff_chr = gff_data[chr_id]
2026

21-
for chr_id in sorted(gff_data):
22-
logger.info('Reading chromosome {} ...'.format(chr_id))
23-
gff_chr = gff_data[chr_id]
24-
25-
logger.info(' - ORF mapping and assignment')
26-
get_orfs(gff_chr=gff_chr, param=param, outfiles=[out_gff, out_fasta])
27-
logger.info('')
27+
logger.info(' - ORF mapping and assignment')
28+
get_orfs(gff_chr=gff_chr, param=param, outfiles=[out_gff, out_fasta, out_nucleic])
29+
logger.info('')
2830

2931

3032
def get_orfs(gff_chr, param, outfiles: list):
3133
max_subsequence_length = 1999998
3234
out_gff = outfiles[0]
3335
out_fasta = outfiles[1]
36+
out_nucleic = outfiles[2]
3437
orf_len = param.orf_len + 6
3538
pos = 0
3639

@@ -57,7 +60,7 @@ def get_orfs(gff_chr, param, outfiles: list):
5760
if end_pos - start_pos + 1 >= orf_len:
5861
orf = build_orf(gff_chr=gff_chr, strand='+', frame=frame, coors=(start_pos, end_pos),
5962
param=param)
60-
write_outputs(out_fasta=out_fasta, out_gff=out_gff, orf=orf, param=param)
63+
write_outputs(out_fasta=out_fasta, out_gff=out_gff, out_nucleic=out_nucleic, orf=orf, param=param)
6164

6265
start_pos = end_pos - 2
6366

@@ -69,7 +72,7 @@ def get_orfs(gff_chr, param, outfiles: list):
6972
if end_pos_rev - start_pos_rev + 1 >= orf_len:
7073
orf = build_orf(gff_chr=gff_chr, strand='-', frame=frame_rev, coors=(start_pos_rev, end_pos_rev),
7174
param=param)
72-
write_outputs(out_fasta=out_fasta, out_gff=out_gff, orf=orf, param=param)
75+
write_outputs(out_fasta=out_fasta, out_gff=out_gff, out_nucleic=out_nucleic, orf=orf, param=param)
7376

7477
start_pos_rev = end_pos_rev - 2
7578

@@ -80,7 +83,7 @@ def get_orfs(gff_chr, param, outfiles: list):
8083
if end_pos_rev - start_pos_rev + 1 >= orf_len:
8184
orf = build_orf(gff_chr=gff_chr, strand='-', frame=frame_rev, coors=(start_pos_rev, end_pos_rev),
8285
param=param, extremity=True)
83-
write_outputs(out_fasta=out_fasta, out_gff=out_gff, orf=orf, param=param)
86+
write_outputs(out_fasta=out_fasta, out_gff=out_gff, out_nucleic=out_nucleic, orf=orf, param=param)
8487

8588

8689
def build_orf(gff_chr, strand, frame, coors, param, extremity=False):
@@ -116,16 +119,17 @@ def build_orf(gff_chr, strand, frame, coors, param, extremity=False):
116119
return orf
117120

118121

119-
def write_outputs(out_fasta, out_gff, orf, param):
122+
def write_outputs(out_fasta, out_gff, out_nucleic, orf, param):
120123
if is_orf_asked(orf=orf, param=param):
121124
out_gff.write(orf.get_gffline())
122125
out_fasta.write(orf.get_fastaline())
126+
out_nucleic.write(orf.get_fastanuc_line())
123127
if orf.suborfs:
124128
for suborf in orf.suborfs:
125129
if is_orf_asked(orf=suborf, param=param):
126130
out_gff.write(suborf.get_gffline())
127131
out_fasta.write(suborf.get_fastaline())
128-
132+
out_nucleic.write(suborf.get_fastanuc_line())
129133

130134
def is_orf_asked(orf=None, param=None):
131135
if 'all' in param.o_include:

0 commit comments

Comments
 (0)