@@ -9,28 +9,31 @@ def mapping(gff_data, param):
99
1010 if os .path .exists (param .outfile + '.gff' ):
1111 os .remove (param .outfile + '.gff' )
12- if os .path .exists (param .outfile + '.fa' ):
13- os .remove (param .outfile + '.fa' )
12+ if os .path .exists (param .outfile + '.faa' ):
13+ os .remove (param .outfile + '.faa' )
14+ if os .path .exists (param .outfile + '.fna' ):
15+ os .remove (param .outfile + '.fna' )
1416
1517 with open (param .outfile + '.gff' , "a+" ) as out_gff :
1618 header = '# Input genomic fasta file: {}\n ' .format (os .path .basename (param .fasta_fname ))
1719 header += '# Input gff file: {}\n ' .format (os .path .basename (param .gff_fname ))
1820 out_gff .write (header )
19- with open (param .outfile + '.fa' , "a+" ) as out_fasta :
21+ with open (param .outfile + '.faa' , "a+" ) as out_fasta :
22+ with open (param .outfile + '.fna' , "a+" ) as out_nucleic :
23+ for chr_id in sorted (gff_data ):
24+ logger .info ('Reading chromosome {} ...' .format (chr_id ))
25+ gff_chr = gff_data [chr_id ]
2026
21- for chr_id in sorted (gff_data ):
22- logger .info ('Reading chromosome {} ...' .format (chr_id ))
23- gff_chr = gff_data [chr_id ]
24-
25- logger .info (' - ORF mapping and assignment' )
26- get_orfs (gff_chr = gff_chr , param = param , outfiles = [out_gff , out_fasta ])
27- logger .info ('' )
27+ logger .info (' - ORF mapping and assignment' )
28+ get_orfs (gff_chr = gff_chr , param = param , outfiles = [out_gff , out_fasta , out_nucleic ])
29+ logger .info ('' )
2830
2931
3032def get_orfs (gff_chr , param , outfiles : list ):
3133 max_subsequence_length = 1999998
3234 out_gff = outfiles [0 ]
3335 out_fasta = outfiles [1 ]
36+ out_nucleic = outfiles [2 ]
3437 orf_len = param .orf_len + 6
3538 pos = 0
3639
@@ -57,7 +60,7 @@ def get_orfs(gff_chr, param, outfiles: list):
5760 if end_pos - start_pos + 1 >= orf_len :
5861 orf = build_orf (gff_chr = gff_chr , strand = '+' , frame = frame , coors = (start_pos , end_pos ),
5962 param = param )
60- write_outputs (out_fasta = out_fasta , out_gff = out_gff , orf = orf , param = param )
63+ write_outputs (out_fasta = out_fasta , out_gff = out_gff , out_nucleic = out_nucleic , orf = orf , param = param )
6164
6265 start_pos = end_pos - 2
6366
@@ -69,7 +72,7 @@ def get_orfs(gff_chr, param, outfiles: list):
6972 if end_pos_rev - start_pos_rev + 1 >= orf_len :
7073 orf = build_orf (gff_chr = gff_chr , strand = '-' , frame = frame_rev , coors = (start_pos_rev , end_pos_rev ),
7174 param = param )
72- write_outputs (out_fasta = out_fasta , out_gff = out_gff , orf = orf , param = param )
75+ write_outputs (out_fasta = out_fasta , out_gff = out_gff , out_nucleic = out_nucleic , orf = orf , param = param )
7376
7477 start_pos_rev = end_pos_rev - 2
7578
@@ -80,7 +83,7 @@ def get_orfs(gff_chr, param, outfiles: list):
8083 if end_pos_rev - start_pos_rev + 1 >= orf_len :
8184 orf = build_orf (gff_chr = gff_chr , strand = '-' , frame = frame_rev , coors = (start_pos_rev , end_pos_rev ),
8285 param = param , extremity = True )
83- write_outputs (out_fasta = out_fasta , out_gff = out_gff , orf = orf , param = param )
86+ write_outputs (out_fasta = out_fasta , out_gff = out_gff , out_nucleic = out_nucleic , orf = orf , param = param )
8487
8588
8689def build_orf (gff_chr , strand , frame , coors , param , extremity = False ):
@@ -116,16 +119,17 @@ def build_orf(gff_chr, strand, frame, coors, param, extremity=False):
116119 return orf
117120
118121
def write_outputs(out_fasta, out_gff, out_nucleic, orf, param):
    """Write an ORF, then each of its sub-ORFs, to the three output handles.

    For every candidate accepted by ``is_orf_asked`` the results of
    ``get_gffline()``, ``get_fastaline()`` and ``get_fastanuc_line()`` are
    written, in that order, to ``out_gff``, ``out_fasta`` and
    ``out_nucleic`` respectively. Rejected candidates produce no output.

    Args:
        out_fasta: writable handle receiving ``get_fastaline()`` output.
        out_gff: writable handle receiving ``get_gffline()`` output.
        out_nucleic: writable handle receiving ``get_fastanuc_line()`` output.
        orf: the parent ORF; its ``suborfs`` attribute is iterated when truthy.
        param: run parameters forwarded unchanged to ``is_orf_asked``.
    """
    # NOTE(review): the reviewed source had lost its indentation; sub-ORFs
    # are assumed to be handled whether or not the parent ORF itself was
    # accepted -- confirm against the original file.

    def _emit(candidate):
        # Filter first so nothing is written for an unrequested candidate.
        if not is_orf_asked(orf=candidate, param=param):
            return
        out_gff.write(candidate.get_gffline())
        out_fasta.write(candidate.get_fastaline())
        out_nucleic.write(candidate.get_fastanuc_line())

    _emit(orf)

    if not orf.suborfs:
        return
    for child in orf.suborfs:
        _emit(child)
129133
130134def is_orf_asked (orf = None , param = None ):
131135 if 'all' in param .o_include :
0 commit comments