11#!/usr/bin/env python
22
3+ """
4+ integron_finder is a program that looks for integrons in DNA sequences.
5+ """
6+
37import numpy as np
48import pandas as pd
59from Bio import SeqIO
@@ -93,7 +97,9 @@ def add_promoter(self):
9397 Function that looks for known promoters if they exist within your integron elements.
9498 It takes 1s for about 13kb.
9599 """
100+
96101 dist_prom = 500 # pb distance from edge of the element for which we seek promoter
102+
97103 ######## Promoter of integrase #########
98104
99105 if self .has_integrase ():
@@ -1002,15 +1008,15 @@ def find_integrase(name, in_dir = ".", out_dir = "."):
10021008 PROT_file ])
10031009
10041010
1005- def find_resfams (name , in_dir = "." , out_dir = "." , hmm_file = "Resfams.hmm" ):
1011+ def find_resfams (name , in_dir = "." , out_dir = "." , evalue = 10 , hmm_file = "Resfams.hmm" ):
10061012 """
10071013 Call hmmer to annotate antibiotic resistance genes with the model from Resfams (Gibson et al., ISME J., 2014)
10081014 """
10091015 call ([HMMSEARCH , "--cpu" , N_CPU , "--tblout" , out_dir + name + "_atb_table.res" ,
10101016 "-o" , out_dir + name + "_atb.res" , MODEL_DIR + hmm_file ,
10111017 PROT_file ])
10121018
1013- def read_hmm (infile ):
1019+ def read_hmm (infile , evalue = 1 ):
10141020 """
10151021 Function that parses hmmer --tblout output and returns a pandas DataFrame
10161022 """
@@ -1022,7 +1028,7 @@ def read_hmm(infile):
10221028 if not args .gembase :
10231029 df = pd .read_table (infile , sep = "\s*" , engine = "python" , header = None , skipfooter = 10 , skiprows = 3 )
10241030 df = df [[2 ,3 ,0 ,23 ,19 ,21 ,4 ]]
1025- df = df [df [4 ] < 10 ]
1031+ df = df [df [4 ] < evalue ]
10261032 df ["Accession_number" ] = name
10271033 c = df .columns .tolist ()
10281034 df = df [c [- 1 :] + c [:- 1 ]]
@@ -1035,7 +1041,7 @@ def read_hmm(infile):
10351041 df = pd .DataFrame (df_tmp [0 ].str .split ().tolist ())
10361042 df = df [[2 ,3 ,0 ,18 ,21 ,22 ,4 ]]
10371043 df [[21 ,22 ,4 ]] = df [[21 ,22 ,4 ]].astype ("float" )
1038- df = df [df [4 ] < 10 ]
1044+ df = df [df [4 ] < evalue ]
10391045 df ["Accession_number" ] = name
10401046 c = df .columns .tolist ()
10411047 df = df [c [- 1 :] + c [:- 1 ]]
@@ -1374,10 +1380,10 @@ def to_gbk(df, sequence):
13741380 for i in integrons :
13751381 if i .type () != "In0" : # complete & attC0
13761382 i .add_proteins ()
1383+
1384+ if i .type () == "complete" :
13771385 i .add_promoter ()
13781386 i .add_attI ()
1379- if i .type () == "complete" :
1380-
13811387 i .draw_integron (file = out_dir + name + "_" + str (j ) + ".pdf" )
13821388 j += 1
13831389 if i .type () == "In0" :
0 commit comments