Skip to content

Commit d9ce48a

Browse files
author
Jean
committed
Small changes on evalue variable
1 parent 833c06a commit d9ce48a

1 file changed

Lines changed: 12 additions & 6 deletions

File tree

Integron_Finder.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#!/usr/bin/env python
22

3+
"""
4+
integron_finder is a program that looks for integrons in DNA sequences.
5+
"""
6+
37
import numpy as np
48
import pandas as pd
59
from Bio import SeqIO
@@ -93,7 +97,9 @@ def add_promoter(self):
9397
Function that looks for known promoters if they exist within your integron element.
9498
It takes 1s for about 13kb.
9599
"""
100+
96101
dist_prom = 500 # pb distance from edge of the element for which we seek promoter
102+
97103
######## Promoter of integrase #########
98104

99105
if self.has_integrase():
@@ -1002,15 +1008,15 @@ def find_integrase(name, in_dir = ".", out_dir = "."):
10021008
PROT_file])
10031009

10041010

1005-
def find_resfams(name, in_dir=".", out_dir=".", hmm_file="Resfams.hmm"):
1011+
def find_resfams(name, in_dir=".", out_dir=".", evalue=10, hmm_file="Resfams.hmm"):
10061012
"""
10071013
Call HMMER to annotate antibiotic resistance genes with the models from Resfams (Gibson et al., ISME J., 2014)
10081014
"""
10091015
call([HMMSEARCH, "--cpu", N_CPU, "--tblout", out_dir + name + "_atb_table.res",
10101016
"-o", out_dir + name + "_atb.res", MODEL_DIR + hmm_file,
10111017
PROT_file])
10121018

1013-
def read_hmm(infile):
1019+
def read_hmm(infile, evalue=1):
10141020
"""
10151021
Function that parses HMMER --tblout output and returns a pandas DataFrame
10161022
"""
@@ -1022,7 +1028,7 @@ def read_hmm(infile):
10221028
if not args.gembase:
10231029
df = pd.read_table(infile, sep="\s*", engine="python", header=None, skipfooter=10, skiprows=3)
10241030
df = df[[2,3,0,23,19,21,4]]
1025-
df = df[df[4] < 10]
1031+
df = df[df[4] < evalue]
10261032
df["Accession_number"] = name
10271033
c = df.columns.tolist()
10281034
df = df[c[-1:] + c[:-1]]
@@ -1035,7 +1041,7 @@ def read_hmm(infile):
10351041
df = pd.DataFrame(df_tmp[0].str.split().tolist())
10361042
df = df[[2,3,0,18,21,22,4]]
10371043
df[[21,22,4]] = df[[21,22,4]].astype("float")
1038-
df = df[df[4] < 10]
1044+
df = df[df[4] < evalue]
10391045
df["Accession_number"] = name
10401046
c = df.columns.tolist()
10411047
df = df[c[-1:] + c[:-1]]
@@ -1374,10 +1380,10 @@ def to_gbk(df, sequence):
13741380
for i in integrons:
13751381
if i.type() != "In0": # complete & attC0
13761382
i.add_proteins()
1383+
1384+
if i.type() == "complete":
13771385
i.add_promoter()
13781386
i.add_attI()
1379-
if i.type() == "complete":
1380-
13811387
i.draw_integron(file=out_dir + name + "_" + str(j) + ".pdf")
13821388
j += 1
13831389
if i.type() == "In0":

0 commit comments

Comments
 (0)