forked from arkatebi/CAFA-Toolset
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathArgParser_Benchmark.py
More file actions
executable file
·198 lines (185 loc) · 8.88 KB
/
ArgParser_Benchmark.py
File metadata and controls
executable file
·198 lines (185 loc) · 8.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env python
'''
The entry point of this script is parse_args() method which calls
other methods to collect user supplied arguments, parses and
verifies them, and at the end returns those arguments as a dictionary.
Description of these methods are the following:
parse_args:
This method calls the methods described below and returns the final
dictionary containing the user supplied arguments to the Benchmark
or Verify program at the calling point.
collect_args:
This method collects the user supplied arguments. The method takes
a string argument which can take two string values 'benchmark' or
'verify'. The string value 'benchmark' indicates that the method is
invoked by Benchmark program and 'verify' indicates that the
method is invoked by Verify program. When this method is invoked
by Benchmark program, the method gives user an option to provide
an output file name. On the other hand, when the method is invoked
by Verify program, the method accepts a mandatory benchmark file
name as the user argument.
extract_args:
This method puts the user supplied arguments into an ordered
dictionary which it returns at the end.
check_args:
This method verifies the correctness of the user supplied
arguments and puts them into an ordered dictionary which the method
returns at the end.
'''
import os
import sys
import argparse
import re
from collections import OrderedDict
def collect_args(prog='benchmark'):
    """
    Build the command line parser for the Benchmark or the Verify program.

    The parser is only constructed here; no command line is parsed.

    Arguments:
        prog: either 'benchmark' or 'verify'. With 'benchmark' the parser
            gets the optional -O/--output option; with 'verify' it gets
            the -I3/--input3 option instead. All remaining options are
            shared by both programs.

    Returns:
        The configured argparse.ArgumentParser object.

    Raises:
        ValueError: if prog is neither 'benchmark' nor 'verify'.
    """
    descriptions = {
        'benchmark': ('Creates benchmark '
                      'sets from two annotation files at two time points'),
        'verify': ('Verifies benchmark '
                   'sets generated by Benchmark Creation Tool '),
    }
    # Reject an unknown program name up front; otherwise no parser would be
    # created and the first add_argument call would fail on an unbound name.
    if prog not in descriptions:
        raise ValueError("prog must be 'benchmark' or 'verify', got %r"
                         % (prog,))
    parser = argparse.ArgumentParser(description=descriptions[prog])

    # The two input files. They are not marked required here: their
    # presence is verified after parsing (see the help text).
    parser.add_argument('-I1', '--input1',
                        help=('Specifies path to the '
                              'first input file. This option is mandatory.'))
    parser.add_argument('-I2', '--input2',
                        help=('Specifies path to the '
                              'second input file. This option is mandatory.'))

    # The single program specific option.
    if prog == 'benchmark':
        parser.add_argument('-O', '--output', default='',
                            help=('Provides '
                                  'user an option to specify an output '
                                  'filename.'))
    else:
        parser.add_argument('-I3', '--input3',
                            help=('Specifies path to '
                                  'one of the SIX benchmark files. This '
                                  'option is mandatory.'))

    # Filters accepting zero or more values; the default is ['all'].
    parser.add_argument('-G', '--organism', nargs='*', default=['all'],
                        help=('Provides user a choice to specify a set of '
                              'organisms '
                              '(example:Saccharomyces cerevisiae or 7227) '
                              'separated '
                              'by space. Default is all.'))
    parser.add_argument('-N', '--ontology', nargs='*', default=['all'],
                        help=('Provides user a choice to specify a set of '
                              'ontologies (F, P, C) separated by space. '
                              'Default is all.'))
    parser.add_argument('-V', '--evidence', nargs='*', default=['all'],
                        help=('Provides user a choice to specify a set of GO '
                              'experimental evidence codes (example: IPI, '
                              'IDA, '
                              'EXP) separated by space. Default is all.'))
    parser.add_argument('-S', '--source', action='store', nargs='*',
                        default=['all'],
                        help=('Provides user a choice to specify '
                              'sources (example: UniProt, InterPro) '
                              'separated '
                              'by spaces. Default is all.'))

    # Confidence and pubmed switches are plain string values ('F' by
    # default); the threshold is converted to int by argparse.
    parser.add_argument('-C', '--confidence', default='F',
                        help=('Allows '
                              'user to turn on the annotation confidence '
                              'filter. '
                              'If turned on, GO terms assignments to '
                              'proteins that '
                              'are documented in few papers (4 or less by '
                              'default) '
                              'will not be considered part of the benchmark '
                              'set. By '
                              'default, it is turned off.'))
    parser.add_argument('-T', '--threshold', type=int, default=4,
                        help=('Allows users to specify a threshold for the '
                              'minimum '
                              'number of papers to be used for having a '
                              'confident '
                              'annotation. If not specified, defaults to a '
                              'value of 4.'))
    parser.add_argument('-P', '--pubmed', default='F',
                        help=('Allows user to '
                              'turn on the pubmed filter. If turned on, GO '
                              'terms '
                              'w/o any Pubmed references will not be '
                              'considered '
                              'part of the benchmark set. By default, it is '
                              'turned off.'))
    parser.add_argument('-B', '--blacklist', nargs='*', default=[],
                        help=('This parameter can take in a list of pubmed '
                              'ids. '
                              'All GO terms and proteins annotated in them '
                              'will '
                              'be eliminated from the benchmark set. '
                              'Default is '
                              'an empty list.'))
    return parser
def extract_args(args, prog):
    """
    Copy the parsed command line values into an ordered dictionary.

    Arguments:
        args: object carrying the parsed values as attributes (input1,
            input2, organism, ...), e.g. the namespace produced by the
            argument parser.
        prog: 'benchmark' adds the 'outfile' entry (from args.output),
            'verify' adds the 't3' entry (from args.input3); any other
            value adds neither of the two.

    Returns:
        An OrderedDict mapping the internal key names to the values
        found on args, in a fixed order.
    """
    # Entry that exists for only one of the two programs.
    program_specific = {
        'benchmark': [('outfile', 'output')],   # Default: ''
        'verify': [('t3', 'input3')],
    }
    # (dictionary key, attribute name on args), in output order.
    layout = [('t1', 'input1'), ('t2', 'input2')]
    layout += program_specific.get(prog, [])
    layout += [
        ('Taxon_ID', 'organism'),       # Default: 'all'
        ('Aspect', 'ontology'),         # Default: 'all'
        ('Evidence', 'evidence'),       # Default: 'all'
        ('Assigned_By', 'source'),      # Default: 'all'
        ('Confidence', 'confidence'),   # Default: 'F'
        ('Threshold', 'threshold'),     # Default: 4
        ('Pubmed', 'pubmed'),           # Default: 'F'
        ('Blacklist', 'blacklist'),     # Default: []
    ]
    return OrderedDict((key, getattr(args, attr)) for key, attr in layout)
def check_args(args_dict, parser):
    """
    Check the user arguments and return them in normalised form.

    The entries of args_dict are processed in order:
      * 't1', 't2', 't3': mandatory file names. If one of them is None,
        an error message and the parser's help text are printed and the
        program exits with status 2.
      * 'outfile', 'Threshold', 'Confidence', 'Pubmed': copied unchanged.
      * every other key (Taxon_ID, Aspect, Evidence, Assigned_By,
        Blacklist): the list of values is turned into a set. An empty
        list, or a list containing 'all', becomes the empty set.
        Taxon_ID values are capitalised, all other values upper-cased.

    Arguments:
        args_dict: dictionary of argument values; it is not modified.
        parser: the argparse.ArgumentParser used to print the help text
            and to exit when a mandatory argument is missing.

    Returns:
        A new OrderedDict with the checked arguments, in the order of
        args_dict.

    Raises:
        SystemExit: when a mandatory file name is missing.
    """
    # Message printed for each mandatory file argument that is missing.
    missing_messages = {
        't1': 'Missing input file at time t1\n',
        't2': 'Missing input file at time t2\n',
        't3': 'Missing the benchmark file name\n',
    }
    # Arguments that are taken over exactly as supplied.
    passthrough = ('outfile', 'Threshold', 'Confidence', 'Pubmed')

    user_dict = OrderedDict()
    for arg, value in args_dict.items():
        if arg in missing_messages:
            if value is None:
                print(missing_messages[arg])
                parser.print_help()
                # A missing mandatory argument is a usage error, so leave
                # with a non-zero status instead of a successful one.
                parser.exit(status=2)
            user_dict[arg] = value
        elif arg in passthrough:
            user_dict[arg] = value
        elif 'all' in value or len(value) == 0:
            # No restriction requested for this filter.
            user_dict[arg] = set()
        elif arg == 'Taxon_ID':
            user_dict[arg] = set(x.capitalize() for x in value)
        else:  # Aspect, Evidence, Assigned_By, Blacklist
            user_dict[arg] = set(x.upper() for x in value)
    return user_dict
def parse_args(prog='benchmark'):
    """
    This is the entry point for the other methods in this module. It
      1. invokes collect_args to build the parser for prog and parses
         the command line with it
      2. puts the parsed values into a dictionary by calling extract_args
      3. checks the consistency of those values by invoking check_args
      4. returns the dictionary produced by check_args.

    If the command line contains arguments the parser does not know, an
    'Invalid Arguments' banner and the help text are printed and the
    program exits with status 2.

    Arguments:
        prog: 'benchmark' or 'verify'; passed on to collect_args and
            extract_args.

    Returns:
        The dictionary of checked arguments returned by check_args.

    Raises:
        SystemExit: when unknown arguments are found on the command line.
    """
    # Collect user supplied argument values:
    parser = collect_args(prog)
    args, unknown = parser.parse_known_args()
    if unknown:
        print('\n*********************************')
        print("Invalid Arguments")
        print('*********************************\n')
        # Shows help messages and quits. Unknown arguments are a usage
        # error, hence the non-zero exit status.
        parser.print_help()
        parser.exit(status=2)
    args_dict = extract_args(args, prog)
    return check_args(args_dict, parser)
if __name__ == '__main__':
    # Running this module directly only shows the script name followed by
    # the module documentation, then exits successfully.
    for text in (sys.argv[0] + ':', __doc__):
        print(text)
    sys.exit(0)