-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathflareml_test.py
More file actions
181 lines (152 loc) · 6.97 KB
/
flareml_test.py
File metadata and controls
181 lines (152 loc) · 6.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
'''
(c) Copyright 2021
All rights reserved
Programs written by Yasser Abduallah
Department of Computer Science
New Jersey Institute of Technology
University Heights, Newark, NJ 07102, USA
Permission to use, copy, modify, and distribute this
software and its documentation for any purpose and without
fee is hereby granted, provided that this copyright
notice appears in all copies. Programmer(s) makes no
representations about the suitability of this
software for any purpose. It is provided "as is" without
express or implied warranty.
@author: Yasser Abduallah
'''
import numpy as np
import os
import csv
from datetime import datetime
import argparse
import time
from sklearn.metrics import confusion_matrix
from flareml_utils import *
TEST_INPUT = 'data/test_data/flaringar_simple_random_40.csv'
normalize_data = False
def test_model(args):
pm = {}
if not 'algorithm' in args.keys():
args['algorithm'] = 'ENS'
algorithm = args['algorithm']
if not algorithm.strip().upper() in algorithms:
print('Invalid algorithm:', algorithm, '\nAlgorithm must one of: ', algorithms)
sys.exit()
TEST_INPUT = args['test_data_file']
if TEST_INPUT.strip() == '':
print('Testing data file can not be empty')
sys.exit()
if not os.path.exists(TEST_INPUT):
print('Testing data file does not exist:', TEST_INPUT)
sys.exit()
if not os.path.isfile(TEST_INPUT):
print('Testing data is not a file:', TEST_INPUT)
sys.exit()
modelid = args['modelid']
if modelid.strip() == '':
print('Model id can not be empty')
sys.exit()
verbose = False;
if 'verbose' in args:
verbose = boolean(args['verbose'])
set_log_to_terminal(verbose)
log('Your provided arguments as: ', args)
models_dir = custom_models_dir
alg = algorithm.strip().upper()
printOutput = False
default_id_message = ' or use the default model id.'
if modelid == 'default_model' :
models_dir = default_models_dir
default_id_message = '.'
exists = are_model_files_exist(models_dir , modelid, alg=alg)
log('model exists:', exists, 'for:', modelid, 'and', alg)
log('partial_ens_trained:', get_partial_ens_trained())
if not exists:
if not get_partial_ens_trained():
log("\nModel id", modelid," does not exist for algorithm " + alg + "." + '\nPlease make sure to run training task with this id first' + default_id_message, logToTerminal=True, no_time=True)
sys.exit()
log("=============================== Logging Stared using algorithm: " + algorithm +" ==============================")
log("Execution time started: " + timestr)
log("Log files used in this run: " + logFile)
log("train data set: " + TEST_INPUT)
log("Creating a model with id: " + modelid)
print("Starting testing with a model with id:", modelid, 'testing data file:', TEST_INPUT)
print('Loading data set...')
dataset = load_dataset_csv(TEST_INPUT)
log("orig cols: " + dataset.columns)
for c in dataset.columns:
if not c in req_columns:
dataset = removeDataColumn(c,dataset)
log("after removal cols: " , dataset.columns)
print('Done loading data...')
cols = list(dataset.columns)
if not flares_col_name in cols:
print('The required flares class column:', flares_col_name, ' is not included in the data file')
sys.exit()
print('Formatting and mapping the flares classes..')
dataset['flarecn'] = [convert_class_to_num(c) for c in dataset[flares_col_name]]
log('all columns: ', dataset.columns)
log('\n', dataset.head())
flares_names = list (dataset[flares_col_name].values)
dataset = removeDataColumn(flares_col_name, dataset)
log("after removal cols: " + dataset.columns)
cols = list(dataset.columns)
if normalize_data:
log('Normalizing and scaling the data...')
for c in cols:
if not c =='flarecn':
dataset[c] = normalize_scale_data(dataset[c].values)
test_y = dataset['flarecn']
test_x = removeDataColumn('flarecn',dataset)
print('Prediction is in progress, please wait until it is done...')
true_y = [mapping[y] for y in test_y]
if alg in ['RF','ENS']:
rf_result = model_prediction_wrapper('RF',None, test_x, test_y, model_id = modelid)
if alg in ['MLP','ENS']:
mlp_result = model_prediction_wrapper('MLP',None, test_x, test_y, model_id = modelid)
if alg in ['ELM','ENS']:
elm_result = model_prediction_wrapper('ELM',None, test_x, test_y, model_id = modelid)
if alg == 'ENS':
result = compute_ens_result(rf_result, mlp_result, elm_result)
pm ['ENS'] = log_cv_report(true_y,result)
rf_result = map_prediction(rf_result)
pm['RF'] = log_cv_report(true_y,rf_result)
mlp_result = map_prediction(mlp_result)
pm['MLP'] = log_cv_report(true_y,mlp_result)
elm_result = map_prediction(elm_result)
pm['ELM'] = log_cv_report(true_y,elm_result)
pm = check_pm_precision('ENS','RF','B',pm)
pm = check_pm_precision('ENS','RF','C',pm)
pm = check_pm_precision('ENS','RF','X',pm)
elif alg == 'RF':
result = map_prediction(rf_result)
pm['RF'] = log_cv_report(true_y,result)
elif alg == 'MLP':
result = map_prediction(mlp_result)
pm['MLP'] = log_cv_report(true_y,result)
else:
result = map_prediction(elm_result)
pm['ELM'] = log_cv_report(true_y,result)
log_cv_report(true_y,result)
print('Finished the prediction task..')
# return pm
res = {}
res[alg] = pm[alg]
res["alg"] = alg
res['result'] = pm
return res
'''
Command line parameters parser
'''
parser = argparse.ArgumentParser()
parser.add_argument('-t', '--test_data_file',default=TEST_INPUT, help='full path to a file includes test data to test/predict using a trained model, must be in csv with comma separator')
parser.add_argument('-l', '--logfile', default=logFile, help='full path to a file to write logging information about current execution.')
parser.add_argument('-v', '--verbose', default=False, help='True/False value to include logging information in result json object, note that result will contain a lot of information')
parser.add_argument('-a', '--algorithm', default='ENS', help='Algorithm to use for training. Available algorithms: ENS, RF, MLP, and ELM. \nENS \tthe Ensemble algorithm is the default, RF Random Forest algorithm, \nMLP\tMultilayer Perceptron algorithm, \nELM\tExtreme Learning Machine.')
parser.add_argument('-m', '--modelid', default='default_model', help='model id to save or load it as a file name. This is to identity each trained model.')
parser.add_argument('-n', '--normalize_data', default=normalize_data, help='Normalize and scale data.')
args, unknown = parser.parse_known_args()
args = vars(args)
if __name__ == "__main__":
pm=test_model(args)
plot_result(pm)