# flareml_train.py — 146 lines (138 loc) · 6.28 KB
'''
(c) Copyright 2021
All rights reserved
Programs written by Yasser Abduallah
Department of Computer Science
New Jersey Institute of Technology
University Heights, Newark, NJ 07102, USA
Permission to use, copy, modify, and distribute this
software and its documentation for any purpose and without
fee is hereby granted, provided that this copyright
notice appears in all copies. Programmer(s) makes no
representations about the suitability of this
software for any purpose. It is provided "as is" without
express or implied warranty.
@author: Yasser Abduallah
'''
import argparse
import csv
import datetime
import os
import sys
import time
from time import sleep

import numpy as np

from flareml_utils import *
# Default data files; --train_data_file / -t overrides TRAIN_INPUT at the CLI.
TRAIN_INPUT = 'data/train_data/flaringar_training_sample.csv'
TEST_INPUT = 'data/test_data/flaringar_simple_random_40.csv'
# Default for the --normalize_data flag: scale feature columns before training.
normalize_data = False
def train_model(args):
    """Train flare-prediction model(s) described by the ``args`` mapping.

    Expected keys in ``args`` (a ``dict``, typically ``vars(parser_args)``):
        algorithm        -- one of the entries in ``algorithms`` (from
                            flareml_utils): 'ENS', 'RF', 'MLP' or 'ELM';
                            defaults to 'ENS' when the key is missing.
        train_data_file  -- path to a CSV training-data file.
        modelid          -- identifier used to name the saved model files.
        normalize_data   -- optional truthy flag; scale feature columns.
        verbose          -- optional truthy flag; echo log output to terminal.

    Exits the process via ``sys.exit()`` on any invalid input.  Returns
    ``None``; trained models are persisted by the ``*_train_model`` helpers
    (``rf_train_model``, ``mlp_train_model``, ``elm_train_model`` from
    flareml_utils).
    """
    # Fall back to the ensemble algorithm when none was supplied.
    if 'algorithm' not in args:
        args['algorithm'] = 'ENS'
    algorithm = args['algorithm']
    if algorithm.strip().upper() not in algorithms:
        print('Invalid algorithm:', algorithm, '\nAlgorithm must be one of: ', algorithms)
        sys.exit()

    # Validate the training-data path (local; shadows the module default).
    train_input = args['train_data_file']
    if train_input.strip() == '':
        print('Training data file can not be empty')
        sys.exit()
    if not os.path.exists(train_input):
        print('Training data file does not exist:', train_input)
        sys.exit()
    if not os.path.isfile(train_input):
        print('Training data is not a file:', train_input)
        sys.exit()

    modelid = args['modelid']
    if modelid.strip() == '':
        print('Model id can not be empty')
        sys.exit()
    if modelid.strip().lower() == 'default_model':
        # Guard against clobbering the shipped default models.
        ans = input('Using default_model as an id will overwrite the default '
                    'models. Are you sure you want to continue? [n] ')
        if not boolean(ans):
            print('Exiting..')
            sys.exit()

    normalize_data = boolean(args['normalize_data']) if 'normalize_data' in args else False
    log('normalize_data:', normalize_data)

    # Explicitly enable/disable terminal logging.  The original code left the
    # "off" branch as a dead local assignment (verbose = False) that had no
    # effect; passing False here makes the intent actually happen.
    set_log_to_terminal(boolean(args.get('verbose', False)))

    log('Your provided arguments as: ', args)
    log("=============================== Logging Started using algorithm: " + algorithm + " ==============================")
    log("Execution time started: " + timestr)
    log("Log files used in this run: " + logFile)
    log("train data set: " + train_input)
    log("Creating a model with id: " + modelid)
    print("Starting training with a model with id:", modelid, 'training data file:', train_input)
    print('Loading data set...')
    dataset = load_dataset_csv(train_input)
    log("orig cols: ", dataset.columns)
    # Keep only the required feature columns.
    for c in dataset.columns:
        if c not in req_columns:
            dataset = removeDataColumn(c, dataset)
    log("after removal cols: ", dataset.columns)
    cols = list(dataset.columns)
    if flares_col_name not in cols:
        print('The required flares class column:', flares_col_name, ' is not included in the data file')
        sys.exit()
    # Map the textual flare class to its numeric label in a new column.
    dataset['flarecn'] = [convert_class_to_num(c) for c in dataset[flares_col_name]]
    log('all columns: ', dataset.columns)
    log('\n', dataset.head())
    dataset = removeDataColumn(flares_col_name, dataset)
    if normalize_data:
        log('Normalizing and scaling the data...')
        for c in cols:
            if c != 'flarecn' and c != flares_col_name:
                dataset[c] = normalize_scale_data(dataset[c].values)
    train_y = dataset['flarecn']
    train_x = removeDataColumn('flarecn', dataset)
    test_x = None
    test_y = None
    # NOTE(review): models_dir is computed but never read in this function —
    # presumably the *_train_model helpers decide the directory themselves;
    # kept for parity with the original. TODO confirm and remove if truly dead.
    models_dir = custom_models_dir
    if modelid == 'default_model':
        models_dir = default_models_dir

    alg = algorithm.strip().upper()
    print('Training is in progress, please wait until it is done...')
    start_time = datetime.datetime.now()
    print('Training started at:', start_time.strftime("%Y-%m-%d %H:%M:%S"))
    # 'ENS' trains all three learners; any other value trains just one.
    if alg in ['RF', 'ENS']:
        rf_train_model(train_x, test_x, train_y, test_y, model_id=modelid)
        if alg == 'ENS':
            print('Finished 1/3 training..')
    if alg in ['MLP', 'ENS']:
        mlp_train_model(train_x, test_x, train_y, test_y, model_id=modelid)
        if alg == 'ENS':
            print('Finished 2/3 training..')
    if alg in ['ELM', 'ENS']:
        elm_train_model(train_x, test_x, train_y, test_y, model_id=modelid)
        if alg == 'ENS':
            print('Finished 3/3 training..')
    end_time = datetime.datetime.now()
    print('Training finished at:', end_time.strftime("%Y-%m-%d %H:%M:%S"))
    total_time = end_time - start_time
    total_minutes = round(total_time.total_seconds() / 60, 2)
    print('Training total time:', total_minutes, 'Minute(s)')
    # Pluralize "model" for the ensemble, which trains three models
    # (restores the commented-out intent of the original code).
    ens = '(s)' if alg == 'ENS' else ''
    print('\nFinished training the', alg, 'model' + ens + ', you may use the flareml_test.py program to make prediction.')
# ---------------------------------------------------------------------------
# Command-line interface: declare the options as a data table, register them
# on the parser, and collapse the parsed namespace into a plain dict that
# train_model() consumes.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
_cli_options = (
    (('-t', '--train_data_file'),
     {'default': TRAIN_INPUT,
      'help': 'full path to a file includes training data to create a model, must be in csv with comma separator'}),
    (('-l', '--logfile'),
     {'default': logFile,
      'help': 'full path to a file to write logging information about current execution.'}),
    (('-v', '--verbose'),
     {'default': False,
      'help': 'True/False value to include logging information in result json object, note that result will contain a lot of information'}),
    (('-a', '--algorithm'),
     {'default': 'ENS',
      'help': 'Algorithm to use for training. Available algorithms: ENS, RF,MLP, and ELM. \nENS \tthe Ensemble algorithm is the default, RF\t Random Forest algorithm, \nMLP\tMultilayer Perceptron algorithm, \nELM\tExtreme Learning Machine'}),
    (('-m', '--modelid'),
     {'default': 'default_model',
      'help': 'model id to save or load it as a file name. This is to identity each trained model.'}),
    (('-n', '--normalize_data'),
     {'default': normalize_data,
      'help': 'Normalize and scale data.'}),
)
for _flags, _kwargs in _cli_options:
    parser.add_argument(*_flags, **_kwargs)

# Tolerate unrecognized flags instead of erroring out.
args, unknown = parser.parse_known_args()
args = vars(args)

if __name__ == "__main__":
    train_model(args)