-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
104 lines (77 loc) · 2.84 KB
/
app.py
File metadata and controls
104 lines (77 loc) · 2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import pandas as pd
import xgboost as xgb
import sys, os
from flask import Flask, request, render_template
from scipy.stats import zscore
import numpy as np
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)

# Load the trained booster once, at import time, so every request reuses it.
# Per the original note, this model file comes from the notebook. The path is
# relative, so it is presumably resolved against the working directory the app
# is started from -- TODO confirm deployment layout.
_MODEL_FILE = 'best_model.model'
xgb_model = xgb.Booster(model_file=_MODEL_FILE)
@app.route('/')
def index():
    """Serve the landing page.

    Returns
    -------
    str
        The rendered ``index.html`` template.
    """
    landing_page = render_template('index.html')
    return landing_page
@app.route('/prediction_result', methods=['GET', 'POST'])
def predict():
    """Handle a prediction request.

    On POST, the submitted form fields are passed to ``get_prediction`` and the
    result is rendered with ``prediction_result.html``. On any other method
    (GET) there is no form data to score, so the ``index.html`` page is
    rendered instead.

    Returns
    -------
    str
        The rendered result page on success, the rendered ``index.html`` page
        for a GET request, or an HTML-escaped diagnostic message of the form
        ``Error: <type>\\n<file>\\n<line>\\n<message>`` if the prediction fails.
    """
    # Local import keeps this block self-contained (stdlib only).
    from html import escape

    if request.method != 'POST':
        # Without this guard the view returned None for GET, which Flask
        # rejects with a TypeError ("view function did not return a valid
        # response") and turns into a 500.
        return render_template('index.html')

    features = request.form.to_dict()
    print(features)
    try:
        prediction = get_prediction(features)
        print(prediction)
        return render_template('prediction_result.html', prediction=prediction)
    except Exception as e:
        # Top-level boundary: report the failure to the console and to the
        # client rather than letting the request crash.
        exc_type = type(e)
        exc_tb = e.__traceback__
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        message = f'Error: {exc_type}\n{fname}\n{exc_tb.tb_lineno}\n{str(e)}'
        # The response is served as text/html and ``str(e)`` can echo raw form
        # input (e.g. a ValueError from ``float``), so escape it to prevent
        # reflected XSS. Escaping also makes "<class '...'>" visible instead of
        # being parsed as an HTML tag.
        return escape(message)
def preprocess(features):
    """Turn raw feature values into the normalized input used for prediction.

    Every value is cast to ``float``, the values are placed in a one-row
    dataframe, the derived column ``'modular ratio / interlinear spacing'`` is
    appended, and the row is z-scored.

    Parameters
    ----------
    features : dict
        Maps feature names to values accepted by ``float``. Must contain the
        keys ``'modular ratio'`` and ``'interlinear spacing'``.

    Returns
    -------
    z_df
        The output of ``scipy.stats.zscore`` computed along ``axis=1``, i.e.
        across the feature columns of the single input row (input features
        followed by the derived ratio).
    """
    numeric = dict((name, float(raw)) for name, raw in features.items())
    row = pd.DataFrame(numeric, index=[0])

    # Derive the last feature from two of the submitted ones.
    ratio = row['modular ratio'] / row['interlinear spacing']
    row['modular ratio / interlinear spacing'] = ratio

    # Z-normalization is needed because it was applied to the training data.
    return zscore(row, axis=1)
def get_predicted_class(df):
    """Score the prepared features with the loaded booster and pick the top class.

    Parameters
    ----------
    df : DataFrame
        The normalized input features; it is wrapped in an ``xgb.DMatrix``
        before being passed to the model.

    Returns
    -------
    tuple
        ``(label, percentage)``: the class label whose score is highest in the
        first row of the model output, and that score multiplied by 100 and
        rounded to two decimals.
    """
    # Class labels, in the order the model's output positions are read.
    # NOTE(review): this order must match the label encoding used at training
    # time -- confirm against the notebook.
    labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'W', 'X', 'Y']

    dmatrix = xgb.DMatrix(df)
    # Only the first row of the model output is used.
    first_row_scores = np.array(xgb_model.predict(dmatrix)[0])

    # Position of the highest score selects the label.
    best = first_row_scores.argmax()
    label = labels[best]
    percentage = first_row_scores.max() * 100
    return (label, round(percentage, 2))
def get_prediction(features):
    """Preprocess the raw features and return the model's prediction for them.

    Parameters
    ----------
    features : dict
        The values of the features used for the prediction

    Returns
    -------
    tuple
        Whatever ``get_predicted_class`` returns for the preprocessed
        features: the predicted class label and its rounded percentage.
    """
    return get_predicted_class(preprocess(features))
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute arbitrary code. It was previously
    # hard-coded on; it is now opt-in via the FLASK_DEBUG environment variable
    # (e.g. ``FLASK_DEBUG=1 python app.py`` for local development).
    debug_flag = os.environ.get('FLASK_DEBUG', '').strip().lower()
    app.run(debug=debug_flag in ('1', 'true', 'yes', 'on'))