-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
137 lines (113 loc) · 4.91 KB
/
main.py
File metadata and controls
137 lines (113 loc) · 4.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Import libraries
import logging
import yaml
import mlflow
import pandas as pd
import mlflow.sklearn
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
# Import steps
from steps.data_split import DataSplitter
from steps.ingest import Ingestion
from steps.clean import Cleaner
from steps.train import Trainer
from steps.predict import Predictor
# Configure the root logger at import time: INFO and above, with each record
# rendered as "<timestamp>:<level>:<message>".
logging.basicConfig(
    format='%(asctime)s:%(levelname)s:%(message)s',
    level=logging.INFO,
)
def main():
    """Run the pipeline once, without MLflow tracking.

    In order: calls ``DataSplitter.split_and_save_data()``, loads the train
    and test sets through ``Ingestion.load_data()``, cleans both with
    ``Cleaner.clean_data()``, trains and saves the model via ``Trainer``,
    evaluates it on the test set via ``Predictor``, and prints the metrics,
    classification report and confusion matrix to stdout.

    Returns:
        None.
    """
    # The splitter's return value is intentionally not bound: the sets used
    # below are the ones returned by Ingestion.load_data().
    DataSplitter().split_and_save_data()
    logging.info('Data has been split')

    # Load data
    ingestion = Ingestion()
    train, test = ingestion.load_data()
    logging.info("Data ingestion completed successfully")

    # Clean data
    cleaner = Cleaner()
    train_data = cleaner.clean_data(train)
    test_data = cleaner.clean_data(test)
    logging.info("Data cleaning completed successfully")

    # Prepare and train model
    trainer = Trainer()
    X_train, y_train = trainer.feature_target_separator(train_data)
    trainer.train_model(X_train, y_train)
    trainer.save_model()
    logging.info("Model training completed successfully")

    # Evaluate model
    predictor = Predictor()
    X_test, y_test = predictor.feature_target_separator(test_data)
    accuracy, class_report, roc_auc, conf_matrix = predictor.evaluate_model(X_test, y_test)
    logging.info("Model evaluation completed successfully")

    # Print evaluation results
    print("\n============= Model Evaluation Results ==============")
    print(f"Model: {trainer.model_name}")
    print(f"Accuracy Score: {accuracy:.4f}, ROC AUC Score: {roc_auc:.4f}")
    print(f"\n{class_report}")
    print("=====================================================\n")
    print(f"Confusion Matrix\n{conf_matrix}")
def main_with_mlflow():
    """Run the pipeline inside an MLflow run and record its results.

    Reads ``config.yml``, selects the "Model Training Experiment" experiment
    and, within a single MLflow run: calls the data splitter, loads and
    cleans the train/test sets, trains and saves the model, evaluates it,
    sets two tags, logs the configured model parameters and the accuracy /
    ROC AUC / weighted precision / weighted recall metrics, saves and logs a
    confusion-matrix heatmap (``confusion_matrix.png``), logs the fitted
    pipeline as an MLflow model and registers it under
    "Network Traffic Prediction". Finally prints the evaluation results.

    Returns:
        None.
    """
    # Load configuration (explicit encoding so parsing does not depend on
    # the platform's default locale).
    with open("config.yml", 'r', encoding="utf-8") as file:
        config = yaml.safe_load(file)

    # Set the MLflow experiment name
    mlflow.set_experiment("Model Training Experiment")

    with mlflow.start_run() as run:
        # The splitter's return value is intentionally not bound: the sets
        # used below are the ones returned by Ingestion.load_data().
        DataSplitter().split_and_save_data()
        logging.info('Data has been splitted successfully')

        # Data ingestion
        ingestion = Ingestion()
        train, test = ingestion.load_data()
        logging.info("Data ingestion completed successfully")

        # Data cleaning
        cleaner = Cleaner()
        train_data = cleaner.clean_data(train)
        test_data = cleaner.clean_data(test)
        logging.info("Data cleaning completed successfully")

        # Model training
        trainer = Trainer()
        X_train, y_train = trainer.feature_target_separator(train_data)
        trainer.train_model(X_train, y_train)
        trainer.save_model()
        logging.info("Model training completed successfully")

        # Model evaluation
        predictor = Predictor()
        X_test, y_test = predictor.feature_target_separator(test_data)
        accuracy, class_report, roc_auc, conf_matrix = predictor.evaluate_model(X_test, y_test)
        # A second report in dict form, so the weighted-average precision
        # and recall can be logged as individual metrics below.
        report = classification_report(y_test, trainer.pipeline.predict(X_test), output_dict=True)
        logging.info("Model evaluation completed successfully")

        # MLflow tags
        mlflow.set_tag('Model developer', 'OkeyAmy')
        mlflow.set_tag('preprocessing', 'LabelEncoder, Standard Scaler,')

        # Logging parameters and metrics
        # NOTE(review): the hard-coded index 5 picks one entry of the `model`
        # list in config.yml -- confirm it is the model Trainer actually fits.
        model_params = config['model'][5]['params']
        mlflow.log_params(model_params)
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("roc_auc", roc_auc)
        mlflow.log_metric("precision", report['weighted avg']['precision'])
        mlflow.log_metric("recall", report['weighted avg']['recall'])

        # Render the confusion matrix as a heatmap, save it as a PNG and
        # attach it to the run. The figure is always closed afterwards so it
        # does not stay registered with pyplot.
        fig = plt.figure(figsize=(8, 6))
        try:
            sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues")
            plt.title("Confusion Matrix")
            plt.xlabel("Predicted")
            plt.ylabel("Actual")
            plt.savefig("confusion_matrix.png")
        finally:
            plt.close(fig)
        mlflow.log_artifact("confusion_matrix.png")

        # Log the fitted pipeline as a model artifact of this run
        mlflow.sklearn.log_model(trainer.pipeline, "model")

        # Registering the model
        model_name = "Network Traffic Prediction"
        model_uri = f"runs:/{run.info.run_id}/model"
        mlflow.register_model(model_uri, model_name)
        logging.info("MLflow tracking completed successfully")

        # Print evaluation results
        print("\n============= Model Evaluation Results ==============")
        print(f"Model: {trainer.model_name}")
        print(f"Accuracy Score: {accuracy:.4f}, ROC AUC Score: {roc_auc:.4f}")
        print(f"\n{class_report}")
        print("=====================================================\n")
        print(f"Confusion Matrix:\n{conf_matrix}")
# Script entry point: when executed directly, run the MLflow-tracked pipeline.
# main() runs the same steps without any MLflow calls.
if __name__ == "__main__":
    main_with_mlflow()