-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtrain.py
More file actions
88 lines (74 loc) · 3.21 KB
/
Copy pathtrain.py
File metadata and controls
88 lines (74 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import sys
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
import pickle
# Train per-resource model for memory subsystem contention
# nf: network function
#
# df: dataframe of training data.
#
# model_name: the model to train
# - linear: linear regression
# - slomo: slomo mem model (gradient boosting regression)
# - yala_mem: yala mem model (gradient boosting regression)
#
# model_path: path to store the trained models
def train_mem(nf, df, model_name="linear", model_path="models.pkl"):
    """Train and persist a memory-subsystem contention model for one NF.

    All models live in a single pickle file holding a nested dict of the
    form ``{nf: {model_name: fitted_model}}``. An entry that is already
    present is left untouched, so repeated calls do not retrain.

    Args:
        nf: Name of the network function; used as the outer dict key.
        df: Training data. Must provide the feature columns of the chosen
            model plus the target column ``perf``.
        model_name: Which model to train:
            ``"linear"``   - LinearRegression on the seven counter columns;
            ``"slomo"``    - GradientBoostingRegressor on the same columns;
            ``"yala_mem"`` - GradientBoostingRegressor on ``pktsize`` and
            ``flowsize`` plus the same columns.
        model_path: Pickle file the existing models are read from and the
            updated models are written back to.

    Returns:
        ``1`` once the requested model is present in the store, or ``None``
        when ``model_name`` is not recognised (a message is printed).
    """
    counters = ['ipc', 'irt', 'l2crd', 'l2cwr', 'memrd', 'memwr', 'wss']
    if model_name in ("linear", "slomo"):
        features = counters
    elif model_name == "yala_mem":
        features = ['pktsize', 'flowsize'] + counters
    else:
        print("Invalid model name")
        return None

    x_train = df[features]
    y_train = df['perf']

    # NOTE: unpickling can execute arbitrary code, so model_path must only
    # ever point at a file produced by this script (or otherwise trusted).
    try:
        with open(model_path, 'rb') as store:
            models = pickle.load(store)
    except (OSError, EOFError, pickle.UnpicklingError):
        # Missing, empty or corrupt store: start from scratch.
        models = {}

    nf_models = models.setdefault(nf, {})
    if model_name not in nf_models:
        # Build the estimator only when it is actually going to be fitted.
        if model_name == "linear":
            model = LinearRegression()
        else:
            model = GradientBoostingRegressor()
        print(f"Training {model_name} model for {nf} ...")
        model.fit(x_train, y_train)
        nf_models[model_name] = model
        print("Dumping models ...")
        # The context manager guarantees the file is flushed and closed.
        with open(model_path, 'wb') as store:
            pickle.dump(models, store)
    return 1
# Train per-resource model for regex accelerator contention
def train_reg(nf, df, model_path="models.pkl"):
    """Train and persist the ``yala_reg`` model for one network function.

    The model is a LinearRegression fitted on the ``mtbr`` column against
    the ``perf_reverse`` column. It is stored under
    ``models[nf]["yala_reg"]`` in the pickle file at ``model_path``; if that
    entry already exists nothing is retrained or rewritten.

    Args:
        nf: Name of the network function; used as the outer dict key.
        df: Training data providing the ``mtbr`` and ``perf_reverse`` columns.
        model_path: Pickle file the existing models are read from and the
            updated models are written back to.
    """
    # NOTE: unpickling can execute arbitrary code, so model_path must only
    # ever point at a file produced by this script (or otherwise trusted).
    try:
        with open(model_path, 'rb') as store:
            models = pickle.load(store)
    except (OSError, EOFError, pickle.UnpicklingError):
        # Missing, empty or corrupt store: start from scratch.
        models = {}

    # Create the per-NF entry on demand so this also works when no model
    # has been stored for this NF yet.
    nf_models = models.setdefault(nf, {})
    if "yala_reg" not in nf_models:
        print(f"Training yala_reg model for {nf} ...")
        solo_model = LinearRegression()
        # fit solo performance of regex part
        solo_model.fit(df[['mtbr']], df['perf_reverse'])
        nf_models["yala_reg"] = solo_model
        print("Dumping models ...")
        # The context manager guarantees the file is flushed and closed.
        with open(model_path, 'wb') as store:
            pickle.dump(models, store)
if __name__ == "__main__":
    # Network function whose profiled samples are used for training.
    nf = "flowmon"

    # Both memory-contention sample files are read with this column layout.
    mem_columns = [
        'flowsize', 'pktsize', 'ipc', 'irt', 'l2crd', 'l2cwr',
        'memrd', 'memwr', 'wss', 'perf',
    ]

    # The linear and slomo models are fitted on the same sample file.
    path = f"../profile/{nf}/sample_slomo_train"
    df = pd.read_csv(path, sep=' ', names=mem_columns)
    print(f"Total {str(len(df))} group(s) of data for training linear/slomo")
    for baseline in ("linear", "slomo"):
        train_mem(nf, df, model_name=baseline, model_path="models.pkl")

    # yala memory-subsystem model, trained from its own sample file.
    path = f"../profile/{nf}/sample_yala_mem_train"
    df = pd.read_csv(path, sep=' ', names=mem_columns)
    print(f"Total {str(len(df))} group(s) of data for training per-resource model regarding memory subsystem contention in yala")
    train_mem(nf, df, model_name="yala_mem", model_path="models.pkl")

    # yala regex-accelerator model: two-column samples (mtbr, perf_reverse).
    path = f"../profile/{nf}/sample_yala_reg_train"
    df = pd.read_csv(path, sep=' ', names=['mtbr', 'perf_reverse'])
    print(f"Total {str(len(df))} group(s) of data for training per-resource model regarding regex accelerator contention in yala")
    train_reg(nf, df, model_path="models.pkl")