-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFFS algorithm and machine learning model training code.py
More file actions
100 lines (81 loc) · 3.59 KB
/
FFS algorithm and machine learning model training code.py
File metadata and controls
100 lines (81 loc) · 3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import numpy as np
from sklearn.metrics import mean_squared_error, f1_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import Matern
from sklearn.pipeline import Pipeline
# Implement the FFS algorithm
def fractal_feature_selection(X, y, num_blocks=5):
"""
Fractal Feature Selection (FFS) algorithm.
Parameters:
X (array): Feature matrix
y (array): Target variable
num_blocks (int): Number of blocks to divide features into
Returns:
importance (array): Feature importance scores
"""
# Divide features into blocks
blocks = np.array_split(X, num_blocks, axis=1)
# Measure similarity between blocks using RMSE
similarities = []
for i in range(len(blocks)):
for j in range(i+1, len(blocks)):
mse = mean_squared_error(blocks[i], blocks[j])
similarities.append((i, j, mse))
# Determine feature importance based on low RMSE
importance = np.zeros(X.shape[1])
median_mse = np.median([s[2] for s in similarities])
for i, j, mse in similarities:
if mse < median_mse:
importance[i*2:(i+1)*2] += 1
importance[j*2:(j+1)*2] += 1
return importance
# Train a machine learning model using the FFS algorithm
def train_model(X, y):
importance = fractal_feature_selection(X, y)
X_selected = X[:, importance > 0]
X_scaled = StandardScaler().fit_transform(X_selected)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
# Train a Logistic Regression model
lr_pipeline = Pipeline([
('model', LogisticRegression())
])
lr_param_grid = {
'model__C': [0.1, 1, 10]
}
lr_grid_search = GridSearchCV(lr_pipeline, lr_param_grid, cv=5, scoring='f1_macro')
lr_grid_search.fit(X_train, y_train)
lr_best_model = lr_grid_search.best_estimator_
lr_y_pred = lr_best_model.predict(X_test)
print("F1 score (LR):", f1_score(y_test, lr_y_pred, average='macro'))
# Train a Gaussian Process Classifier
gp_classifier = GaussianProcessClassifier(kernel=Matern(), random_state=42)
gp_classifier.fit(X_train, y_train)
gp_y_pred = gp_classifier.predict(X_test)
print("F1 score (GP):", f1_score(y_test, gp_y_pred, average='macro'))
# Train a Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
rf_y_pred = rf_classifier.predict(X_test)
print("F1 score (RF):", f1_score(y_test, rf_y_pred, average='macro'))
# Train a Support Vector Machine Classifier
svm_classifier = SVC(kernel='rbf', C=1, random_state=42)
svm_classifier.fit(X_train, y_train)
svm_y_pred = svm_classifier.predict(X_test)
print("F1 score (SVM):", f1_score(y_test, svm_y_pred, average='macro'))
# Train a Gradient Boosting Classifier
gb_classifier = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb_classifier.fit(X_train, y_train)
gb_y_pred = gb_classifier.predict(X_test)
print("F1 score (GB):", f1_score(y_test, gb_y_pred, average='macro'))
# Example usage
# Generate some sample data
X = np.random.rand(100, 10)
y = np.random.randint(0, 2, 100)
# Train a machine learning model using the FFS algorithm
train_model(X, y)