-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfc.py
More file actions
107 lines (77 loc) · 2.95 KB
/
Copy pathfc.py
File metadata and controls
107 lines (77 loc) · 2.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from ga import GeneticAlgorithm
class FeatureConstructor:
    """Create new features using a genetic algorithm.

    This class is a thin facade: the constructor arguments are stored on the
    instance and forwarded to a ``GeneticAlgorithm`` object, and every public
    method delegates to that object.

    Attributes:
        clf, fold, duration, max_iter, base_included :
            The constructor arguments, stored unchanged.
        ga : GeneticAlgorithm
            Object used for creating new sets of features.
    """

    def __init__(self, clf, fold, duration=None, max_iter=None, base_included=True):
        """Init method.

        Args:
            clf : classifier object implementing 'fit'
                Classifier used for scoring new features.
            fold : int, cross-validation generator or an iterable
                Determines the cross-validation splitting strategy,
                see also http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html.
            duration : int
                Determines how many minutes a genetic algorithm runs.
            max_iter : int
                Determines how many iterations a genetic algorithm runs.
            base_included : bool
                Determines whether or not the base dataset is included
                during the evaluation of newly created features.
        """
        self.clf = clf
        self.fold = fold
        self.duration = duration
        self.max_iter = max_iter
        self.base_included = base_included
        # All settings are handed over positionally, in this exact order.
        self.ga = GeneticAlgorithm(clf, fold, duration, max_iter, base_included)

    def fit(self, X, y):
        """Fit the underlying genetic algorithm.

        Args:
            X : array-like
                The data to fit.
            y : array-like
                The target variable.

        Returns:
            This instance, so that calls can be chained
            (e.g. ``fc.fit(X, y).save(filename)``).
        """
        self.ga.fit(X, y)
        return self

    def get_params(self, ind='best'):
        """Report the best or most frequent set of new features.

        Args:
            ind : string, 'best' or 'most_freq'
                Determines which set of features is reported.

        Returns:
            Whatever ``GeneticAlgorithm.get_params`` returns. The value is
            passed through so it is not lost if the delegate returns the
            features instead of (or in addition to) printing them.
        """
        # NOTE: unlike save(), `ind` is forwarded unchecked; any validation
        # is left to the GeneticAlgorithm object.
        return self.ga.get_params(ind)

    def save(self, filename, ind='best'):
        """Save the best or most frequent set of features to a file.

        Args:
            filename : string
                Forwarded unchanged to ``GeneticAlgorithm.save``.
            ind : string, 'best' or 'most_freq'
                Determines which set of features to save to the file.

        Raises:
            ValueError: If ``ind`` is neither 'best' nor 'most_freq'.
        """
        # Reject unknown selectors before touching the delegate.
        if ind not in ('best', 'most_freq'):
            raise ValueError("ind must be 'best' or 'most_freq'.")
        self.ga.save(filename, ind)

    def load(self, filename):
        """Load a set of features from a file.

        Args:
            filename : string
                Forwarded unchanged to ``GeneticAlgorithm.load``.

        Returns:
            Tuple with a set of features.
        """
        return self.ga.load(filename)

    def transform(self, X, individual):
        """Transform dataset into new one using created features.

        Args:
            X : array-like
                The data to transform.
            individual : tuple
                Tuple with a set of features.

        Returns:
            New dataset, array-like.
        """
        return self.ga.transform(X, individual)

    def plot(self):
        """Plot data from the genetic algorithm."""
        self.ga.plot()