
Commit eb030a3

wip: finger
1 parent 5e0bdc9 commit eb030a3

8 files changed

Lines changed: 249 additions & 78 deletions


psyke/__init__.py

Lines changed: 8 additions & 8 deletions
@@ -251,12 +251,12 @@ def plot_fairness(self, dataframe: pd.DataFrame, groups: dict[str, list], colorm
         plt.show()

     def make_fair(self, features: Iterable[str]):
-        raise NotImplementedError(f'Fairness for {type(self).__name__} is not supported at the moment')
+        raise NotImplementedError(f'Fairness for {type(self).__name__} is not currently supported')

     def mae(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
             n: int = 3) -> float:
         """
-        Calculates the predictions' MAE w.r.t. the instances given as input.
+        Calculates the predictions' MAE with respect to the instances given as input.

         :param dataframe: the set of instances to be used to calculate the mean absolute error.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
@@ -271,7 +271,7 @@ def mae(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, crit
     def mse(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
             n: int = 3) -> float:
         """
-        Calculates the predictions' MSE w.r.t. the instances given as input.
+        Calculates the predictions' MSE with respect to the instances given as input.

         :param dataframe: the set of instances to be used to calculate the mean squared error.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
@@ -286,7 +286,7 @@ def mse(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, crit
     def r2(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
            n: int = 3) -> float:
         """
-        Calculates the predictions' R2 score w.r.t. the instances given as input.
+        Calculates the predictions' R2 score with respect to the instances given as input.

         :param dataframe: the set of instances to be used to calculate the R2 score.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
@@ -301,7 +301,7 @@ def r2(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, crite
     def accuracy(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
                  n: int = 3) -> float:
         """
-        Calculates the predictions' accuracy classification score w.r.t. the instances given as input.
+        Calculates the predictions' accuracy classification score with respect to the instances given as input.

         :param dataframe: the set of instances to be used to calculate the accuracy classification score.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
@@ -317,7 +317,7 @@ def accuracy(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False,
     def f1(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
            n: int = 3) -> float:
         """
-        Calculates the predictions' F1 score w.r.t. the instances given as input.
+        Calculates the predictions' F1 score with respect to the instances given as input.

         :param dataframe: the set of instances to be used to calculate the F1 score.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
@@ -398,14 +398,14 @@ def hex(predictor, grid, min_examples: int = 250, threshold: float = 0.1, output
     def ginger(predictor, features: Iterable[str], sigmas: Iterable[float], max_slices: int, min_rules: int = 1,
                max_poly: int = 1, alpha: float = 0.5, indpb: float = 0.5, tournsize: int = 3, metric: str = 'R2',
                n_gen: int = 50, n_pop: int = 50, threshold=None, valid=None, output=Target.REGRESSION,
-               normalization: dict[str, tuple[float, float]] = None,
+               discretization=None, normalization: dict[str, tuple[float, float]] = None,
                seed: int = get_default_random_seed()) -> Extractor:
         """
         Creates a new GInGER extractor.
         """
         from psyke.extraction.hypercubic.ginger import GInGER
         return GInGER(predictor, features, sigmas, max_slices, min_rules, max_poly, alpha, indpb, tournsize, metric,
-                      n_gen, n_pop, threshold, valid, output, normalization, seed)
+                      n_gen, n_pop, threshold, valid, output, discretization, normalization, seed)

     @staticmethod
     def gridrex(predictor, grid, min_examples: int = 250, threshold: float = 0.1,
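This hunk threads a new discretization keyword through the ginger factory into the GInGER constructor. Below is a hedged usage sketch of the extended factory; the dataset, regressor, and hyperparameter values are illustrative assumptions, not taken from the commit:

# A hedged usage sketch: dataset, regressor, and parameter values are assumptions.
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.neural_network import MLPRegressor
from psyke import Extractor

data = load_diabetes(as_frame=True)
df = pd.concat([data.data.iloc[:, :2], data.target], axis=1)  # psyke expects the target as last column
predictor = MLPRegressor(max_iter=500).fit(df.iloc[:, :-1], df.iloc[:, -1])

extractor = Extractor.ginger(predictor, features=list(df.columns[:-1]),
                             sigmas=[0.1, 0.1], max_slices=2,
                             discretization=None)  # keyword added by this commit
theory = extractor.extract(df)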

psyke/extraction/hypercubic/ginger/__init__.py

Lines changed: 3 additions & 2 deletions
@@ -22,9 +22,10 @@ class GInGER(HyperCubeExtractor):

     def __init__(self, predictor, features, sigmas, max_slices, min_rules=1, max_poly=1, alpha=0.5, indpb=0.5,
                  tournsize=3, metric='R2', n_gen=50, n_pop=50, threshold=None, valid=None,
-                 output: Target = Target.REGRESSION, normalization=None, seed: int = get_default_random_seed()):
+                 output: Target = Target.REGRESSION, discretization=None, normalization=None,
+                 seed: int = get_default_random_seed()):
         super().__init__(predictor, output=Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
-                         normalization=normalization)
+                         discretization=discretization, normalization=normalization)
         self.threshold = threshold
         np.random.seed(seed)

psyke/extraction/hypercubic/hypercube.py

Lines changed: 3 additions & 3 deletions
@@ -176,8 +176,8 @@ def barycenter(self) -> Point:

     def subcubes(self, cubes: Iterable[GenericCube], only_largest: bool = True) -> Iterable[GenericCube]:
         subcubes = [c for c in cubes if c in self and c.output != self.output]
-        if only_largest:
-            subsubcubes = [c for cube_list in [c.subcubes(cubes) for c in subcubes] for c in cube_list]
+        if only_largest and subcubes:
+            subsubcubes = {sc for c in subcubes for sc in c.subcubes(subcubes)}
             subcubes = [c for c in subcubes if c not in subsubcubes]
         return subcubes

@@ -247,7 +247,7 @@ def interval_to_value(self, dimension, unscale=None):
         if dimension not in self._infinite_dimensions:
             return Between(unscale(self[dimension][0], dimension), unscale(self[dimension][1], dimension))
         if len(self._infinite_dimensions[dimension]) == 2:
-            return
+            return None
         if '+' in self._infinite_dimensions[dimension]:
             return GreaterThan(unscale(self[dimension][0], dimension))
         if '-' in self._infinite_dimensions[dimension]:
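The subcubes patch does two things: it skips the recursion when no contained cube with a different output exists, and it recurses over the already-filtered subcubes (deduplicated in a set) instead of re-scanning all of cubes, so only the outermost nested cubes survive. Below is a toy illustration of that filtering idea on 1-D intervals; the Interval class is hypothetical and only mimics GenericCube's containment test:

# Hypothetical 1-D stand-in for GenericCube, mimicking the patched filtering logic.
class Interval:
    def __init__(self, lo, hi, output):
        self.lo, self.hi, self.output = lo, hi, output

    def __repr__(self):
        return f'[{self.lo}, {self.hi}] -> {self.output}'

    def __contains__(self, other):
        return other is not self and self.lo <= other.lo and other.hi <= self.hi

    def subcubes(self, cubes, only_largest=True):
        subcubes = [c for c in cubes if c in self and c.output != self.output]
        if only_largest and subcubes:  # the new guard avoids recursing on an empty list
            subsubcubes = {sc for c in subcubes for sc in c.subcubes(subcubes)}
            subcubes = [c for c in subcubes if c not in subsubcubes]
        return subcubes

outer, middle, inner = Interval(0, 10, 'a'), Interval(1, 8, 'b'), Interval(2, 3, 'c')
print(outer.subcubes([outer, middle, inner]))  # [[1, 8] -> b]: inner is hidden inside middle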

psyke/fuzzy/__init__.py

Lines changed: 82 additions & 0 deletions
@@ -0,0 +1,82 @@
+from collections.abc import Iterable
+from itertools import product
+
+import numpy as np
+import skfuzzy as skf
+from matplotlib import pyplot as plt
+from sklearn.linear_model import LinearRegression
+
+
+def generate_membership(var, domain, thresholds, shape='tri'):
+    th = [var.min()] + [min(max(t, var.min()), var.max()) for t in thresholds] + [var.max()]
+
+    if shape == 'tri':
+        mid = [(x1 + x2) / 2 for x1, x2 in zip(th[:-1], th[1:])]
+        return [skf.trapmf(domain, [domain.min()] * 2 + mid[:2])] + \
+               [skf.trimf(domain, [x1, x2, x3]) for x1, x2, x3 in zip(mid[:-2], mid[1:-1], mid[2:])] + \
+               [skf.trapmf(domain, mid[-2:] + [domain.max()] * 2)]
+    if shape == 'trap':
+        beg = [None, domain.min()] + [(3 * x1 + x2) / 4 for x1, x2 in zip(th[1:-1], th[2:])] + [domain.max()]
+        end = [domain.min()] + [(x1 + 3 * x2) / 4 for x1, x2 in zip(th[:-2], th[1:-1])] + [domain.max()]
+        return [skf.trapmf(domain, [end[i - 1], beg[i], end[i], beg[i + 1]]) for i in range(1, len(th))]
+    raise ValueError('Supported shape values are only \'tri\' and \'trap\'')
+
+def extend_domain(x, q_low=0.05, q_high=0.95, p=0.05, k_sigma=2.0, abs_min_margin=0.0):
+    ql, qh = np.quantile(x, [q_low, q_high])
+    margin = max(p * (qh - ql), k_sigma * np.std(x), abs_min_margin)
+    return np.linspace(ql - margin, qh + margin, 200)
+
+def fuzzify(cuts, data, features, feature_to_idx, shape='tri'):
+    cuts = dict(zip(features, cuts))
+    domains = {c: extend_domain(data[c]) for c in features}
+    return {c: (generate_membership(data[c], domains[c], cuts[c], shape), (min(domains[c]), max(domains[c])),
+                feature_to_idx[c]) for c in features}
+
+def fuzzy_labels(n):
+    if n < 1 or n > 9:
+        raise ValueError('n must be between 1 and 9')
+    if n == 1:
+        return ["Medium"]
+    if n == 2:
+        return ["Low", "High"]
+
+    full_scale = ["Extremely Low", "Very Low", "Low", "Slightly Low", "Medium",
+                  "Slightly High", "High", "Very High", "Extremely High"]
+    indices = np.round(np.linspace(0, len(full_scale) - 1, n)).astype(int)
+
+    selected = []
+    for i in indices:
+        if full_scale[i] not in selected:
+            selected.append(full_scale[i])
+
+    return selected
+
+def get_activations(x, functions_domains, valid):
+    levels = [np.array([skf.interp_membership(np.linspace(domain[0], domain[1], 200), mf, x[index]) for mf in mfs])
+              for mfs, domain, index in functions_domains.values()]
+    return np.prod(np.meshgrid(*levels, indexing='ij'), axis=0).ravel()[valid]
+
+def crisp_or_equation(lr: float | str | LinearRegression, features: Iterable[str], decimals: int = 3) -> str | float:
+    if isinstance(lr, LinearRegression):
+        terms = [f"{c:.{decimals}f}*{f}" for c, f in zip(lr.coef_, features)]
+        return f"y = {lr.intercept_:.{decimals}f} + " + " + ".join(terms)
+    return lr
+
+def generate_fuzzy_rules(variables: dict[str, Iterable[str]], outputs: Iterable[str | float | LinearRegression],
+                         features: Iterable[str], valid: Iterable[bool]) -> list[str]:
+    outputs = [crisp_or_equation(output, features) for output in outputs]
+    return [f'Output is {output} if {" and ".join(f"{var} is {label}" for var, label in zip(variables.keys(), combo))}'
+            for combo, output in zip(np.array(list(product(*list(variables.values()))))[valid], outputs)]
+
+def plot_membership(functions_domains):
+    fig, ax = plt.subplots(nrows=len(functions_domains), figsize=(6, len(functions_domains) * 3))
+
+    for i, (k, v) in enumerate(functions_domains.items()):
+        for s, l in zip(v[0], v[2]):
+            ax[i].plot(np.linspace(v[1][0], v[1][1], 200), s, linewidth=1.5, label=l)
+        ax[i].set_title(k)
+        ax[i].set_xlim(v[1][0], v[1][1])
+        ax[i].legend()
+
+    plt.tight_layout()
+    plt.show()
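A short sketch of how these helpers fit together: extend_domain builds a padded 200-point grid, generate_membership places trapezoids around the supplied thresholds, and fuzzy_labels names the resulting sets. The sample data and threshold values below are made up for illustration:

# Illustrative only: random data and hand-picked thresholds.
import numpy as np
import pandas as pd
from psyke.fuzzy import extend_domain, generate_membership, fuzzy_labels

values = pd.Series(np.random.default_rng(0).normal(5.0, 1.5, 300))
domain = extend_domain(values)                       # padded 200-point grid around the data
mfs = generate_membership(values, domain, [4.0, 6.0], shape='trap')

labels = fuzzy_labels(len(mfs))                      # 3 sets -> ['Extremely Low', 'Medium', 'Extremely High']
for label, mf in zip(labels, mfs):
    print(label, float(mf.max()))                    # each trapezoid peaks at 1.0 on its plateau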

psyke/fuzzy/finger/__init__.py

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
+import itertools
+from collections.abc import Iterable
+
+import numpy as np
+import pandas as pd
+from deap import base, creator
+from sklearn.base import ClassifierMixin
+from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import PolynomialFeatures
+
+from psyke import Target
+from psyke.fuzzy import fuzzify, plot_membership, fuzzy_labels, generate_fuzzy_rules, get_activations
+from psyke.genetic import regions_from_cuts, output_estimation
+from psyke.genetic.fgin import FGIn
+
+
+class FInGER:
+
+    def __init__(self, predictor, features, sigmas, max_slices, min_rules=1, max_poly=1, alpha=0.5, indpb=0.5,
+                 tournsize=3, n_gen=50, n_pop=50, membership_shape='trap', metric='R2', valid=None,
+                 output=Target.REGRESSION):
+
+        self.predictor = predictor
+        self.features = features
+        self.max_features = len(features)
+        self.sigmas = sigmas
+        self.max_slices = max_slices
+        self.min_rules = min_rules
+        self.poly = max_poly
+        self._output = Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output
+        self.valid = valid
+        self.trained_poly = None
+
+        self.alpha = alpha
+        self.indpb = indpb
+        self.tournsize = tournsize
+        self.metric = metric
+        self.n_gen = n_gen
+        self.n_pop = n_pop
+
+        self.shape = membership_shape
+        self.valid_masks = None
+        self.outputs = None
+        self.functions_domains = {}
+
+        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
+        creator.create("Individual", list, fitness=creator.FitnessMax)
+
+    # TODO: a class for methods and attributes supporting polynomial combinations
+    def __poly_names(self):
+        return [''.join(['' if pp == 0 else f'{n} * ' if pp == 1 else f'{n}**{pp} * '
+                         for pp, n in zip(p, self.trained_poly.feature_names_in_)])[:-3]
+                for p in self.trained_poly.powers_]
+
+    @staticmethod
+    def _get_cuts(individual, slices):
+        boundaries = np.cumsum([0] + list(slices))
+        return [sorted(individual[boundaries[i]:boundaries[i + 1]]) for i in range(len(slices))]
+
+    def extract(self, dataframe: pd.DataFrame) -> str:
+        best = {}
+        for poly in range(self.poly):
+            for slices in list(itertools.product(range(1, self.max_slices + 1), repeat=self.max_features)):
+                gr = FGIn((dataframe.iloc[:, :-1], dataframe.iloc[:, -1]), self.valid, self.features, self.sigmas,
+                          slices, min_rules=self.min_rules, poly=poly + 1, alpha=self.alpha, indpb=self.indpb,
+                          tournsize=self.tournsize, membership_shape=self.shape, metric=self.metric,
+                          output=self._output, warm=True)
+
+                b, score, _, _ = gr.run(n_gen=self.n_gen, n_pop=self.n_pop)
+                best[(score, poly + 1, slices)] = b
+        m = min(best)
+        poly, slices, best = m[1], m[2], best[m]
+        self.trained_poly = PolynomialFeatures(degree=poly, include_bias=False)
+
+        cuts = FInGER._get_cuts(best, slices)
+        self.functions_domains = fuzzify(cuts, dataframe.iloc[:, :-1], self.features,
+                                         {f: i for i, f in enumerate(dataframe.columns[:-1])}, self.shape)
+
+        masks = np.array([regions_from_cuts(dataframe, cuts, self.features) == r
+                          for r in range(np.prod([s + 1 for s in slices]))])
+        self.valid_masks = masks.sum(axis=1) >= 3
+        masks = masks[self.valid_masks]
+
+        self.outputs = np.array([output_estimation(dataframe.iloc[:, :-1], dataframe.iloc[:, -1], self._output,
+                                                   self.trained_poly, mask) for mask in masks]).T
+
+        functions_domains = {k: (v[0], v[1], fuzzy_labels(len(v[0]))) for k, v in self.functions_domains.items()}
+        return "\n".join(generate_fuzzy_rules({k: v[2] for k, v in functions_domains.items()}, self.outputs,
+                                              dataframe.columns[:-1], self.valid_masks))
+
+    def show_membership_functions(self):
+        functions_domains = {k: (v[0], v[1], fuzzy_labels(len(v[0]))) for k, v in self.functions_domains.items()}
+        plot_membership(functions_domains)
+
+    def predict(self, dataframe: pd.DataFrame) -> Iterable:
+        activations = np.array([get_activations(x, self.functions_domains, self.valid_masks)
+                                for _, x in dataframe.iterrows()])
+
+        if self._output == Target.CLASSIFICATION:
+            classes, idx = np.unique(self.outputs, return_inverse=True)
+            pred = classes[np.argmax(np.vstack([activations[:, idx == i].sum(axis=1) for i, c in enumerate(classes)]),
+                                     axis=0)]
+        else:
+            outputs = self.outputs if self._output == Target.CONSTANT else \
+                np.vstack([lr.predict(self.trained_poly.fit_transform(dataframe)) for lr in self.outputs]).T
+            pred = (outputs * activations).sum(axis=1)
+        return np.array(pred)
+
+    @property
+    def n_rules(self):
+        return len(self.outputs)
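An end-to-end sketch of the new extractor follows; the dataset, sigmas, and GA settings are illustrative assumptions, and FGIn (imported from psyke.genetic.fgin) is assumed to come from one of the changed files not shown in this excerpt:

# Hedged usage sketch: data and hyperparameters are assumptions, not from the commit.
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.neural_network import MLPRegressor
from psyke.fuzzy.finger import FInGER

data = load_diabetes(as_frame=True)
df = pd.concat([data.data.iloc[:, :2], data.target], axis=1)  # two features, target last

predictor = MLPRegressor(max_iter=500).fit(df.iloc[:, :-1], df.iloc[:, -1])
finger = FInGER(predictor, features=list(df.columns[:-1]), sigmas=[0.1, 0.1],
                max_slices=2, max_poly=1, n_gen=10, n_pop=20)

print(finger.extract(df))            # one fuzzy rule per region with at least 3 samples
finger.show_membership_functions()   # plots the fitted membership functions per feature
predictions = finger.predict(df.iloc[:, :-1])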

psyke/genetic/__init__.py

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+from statistics import mode
+import numpy as np
+from sklearn.linear_model import LinearRegression
+
+from psyke import Target
+
+
+def regions_from_cuts(x, cuts, features):
+    indices = [np.searchsorted(np.array(cut), x[f].to_numpy(), side='right')
+               for cut, f in zip(cuts, features)]
+
+    regions = np.zeros(len(x), dtype=int)
+    multiplier = 1
+    for idx, n in zip(reversed(indices), reversed([len(cut) + 1 for cut in cuts])):
+        regions += idx * multiplier
+        multiplier *= n
+    return regions
+
+def output_estimation(x, y, output, poly, mask, to_pred=None):
+    if output == Target.REGRESSION:
+        lr = LinearRegression().fit(poly.fit_transform(x)[mask], y[mask])
+        return lr if to_pred is None else lr.predict(poly.fit_transform(to_pred))
+    if output == Target.CONSTANT:
+        return np.mean(y[mask])
+    if output == Target.CLASSIFICATION:
+        return mode(y[mask])
+    raise ValueError('Supported outputs are Target.{REGRESSION, CONSTANT, CLASSIFICATION}')
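regions_from_cuts composes the per-feature bin indices into a single flat region id, mixed-radix style with the first feature most significant. A small worked check; the feature names and cut points are made up:

# With cuts [2.0] on f1 (2 bins) and [1.0, 3.0] on f2 (3 bins),
# the region id is bin(f1) * 3 + bin(f2).
import pandas as pd
from psyke.genetic import regions_from_cuts

x = pd.DataFrame({'f1': [1.0, 5.0, 5.0], 'f2': [0.5, 2.0, 4.0]})
print(regions_from_cuts(x, [[2.0], [1.0, 3.0]], ['f1', 'f2']))  # -> [0 4 5]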
