Skip to content

Commit e11084a

Browse files
ENH: Add hyperparameter validation to classifiers (#31)
* ENH: Add hyperparameter validation to REDSVM and adapt tests
* ENH: Add hyperparameter validation to SVOREX and adapt tests
* ENH: Add hyperparameter validation to NNOP and adapt tests
* ENH: Add hyperparameter validation to NNPOM and adapt tests
* ENH: Add hyperparameter validation to OrdinalDecomposition and adapt tests
* ENH: Enforce strict boolean type check for 'shrinking' hyperparameter
1 parent ac090ee commit e11084a

File tree

10 files changed

+280
-62
lines changed

10 files changed

+280
-62
lines changed

orca_python/classifiers/NNOP.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""Neural Network with Ordered Partitions (NNOP)."""
22

33
import math as math
4+
from numbers import Integral, Real
45

56
import numpy as np
67
import scipy
7-
from sklearn.base import BaseEstimator, ClassifierMixin
8+
from sklearn.base import BaseEstimator, ClassifierMixin, _fit_context
9+
from sklearn.utils._param_validation import Interval
810
from sklearn.utils.multiclass import unique_labels
911
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
1012

@@ -89,12 +91,20 @@ class NNOP(BaseEstimator, ClassifierMixin):
8991
9092
"""
9193

94+
_parameter_constraints: dict = {
95+
"epsilon_init": [Interval(Real, 0.0, None, closed="neither")],
96+
"n_hidden": [Interval(Integral, 1, None, closed="left")],
97+
"max_iter": [Interval(Integral, 1, None, closed="left")],
98+
"lambda_value": [Interval(Real, 0.0, None, closed="neither")],
99+
}
100+
92101
def __init__(self, epsilon_init=0.5, n_hidden=50, max_iter=500, lambda_value=0.01):
93102
self.epsilon_init = epsilon_init
94103
self.n_hidden = n_hidden
95104
self.max_iter = max_iter
96105
self.lambda_value = lambda_value
97106

107+
@_fit_context(prefer_skip_nested_validation=True)
98108
def fit(self, X, y):
99109
"""Fit the model with the training data.
100110

orca_python/classifiers/NNPOM.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""Neural Network based on Proportional Odds Model (NNPOM)."""
22

33
import math as math
4+
from numbers import Integral, Real
45

56
import numpy as np
67
import scipy
7-
from sklearn.base import BaseEstimator, ClassifierMixin
8+
from sklearn.base import BaseEstimator, ClassifierMixin, _fit_context
9+
from sklearn.utils._param_validation import Interval
810
from sklearn.utils.multiclass import unique_labels
911
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
1012

@@ -90,12 +92,20 @@ class NNPOM(BaseEstimator, ClassifierMixin):
9092
9193
"""
9294

95+
_parameter_constraints: dict = {
96+
"epsilon_init": [Interval(Real, 0.0, None, closed="neither")],
97+
"n_hidden": [Interval(Integral, 1, None, closed="left")],
98+
"max_iter": [Interval(Integral, 1, None, closed="left")],
99+
"lambda_value": [Interval(Real, 0.0, None, closed="neither")],
100+
}
101+
93102
def __init__(self, epsilon_init=0.5, n_hidden=50, max_iter=500, lambda_value=0.01):
94103
self.epsilon_init = epsilon_init
95104
self.n_hidden = n_hidden
96105
self.max_iter = max_iter
97106
self.lambda_value = lambda_value
98107

108+
@_fit_context(prefer_skip_nested_validation=True)
99109
def fit(self, X, y):
100110
"""Fit the model with the training data.
101111

orca_python/classifiers/OrdinalDecomposition.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
"""OrdinalDecomposition ensemble."""
22

33
import numpy as np
4-
from sklearn.base import BaseEstimator, ClassifierMixin
4+
from sklearn.base import BaseEstimator, ClassifierMixin, _fit_context
5+
from sklearn.utils._param_validation import StrOptions
56
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
67

78
from orca_python.utilities import load_classifier
89

9-
# from sys import path
10-
# path.append('..')
11-
1210

1311
class OrdinalDecomposition(BaseEstimator, ClassifierMixin):
1412
"""OrdinalDecomposition ensemble classifier.
@@ -88,6 +86,26 @@ class OrdinalDecomposition(BaseEstimator, ClassifierMixin):
8886
8987
"""
9088

89+
_parameter_constraints: dict = {
90+
"dtype": [
91+
StrOptions(
92+
{
93+
"ordered_partitions",
94+
"one_vs_next",
95+
"one_vs_followers",
96+
"one_vs_previous",
97+
}
98+
)
99+
],
100+
"decision_method": [
101+
StrOptions(
102+
{"exponential_loss", "hinge_loss", "logarithmic_loss", "frank_hall"}
103+
)
104+
],
105+
"base_classifier": [str],
106+
"parameters": [dict],
107+
}
108+
91109
def __init__(
92110
self,
93111
dtype="ordered_partitions",
@@ -100,6 +118,7 @@ def __init__(
100118
self.base_classifier = base_classifier
101119
self.parameters = parameters
102120

121+
@_fit_context(prefer_skip_nested_validation=True)
103122
def fit(self, X, y):
104123
"""Fit the model with the training data.
105124

orca_python/classifiers/REDSVM.py

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
"""Reduction from ordinal regression to binary SVM (REDSVM)."""
22

3+
from numbers import Integral, Real
4+
35
import numpy as np
4-
from sklearn.base import BaseEstimator, ClassifierMixin
6+
from sklearn.base import BaseEstimator, ClassifierMixin, _fit_context
7+
from sklearn.utils._param_validation import Interval, StrOptions
58
from sklearn.utils.multiclass import unique_labels
69
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
710

811
from orca_python.classifiers.libsvmRank.python import svm
912

10-
# from .libsvmRank.python import svm
11-
1213

1314
class REDSVM(BaseEstimator, ClassifierMixin):
1415
"""Reduction from ordinal regression to binary SVM classifiers.
@@ -37,13 +38,16 @@ class REDSVM(BaseEstimator, ClassifierMixin):
3738
degree : int, default=3
3839
Set degree in kernel function.
3940
40-
gamma : float, default=1/n_features
41-
Set gamma in kernel function.
41+
gamma : {'scale', 'auto'} or float, default='auto'
42+
Kernel coefficient determining the influence of individual training samples:
43+
- 'scale': 1 / (n_features * X.var())
44+
- 'auto': 1 / n_features
45+
- float: Must be strictly positive.
4246
4347
coef0 : float, default=0
4448
Set coef0 in kernel function.
4549
46-
shrinking : int, default=1
50+
shrinking : bool, default=True
4751
Set whether to use the shrinking heuristics.
4852
4953
tol : float, default=0.001
@@ -74,14 +78,42 @@ class REDSVM(BaseEstimator, ClassifierMixin):
7478
7579
"""
7680

81+
_parameter_constraints: dict = {
82+
"C": [Interval(Real, 0.0, None, closed="neither")],
83+
"kernel": [
84+
StrOptions(
85+
{
86+
"linear",
87+
"poly",
88+
"rbf",
89+
"sigmoid",
90+
"stump",
91+
"perceptron",
92+
"laplacian",
93+
"exponential",
94+
"precomputed",
95+
}
96+
)
97+
],
98+
"degree": [Interval(Integral, 0, None, closed="left")],
99+
"gamma": [
100+
StrOptions({"scale", "auto"}),
101+
Interval(Real, 0.0, None, closed="neither"),
102+
],
103+
"coef0": [Interval(Real, None, None, closed="neither")],
104+
"shrinking": ["boolean"],
105+
"tol": [Interval(Real, 0.0, None, closed="neither")],
106+
"cache_size": [Interval(Real, 0.0, None, closed="neither")],
107+
}
108+
77109
def __init__(
78110
self,
79111
C=1,
80112
kernel="rbf",
81113
degree=3,
82-
gamma=None,
114+
gamma="auto",
83115
coef0=0,
84-
shrinking=1,
116+
shrinking=True,
85117
tol=0.001,
86118
cache_size=100,
87119
):
@@ -94,6 +126,7 @@ def __init__(
94126
self.tol = tol
95127
self.cache_size = cache_size
96128

129+
@_fit_context(prefer_skip_nested_validation=True)
97130
def fit(self, X, y):
98131
"""Fit the model with the training data.
99132
@@ -117,14 +150,24 @@ def fit(self, X, y):
117150
If parameters are invalid or data has wrong format.
118151
119152
"""
153+
# Additional strict validation for boolean parameters
154+
if not isinstance(self.shrinking, bool):
155+
raise ValueError(
156+
f"The 'shrinking' parameter must be of type bool. "
157+
f"Got {type(self.shrinking).__name__} instead."
158+
)
159+
120160
# Check that X and y have correct shape
121161
X, y = check_X_y(X, y)
122162
# Store the classes seen during fit
123163
self.classes_ = unique_labels(y)
124164

125-
# Set the default g value if necessary
126-
if self.gamma is None:
127-
self.gamma = 1 / np.size(X, 1)
165+
# Resolve gamma: 'auto' and 'scale' are computed from X; a float is used as given
166+
gamma_value = self.gamma
167+
if self.gamma == "auto":
168+
gamma_value = 1.0 / X.shape[1]
169+
elif self.gamma == "scale":
170+
gamma_value = 1.0 / (X.shape[1] * X.var())
128171

129172
# Map kernel type
130173
kernel_type_mapping = {
@@ -138,18 +181,18 @@ def fit(self, X, y):
138181
"exponential": 7,
139182
"precomputed": 8,
140183
}
141-
kernel_type = kernel_type_mapping.get(self.kernel, -1)
184+
kernel_type = kernel_type_mapping[self.kernel]
142185

143186
# Fit the model
144187
options = "-s 5 -t {} -d {} -g {} -r {} -c {} -m {} -e {} -h {} -q".format(
145188
str(kernel_type),
146189
str(self.degree),
147-
str(self.gamma),
190+
str(gamma_value),
148191
str(self.coef0),
149192
str(self.C),
150193
str(self.cache_size),
151194
str(self.tol),
152-
str(self.shrinking),
195+
str(1 if self.shrinking else 0),
153196
)
154197
self.model_ = svm.fit(y.tolist(), X.tolist(), options)
155198

@@ -184,6 +227,6 @@ def predict(self, X):
184227
# Input validation
185228
X = check_array(X)
186229

187-
y_pred = svm.predict(X.tolist(), self.model_)
230+
y_pred = np.array(svm.predict(X.tolist(), self.model_))
188231

189232
return y_pred

orca_python/classifiers/SVOREX.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""Support Vector for Ordinal Regression (Explicit constraints) (SVOREX)."""
22

3-
from sklearn.base import BaseEstimator, ClassifierMixin
3+
from numbers import Integral, Real
4+
5+
import numpy as np
6+
from sklearn.base import BaseEstimator, ClassifierMixin, _fit_context
7+
from sklearn.utils._param_validation import Interval, StrOptions
48
from sklearn.utils.multiclass import unique_labels
59
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
610

7-
# from .svorex import svorex
811
from orca_python.classifiers.svorex import svorex
912

1013

@@ -56,13 +59,30 @@ class SVOREX(BaseEstimator, ClassifierMixin):
5659
5760
"""
5861

62+
_parameter_constraints: dict = {
63+
"C": [Interval(Real, 0.0, None, closed="neither")],
64+
"kernel": [
65+
StrOptions(
66+
{
67+
"gaussian",
68+
"linear",
69+
"poly",
70+
}
71+
)
72+
],
73+
"degree": [Interval(Integral, 0, None, closed="left")],
74+
"tol": [Interval(Real, 0.0, None, closed="neither")],
75+
"kappa": [Interval(Real, 0.0, None, closed="neither")],
76+
}
77+
5978
def __init__(self, C=1.0, kernel="gaussian", degree=2, tol=0.001, kappa=1):
6079
self.C = C
6180
self.kernel = kernel
6281
self.degree = degree
6382
self.tol = tol
6483
self.kappa = kappa
6584

85+
@_fit_context(prefer_skip_nested_validation=True)
6686
def fit(self, X, y):
6787
"""Fit the model with the training data.
6888
@@ -135,6 +155,6 @@ def predict(self, X):
135155
# Input validation
136156
X = check_array(X)
137157

138-
y_pred = svorex.predict(X.tolist(), self.model_)
158+
y_pred = np.array(svorex.predict(X.tolist(), self.model_))
139159

140160
return y_pred

orca_python/classifiers/tests/test_nnop.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,36 @@ def y():
2121
@pytest.mark.parametrize(
2222
"param_name, invalid_value",
2323
[
24+
("epsilon_init", 0),
25+
("epsilon_init", -1),
2426
("n_hidden", -1),
2527
("max_iter", -1),
28+
("lambda_value", -1e-5),
2629
],
2730
)
28-
def test_nnop_fit_hyperparameters_validation(X, y, param_name, invalid_value):
29-
"""Test that hyperparameters are validated."""
31+
def test_nnop_hyperparameter_value_validation(X, y, param_name, invalid_value):
32+
"""Test that NNOP raises ValueError for invalid values of hyperparameters."""
3033
classifier = NNOP(**{param_name: invalid_value})
31-
model = classifier.fit(X, y)
3234

33-
assert model is None, "The NNOP fit method doesnt return Null on error"
35+
with pytest.raises(ValueError, match=rf"The '{param_name}' parameter.*"):
36+
classifier.fit(X, y)
37+
38+
39+
@pytest.mark.parametrize(
40+
"param_name, invalid_value",
41+
[
42+
("epsilon_init", "high"),
43+
("n_hidden", 5.5),
44+
("max_iter", 2.5),
45+
("lambda_value", "tight"),
46+
],
47+
)
48+
def test_nnop_hyperparameter_type_validation(X, y, param_name, invalid_value):
49+
"""Test that NNOP raises ValueError for invalid types of hyperparameters."""
50+
classifier = NNOP(**{param_name: invalid_value})
51+
52+
with pytest.raises(ValueError, match=rf"The '{param_name}' parameter.*"):
53+
classifier.fit(X, y)
3454

3555

3656
def test_nnop_fit_input_validation(X, y):

0 commit comments

Comments
 (0)