diff --git a/.github/workflows/test-package-no-mosek.yml b/.github/workflows/test-package-no-mosek.yml index 73c6b68..2d7ef3c 100644 --- a/.github/workflows/test-package-no-mosek.yml +++ b/.github/workflows/test-package-no-mosek.yml @@ -10,7 +10,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v2 - name: Set up Python ${{matrix.python-version}} diff --git a/.github/workflows/test-package.yml b/.github/workflows/test-package.yml index b9c5bbf..1819d0e 100644 --- a/.github/workflows/test-package.yml +++ b/.github/workflows/test-package.yml @@ -10,7 +10,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.12'] + python-version: ['3.13'] steps: - uses: actions/checkout@v2 - name: Set up Python ${{matrix.python-version}} diff --git a/pykoop/_sklearn_metaestimators/metaestimators.py b/pykoop/_sklearn_metaestimators/metaestimators.py index 4e4d8f5..e5b413f 100644 --- a/pykoop/_sklearn_metaestimators/metaestimators.py +++ b/pykoop/_sklearn_metaestimators/metaestimators.py @@ -15,6 +15,7 @@ class to do so. My compromise was to copy and adjust that code for my own uses, """ import abc +from contextlib import suppress from typing import Any, Dict, List import numpy as np @@ -28,10 +29,18 @@ def _get_params(self, attr: str, deep: bool = True) -> Dict[str, Any]: out = super().get_params(deep=deep) if not deep: return out + estimators = getattr(self, attr) - if not hasattr(estimators, '__iter__'): + try: + out.update(estimators) + except (TypeError, ValueError): + # Ignore TypeError for cases where estimators is not a list of + # (name, estimator) and ignore ValueError when the list is not + # formatted correctly. This is to prevent errors when calling + # `set_params`. `BaseEstimator.set_params` calls `get_params` which + # can error for invalid values for `estimators`. 
return out - out.update(estimators) + for name, estimator in estimators: if hasattr(estimator, "get_params"): for key, value in estimator.get_params(deep=True).items(): @@ -43,14 +52,18 @@ def _set_params(self, attr: str, **params) -> '_BaseComposition': # 1. All steps if attr in params: setattr(self, attr, params.pop(attr)) - # 2. Step replacement + # 2. Replace items with estimators in params items = getattr(self, attr) - names = [] - if hasattr(items, '__iter__'): - names, _ = zip(*items) - for name in list(params.keys()): - if "__" not in name and name in names: - self._replace_estimator(attr, name, params.pop(name)) + if isinstance(items, list) and items: + # Get item names used to identify valid names in params + # `zip` raises a TypeError when `items` does not contain + # elements of length 2 + with suppress(TypeError): + item_names, _ = zip(*items) + for name in list(params.keys()): + if "__" not in name and name in item_names: + self._replace_estimator(attr, name, params.pop(name)) + # 3. Step parameters and other initialisation arguments super().set_params(**params) return self @@ -68,11 +81,11 @@ def _validate_names(self, names: List[str]) -> None: if len(set(names)) != len(names): raise ValueError("Names provided are not unique: {0!r}".format( list(names))) - conflict_names = set(names).intersection(self.get_params(deep=False)) - if conflict_names: + invalid_names = set(names).intersection(self.get_params(deep=False)) + if invalid_names: raise ValueError( "Estimator names conflict with constructor arguments: {0!r}". 
- format(sorted(conflict_names))) + format(sorted(invalid_names))) invalid_names = [name for name in names if "__" in name] if invalid_names: raise ValueError( diff --git a/pykoop/centers.py b/pykoop/centers.py index 6f9c883..cfe7c8f 100644 --- a/pykoop/centers.py +++ b/pykoop/centers.py @@ -78,7 +78,7 @@ class GridCenters(Centers): >>> grid.fit(X_msd[:, 1:]) # Remove episode feature GridCenters(n_points_per_feature=4) >>> grid.centers_ - array([...]) + array(...) """ def __init__( @@ -155,7 +155,7 @@ class UniformRandomCenters(Centers): >>> rand.fit(X_msd[:, 1:]) # Remove episode feature UniformRandomCenters(n_centers=10) >>> rand.centers_ - array([...]) + array(...) """ def __init__( @@ -235,7 +235,7 @@ class GaussianRandomCenters(Centers): >>> rand.fit(X_msd[:, 1:]) # Remove episode feature GaussianRandomCenters(n_centers=10) >>> rand.centers_ - array([...]) + array(...) """ def __init__( @@ -306,7 +306,7 @@ class QmcCenters(Centers): >>> qmc.fit(X_msd[:, 1:]) # Remove episode feature QmcCenters(n_centers=10) >>> qmc.centers_ - array([...]) + array(...) Generate centers using a Sobol sequence @@ -314,7 +314,7 @@ class QmcCenters(Centers): >>> qmc.fit(X_msd[:, 1:]) # Remove episode feature QmcCenters(n_centers=8, qmc=) >>> qmc.centers_ - array([...]) + array(...) """ def __init__( @@ -430,7 +430,7 @@ class ClusterCenters(Centers): >>> kmeans.fit(X_msd[:, 1:]) # Remove episode feature ClusterCenters(estimator=KMeans(n_clusters=3)) >>> kmeans.centers_ - array([...]) + array(...) """ def __init__( @@ -507,7 +507,7 @@ class GaussianMixtureRandomCenters(Centers): >>> gmm.fit(X_msd[:, 1:]) # Remove episode feature GaussianMixtureRandomCenters(estimator=GaussianMixture(n_components=3)) >>> gmm.centers_ - array([...]) + array(...) 
""" def __init__( diff --git a/pykoop/kernel_approximation.py b/pykoop/kernel_approximation.py index b5b1772..3a7f04e 100644 --- a/pykoop/kernel_approximation.py +++ b/pykoop/kernel_approximation.py @@ -113,7 +113,7 @@ class RandomFourierKernelApprox(KernelApproximation): >>> ka.fit(X_msd[:, 1:]) # Remove episode feature RandomFourierKernelApprox(n_components=10, random_state=1234) >>> ka.transform(X_msd[:, 1:]) - array([...]) + array(...) """ _ft_lookup = { @@ -267,6 +267,10 @@ def transform(self, X: np.ndarray) -> np.ndarray: """ sklearn.utils.validation.check_is_fitted(self) X = sklearn.utils.validation.check_array(X) + if X.shape[1] != self.n_features_in_: + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_in_} features as input.") X_scaled = np.sqrt(2 * self.shape) * X products = X_scaled @ self.random_weights_ # (n_samples, n_components) if self.method == 'weight_only': @@ -314,7 +318,7 @@ class RandomBinningKernelApprox(KernelApproximation): >>> ka.fit(X_msd[:, 1:]) # Remove episode feature RandomBinningKernelApprox(n_components=10, random_state=1234) >>> ka.transform(X_msd[:, 1:]) - array([...]) + array(...) 
""" _ddot_lookup = { @@ -451,6 +455,10 @@ def transform(self, X: np.ndarray) -> np.ndarray: """ sklearn.utils.validation.check_is_fitted(self) X = sklearn.utils.validation.check_array(X) + if X.shape[1] != self.n_features_in_: + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_in_} features as input.") X_scaled = np.sqrt(2 * self.shape) * X X_hashed = self._hash_samples(X_scaled) Xt = self.encoder_.transform(X_hashed) / np.sqrt(self.n_components) diff --git a/pykoop/koopman_pipeline.py b/pykoop/koopman_pipeline.py index 821c79d..5543bbc 100644 --- a/pykoop/koopman_pipeline.py +++ b/pykoop/koopman_pipeline.py @@ -721,7 +721,7 @@ def fit( } # Validate data X = sklearn.utils.validation.check_array(X, **self._check_array_params) - # Set numbre of input features (including episode feature) + # Set number of input features (including episode feature) self.n_features_in_ = X.shape[1] # Extract episode feature if self.episode_feature_: @@ -751,12 +751,11 @@ def transform(self, X: np.ndarray) -> np.ndarray: X, **self._check_array_params, ) - # Check input shape + # Check number of features if X.shape[1] != self.n_features_in_: - raise ValueError(f'{self.__class__.__name__} `fit()` called ' - f'with {self.n_features_in_} features, but ' - f'`transform()` called with {X.shape[1]} ' - 'features.') + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_in_} features as input.") return self._apply_transform_or_inverse(X, 'transform') def inverse_transform(self, X: np.ndarray) -> np.ndarray: @@ -996,12 +995,11 @@ def transform(self, X: np.ndarray) -> np.ndarray: X, **self._check_array_params, ) - # Check input shape + # Check number of features if X.shape[1] != self.n_features_in_: - raise ValueError(f'{self.__class__.__name__} `fit()` called ' - f'with {self.n_features_in_} features, but ' - f'`transform()` called with {X.shape[1]} ' - 'features.') 
+ raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_in_} features as input.") return self._apply_transform_or_inverse(X, 'transform') def inverse_transform(self, X: np.ndarray) -> np.ndarray: @@ -1014,12 +1012,11 @@ def inverse_transform(self, X: np.ndarray) -> np.ndarray: X, **self._check_array_params, ) - # Check input shape + # Check number of features if X.shape[1] != self.n_features_out_: - raise ValueError(f'{self.__class__.__name__} `fit()` output ' - f'{self.n_features_out_} features, but ' - '`inverse_transform()` called with ' - f'{X.shape[1]} features.') + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_out_} features as input.") return self._apply_transform_or_inverse(X, 'inverse_transform') def _apply_transform_or_inverse(self, X: np.ndarray, @@ -1270,6 +1267,11 @@ def predict(self, X: np.ndarray) -> np.ndarray: self._validate_feature_names(X) # Validate array X = sklearn.utils.validation.check_array(X, **self._check_array_params) + # Check number of features + if X.shape[1] != self.n_features_in_: + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_in_} features as input.") # Split episodes episodes = split_episodes(X, episode_feature=self.episode_feature_) # Predict for each episode @@ -1623,8 +1625,13 @@ def _validate_feature_names(self, X: np.ndarray) -> None: if not np.all(_extract_feature_names(X) == self.feature_names_in_): raise ValueError('Input features do not match fit features.') - # Extra estimator tags - # https://scikit-learn.org/stable/developers/develop.html#estimator-tags + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.target_tags.required = False + tags.target_tags.single_output = False + tags.target_tags.multi_output = True + return tags + def _more_tags(self): return { 'multioutput': True, @@ -1812,12 
+1819,11 @@ def transform(self, X: np.ndarray) -> np.ndarray: X, **self._check_array_params, ) - # Check input shape + # Check number of features if X.shape[1] != self.n_features_in_: - raise ValueError(f'{self.__class__.__name__} `fit()` called ' - f'with {self.n_features_in_} features, but ' - f'`transform()` called with {X.shape[1]} ' - 'features.') + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_in_} features as input.") # Split episodes episodes = split_episodes(X, episode_feature=self.episode_feature_) episodes_state = [] @@ -1873,12 +1879,11 @@ def inverse_transform(self, X: np.ndarray) -> np.ndarray: sklearn.utils.validation.check_is_fitted(self) X = sklearn.utils.validation.check_array( X, **self._check_array_params) - # Check input shape + # Check number of features if X.shape[1] != self.n_features_out_: - raise ValueError(f'{self.__class__.__name__} `fit()` output ' - f'{self.n_features_out_} features, but ' - '`inverse_transform()` called with ' - f'{X.shape[1]} features.') + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_out_} features as input.") # Split episodes episodes = split_episodes(X, episode_feature=self.episode_feature_) episodes_state = [] @@ -2278,12 +2283,11 @@ def transform(self, X: np.ndarray) -> np.ndarray: X, **self._check_array_params, ) - # Check input shape + # Check number of features if X.shape[1] != self.n_features_in_: - raise ValueError(f'{self.__class__.__name__} `fit()` called ' - f'with {self.n_features_in_} features, but ' - f'`transform()` called with {X.shape[1]} ' - 'features.') + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_in_} features as input.") # Apply lifting functions X_out = X for _, lf in self.lifting_functions_: @@ -2309,12 +2313,11 @@ def inverse_transform(self, X: np.ndarray) -> np.ndarray: X, 
**self._check_array_params, ) - # Check input shape + # Check number of features if X.shape[1] != self.n_features_out_: - raise ValueError(f'{self.__class__.__name__} `fit()` output ' - f'{self.n_features_out_} features, but ' - '`inverse_transform()` called with ' - f'{X.shape[1]} features.') + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_out_} features as input.") # Apply inverse lifting functions in reverse order X_out = X for _, lf in self.lifting_functions_[::-1]: @@ -2360,6 +2363,11 @@ def predict(self, X: np.ndarray) -> np.ndarray: X, **self._check_array_params, ) + # Check number of features + if X.shape[1] != self.n_features_in_: + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_in_} features as input.") # Lift data matrix X_trans = self.transform(X) # Predict in lifted space @@ -2405,6 +2413,11 @@ def score(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> float: self._validate_feature_names(X) # Validate input array X = sklearn.utils.validation.check_array(X, **self._check_array_params) + # Check number of features + if X.shape[1] != self.n_features_in_: + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_in_} features as input.") scorer = KoopmanPipeline.make_scorer() score = scorer(self, X, None) return score diff --git a/pykoop/lifting_functions.py b/pykoop/lifting_functions.py index 05102a3..43c3cb0 100644 --- a/pykoop/lifting_functions.py +++ b/pykoop/lifting_functions.py @@ -72,9 +72,9 @@ class SkLearnLiftingFn(koopman_pipeline.EpisodeIndependentLiftingFn): ['ep', 'StandardScaler(x0)', 'StandardScaler(x1)', 'StandardScaler(u0)'] >>> X_msd_pp = std_scaler.transform(X_msd) >>> np.mean(X_msd_pp[:, 1:], axis=0) - array([...]) + array(...) >>> np.std(X_msd_pp[:, 1:], axis=0) - array([...]) + array(...) 
""" def __init__( diff --git a/pykoop/lmi_regressors.py b/pykoop/lmi_regressors.py index 9f14175..a5f169c 100644 --- a/pykoop/lmi_regressors.py +++ b/pykoop/lmi_regressors.py @@ -97,6 +97,15 @@ class LmiRegressor(koopman_pipeline.KoopmanRegressor): 'dtype': 'float64', } + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.target_tags.required = False + tags.target_tags.single_output = False + tags.target_tags.multi_output = True + # Hard to guarantee exact idempotence when calling external solver. + tags.non_deterministic = True + return tags + def _more_tags(self): reason = ('Hard to guarantee exact idempotence when calling external ' 'solver.') @@ -2372,6 +2381,13 @@ def predict(self, X: np.ndarray) -> np.ndarray: sklearn.utils.validation.check_is_fitted(self) return self.hinf_regressor_.predict(X) + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.target_tags.required = False + tags.target_tags.single_output = False + tags.target_tags.multi_output = True + return tags + def _more_tags(self): return { 'multioutput': True, diff --git a/pykoop/regressors.py b/pykoop/regressors.py index e4cb088..3e8d456 100644 --- a/pykoop/regressors.py +++ b/pykoop/regressors.py @@ -149,7 +149,7 @@ def _fit_regressor(self, X_unshifted: np.ndarray, G = (Theta_p @ Psi.T) / q H = (Psi @ Psi.T) / q self.regressor_.fit(H.T, G.T) - coef = self.regressor_.coef_.T + coef = np.atleast_2d(self.regressor_.coef_).T return coef def _validate_parameters(self) -> None: @@ -395,6 +395,17 @@ def _validate_parameters(self) -> None: if self.mode_type not in valid_mode_types: raise ValueError(f'`mode_type` must be one of {valid_mode_types}') + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.target_tags.required = False + tags.target_tags.single_output = False + tags.target_tags.multi_output = True + # The `dmd.Dmd` class requires X and y to have the same number of + # features. 
This test does not meet that requirement and must be + # skipped for now. + tags._skip_test = True + return tags + def _more_tags(self): reason = ('The `dmd.Dmd` class requires X and y to have the same ' 'number of features. This test does not meet that ' @@ -501,6 +512,16 @@ def _validate_parameters(self) -> None: # No parameters to validate pass + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.target_tags.required = False + tags.target_tags.single_output = False + tags.target_tags.multi_output = True + # Allow a bad score since the ``coef_`` matrix will be filled with + # zeros, and we just care to test ``scikit-learn`` API compliance. + tags.regressor_tags.poor_score = True + return tags + def _more_tags(self): return { 'multioutput': True, diff --git a/pykoop/tsvd.py b/pykoop/tsvd.py index 899905a..73e19e3 100644 --- a/pykoop/tsvd.py +++ b/pykoop/tsvd.py @@ -79,7 +79,7 @@ def __init__( >>> tsvd.fit(X_msd) Tsvd() >>> tsvd.singular_values_ - array([...]) + array(...) SVD with cutoff truncation @@ -87,7 +87,7 @@ def __init__( >>> tsvd.fit(X_msd) Tsvd(truncation='cutoff', truncation_param=0.001) >>> tsvd.singular_values_ - array([...]) + array(...) SVD with manual rank truncation @@ -95,7 +95,7 @@ def __init__( >>> tsvd.fit(X_msd) Tsvd(truncation='rank', truncation_param=2) >>> tsvd.singular_values_ - array([...]) + array(...) 
""" self.truncation = truncation self.truncation_param = truncation_param diff --git a/pykoop/util.py b/pykoop/util.py index ba20e1c..85d8304 100644 --- a/pykoop/util.py +++ b/pykoop/util.py @@ -458,8 +458,8 @@ def example_data_pendulum() -> Dict[str, Any]: X_pend = np.vstack(X_pend_lst) valid_ep = [5, 25, 45] train_ep = list(set(range(n_ep)) - set(valid_ep)) - valid_idx = np.where(np.in1d(X_pend[:, 0], valid_ep))[0] - train_idx = np.where(np.in1d(X_pend[:, 0], train_ep))[0] + valid_idx = np.where(np.isin(X_pend[:, 0], valid_ep))[0] + train_idx = np.where(np.isin(X_pend[:, 0], train_ep))[0] X_train = X_pend[train_idx, :] X_valid = X_pend[valid_idx, :] n_inputs = 0 @@ -536,8 +536,8 @@ def u(t): X_do = np.vstack(X_do_lst) valid_ep = [n_ep - 3, n_ep - 2, n_ep - 1] train_ep = list(set(range(n_ep)) - set(valid_ep)) - valid_idx = np.where(np.in1d(X_do[:, 0], valid_ep))[0] - train_idx = np.where(np.in1d(X_do[:, 0], train_ep))[0] + valid_idx = np.where(np.isin(X_do[:, 0], valid_ep))[0] + train_idx = np.where(np.isin(X_do[:, 0], train_ep))[0] X_train = X_do[train_idx, :] X_valid = X_do[valid_idx, :] n_inputs = 1 diff --git a/requirements.txt b/requirements.txt index 13885b0..ee05d47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Requirements to install ``pykoop`` and run examples numpy>=1.21.0 scipy>=1.8.0 -scikit-learn>=1.2.0 +scikit-learn>=1.2.0, <1.7.0 PICOS>=2.4.0 optht>=0.2.0 Deprecated>=1.2.13 diff --git a/setup.py b/setup.py index 8463441..2ed2fbe 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ install_requires=[ 'numpy>=1.21.0', 'scipy>=1.7.0', - 'scikit-learn>=1.0.0', + 'scikit-learn>=1.2.0, <1.7.0', 'picos>=2.4.0', 'optht>=0.2.0', 'Deprecated>=1.2.13',