Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ Changes:
membership thresholds. A sample is predicted as a member when its observed score
exceeds the predicted threshold at quantile level (1 - alpha). No shadow models or
architecture knowledge required. Registered in the attack factory as `"qmia"`.
* Refactor: move `InstanceBasedAttack._unwrap_model` to `sacroml.attacks.utils` as
the public function `unwrap_model` so it can be reused by other attacks that need
to split a scikit-learn `Pipeline` into its final estimator and preprocessing stages
([#455](https://github.com/AI-SDC/SACRO-ML/issues/455)).

## Version 1.4.3 (Jan 29, 2026)

Expand Down
27 changes: 2 additions & 25 deletions sacroml/attacks/instance_based_attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,13 @@

import numpy as np
from fpdf import FPDF
from sklearn.base import BaseEstimator
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR, NuSVC, NuSVR, OneClassSVM

try:
from sklearn.pipeline import Pipeline
except ImportError: # pragma: no cover
Pipeline = None

from sacroml.attacks import report
from sacroml.attacks.attack import Attack
from sacroml.attacks.target import Target
from sacroml.attacks.utils import unwrap_model

# Configure the root logger at INFO level when this module is imported.
# ``basicConfig`` does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -154,24 +149,6 @@ def attackable(cls, target: Target) -> bool:
return False
return True

@staticmethod
def _unwrap_model(model: BaseEstimator) -> tuple[BaseEstimator, Pipeline | None]:
    """Split a model into its final estimator and its preprocessing steps.

    Parameters
    ----------
    model : BaseEstimator
        Estimator to inspect; may or may not be a ``Pipeline``.

    Returns
    -------
    tuple
        ``(final_estimator, preprocessor)``. For a ``Pipeline`` with more
        than one step, ``preprocessor`` is a new ``Pipeline`` built from
        every step except the last, so that X_train can be transformed to
        the same space as the stored instances. In all other cases
        ``preprocessor`` is ``None``.
    """
    # ``Pipeline`` is None when the optional import failed; in that case, or
    # when the model is simply not a Pipeline, there is nothing to unwrap.
    if Pipeline is None or not isinstance(model, Pipeline):
        return model, None

    steps = model.steps
    estimator = steps[-1][1]
    if len(steps) > 1:
        leading = Pipeline(steps[:-1])
    else:
        leading = None
    return estimator, leading

def _compare_instances(
self,
stored_instances: np.ndarray,
Expand Down Expand Up @@ -289,7 +266,7 @@ def _attack(self, target: Target) -> dict:
dict
Attack report dictionary.
"""
raw_model, preprocessor = self._unwrap_model(target.model.model)
raw_model, preprocessor = unwrap_model(target.model.model)
model_type = type(raw_model).__name__

is_svm = isinstance(raw_model, SVM_TYPES)
Expand Down
33 changes: 33 additions & 0 deletions sacroml/attacks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import numpy as np
from scipy.stats import shapiro
from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline

from sacroml.attacks.model import Model
from sacroml.attacks.target import Target
Expand Down Expand Up @@ -278,3 +279,35 @@ def get_class_by_name(class_path: str) -> type[object]:
module_path, class_name = class_path.rsplit(".", 1)
module = importlib.import_module(module_path)
return getattr(module, class_name)


def unwrap_model(
    model: BaseEstimator,
) -> tuple[BaseEstimator, Pipeline | None]:
    """Split a scikit-learn model into final estimator and preprocessor.

    A :class:`sklearn.pipeline.Pipeline` is separated into its last step
    (the estimator) and a new ``Pipeline`` wrapping all preceding steps (the
    preprocessor). Callers can use the preprocessor to map inputs into the
    feature space seen by the final estimator. The step objects themselves
    are reused, not copied. Any model that is not a ``Pipeline`` is handed
    back as-is.

    Parameters
    ----------
    model : BaseEstimator
        A fitted scikit-learn estimator, optionally wrapped in a ``Pipeline``.

    Returns
    -------
    tuple[BaseEstimator, Pipeline | None]
        ``(final_estimator, preprocessor)``. ``preprocessor`` is ``None``
        when ``model`` is not a ``Pipeline`` or when the ``Pipeline`` has a
        single step; otherwise it is a ``Pipeline`` of every step but the
        last.
    """
    # Guard clause: nothing to peel off a plain estimator.
    if not isinstance(model, Pipeline):
        return model, None

    steps = model.steps
    # Each step is a ``(name, estimator)`` pair; keep only the estimator.
    estimator = steps[-1][1]
    if len(steps) > 1:
        leading = Pipeline(steps[:-1])
    else:
        # A one-step Pipeline has no preprocessing stage.
        leading = None
    return estimator, leading
58 changes: 58 additions & 0 deletions tests/attacks/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Tests for sacroml.attacks.utils helper functions."""

from __future__ import annotations

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from sacroml.attacks.utils import unwrap_model


class TestUnwrapModel:
    """Tests for ``unwrap_model``.

    Covers the three input shapes the helper distinguishes: a plain
    estimator, a single-step ``Pipeline`` and a multi-step ``Pipeline``.
    """

    def test_non_pipeline_returns_model_and_none(self):
        """A plain estimator is returned unchanged with no preprocessor."""
        model = SVC(gamma=0.1)
        estimator, preprocessor = unwrap_model(model)
        assert estimator is model
        assert preprocessor is None

    def test_single_step_pipeline_returns_final_step_only(self):
        """A one-step Pipeline yields its final estimator and no preprocessor."""
        final = LogisticRegression()
        pipe = Pipeline([("clf", final)])
        estimator, preprocessor = unwrap_model(pipe)
        assert estimator is final
        assert preprocessor is None

    def test_multi_step_pipeline_splits_preprocessor_from_estimator(self):
        """A multi-step Pipeline yields the final step and a Pipeline of the rest."""
        scaler = StandardScaler()
        final = LogisticRegression()
        pipe = Pipeline([("scaler", scaler), ("clf", final)])

        estimator, preprocessor = unwrap_model(pipe)

        assert estimator is final
        assert isinstance(preprocessor, Pipeline)
        assert [name for name, _ in preprocessor.steps] == ["scaler"]
        # The step object is shared with the original pipeline, not copied.
        assert preprocessor.steps[0][1] is scaler

    def test_multi_step_preprocessor_transforms_input(self):
        """The returned preprocessor can transform inputs end-to-end."""
        rng = np.random.default_rng(0)
        X = rng.normal(size=(20, 3))
        y = rng.integers(0, 2, size=20)

        pipe = Pipeline([("scaler", StandardScaler()), ("clf", LogisticRegression())])
        pipe.fit(X, y)

        _, preprocessor = unwrap_model(pipe)
        # Fail with a clear assertion rather than an AttributeError on None.
        assert preprocessor is not None
        transformed = preprocessor.transform(X)

        # The preprocessor must reproduce the fitted scaler's output exactly.
        expected = pipe.named_steps["scaler"].transform(X)
        np.testing.assert_allclose(transformed, expected)

        # StandardScaler and ``ndarray.std`` both use the population standard
        # deviation, so the training data standardises to mean 0 and std 1 up
        # to floating-point error; a loose tolerance would hide wrong scaling.
        np.testing.assert_allclose(transformed.mean(axis=0), 0, atol=1e-8)
        np.testing.assert_allclose(transformed.std(axis=0), 1, atol=1e-8)