From a81df5a5443f593517fa10c2d4252534b1441a5b Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 12 Feb 2026 18:14:25 +0100 Subject: [PATCH 01/22] enh - add Y transformation keyword argument in NormativeModel to enforce positivity --- pcntoolkit/normative_model.py | 78 +++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/pcntoolkit/normative_model.py b/pcntoolkit/normative_model.py index 3928a11a..5bee8bfb 100644 --- a/pcntoolkit/normative_model.py +++ b/pcntoolkit/normative_model.py @@ -54,6 +54,13 @@ class NormativeModel: Input (X/covariates) scaler to use. outscaler: str Output (Y/response_vars) scaler to use. + y_transform : str or None + Optional transform applied to Y before fitting and inverted + after prediction. Currently supported: ``"log"`` applies + log(Y+1) before fitting and then inverts centiles of the resulting + model back in the original space. + This is useful for phenotypes that cannot be negative. + Default is ``None`` (no transform). name: str Name of the model """ @@ -68,6 +75,7 @@ def __init__( save_dir: Optional[str] = None, inscaler: str = "standardize", outscaler: str = "standardize", + y_transform: Optional[str] = None, name: Optional[str] = None, ): self.savemodel: bool = savemodel @@ -77,6 +85,7 @@ def __init__( self._save_dir = save_dir if save_dir is not None else get_default_save_dir() self.inscaler: str = inscaler self.outscaler: str = outscaler + self.y_transform: Optional[str] = y_transform self.name: Optional[str] = name self.response_vars: list[str] = None # type: ignore self.template_regression_model: RegressionModel = template_regression_model @@ -180,6 +189,7 @@ def transfer(self, transfer_data: NormData, save_dir: str | None = None, **kwarg saveplots=True, inscaler=self.inscaler, outscaler=self.outscaler, + y_transform=self.y_transform, save_dir=self.save_dir, ) if save_dir is not None: @@ -239,6 +249,7 @@ def extend(self, data: NormData, save_dir: str | None = None, n_synth_samples: i saveplots=True, inscaler=self.inscaler, outscaler=self.outscaler, + y_transform=self.y_transform, save_dir=save_dir, ) @@ -459,6 +470,7 @@ def load(cls, path: str, into: NormativeModel | None = None) -> NormativeModel: outscaler = metadata["outscaler"] saveplots = metadata["saveplots"] evaluate_model = metadata["evaluate_model"] + y_transform = metadata.get("y_transform", None) name = metadata["name"] response_vars = [] @@ -491,6 +503,7 @@ def load(cls, path: str, into: NormativeModel | None = None) -> NormativeModel: save_dir=save_dir, inscaler=inscaler, outscaler=outscaler, + y_transform=y_transform, name=name, ) else: @@ -548,9 +561,13 @@ def preprocess(self, data: NormData) -> None: """ Applies preprocessing transformations to the input data. + First applies an optional response transform (e.g. log1p), then scales. + Args: data (NormData): Data to preprocess. """ + # Enforce positivity if necessary + self._apply_y_transform(data) self.scale_forward(data) def scale_forward(self, data: NormData, overwrite: bool = False) -> None: @@ -585,10 +602,15 @@ def scale_forward(self, data: NormData, overwrite: bool = False) -> None: def postprocess(self, data: NormData) -> None: """Apply postprocessing to the data. + First unscales, then applies the inverse response transform (e.g. expm1). + Args: data (NormData): Data to postprocess. """ self.scale_backward(data) + # Invert Y to its original space if positivity was enforced during + # preprocessing + self._invert_y_transform(data) def scale_backward(self, data: NormData) -> None: """ @@ -605,6 +627,59 @@ def scale_backward(self, data: NormData) -> None: """ data.scale_backward(self.inscalers, self.outscalers) + def _apply_y_transform(self, data: NormData) -> None: + """ + Apply the forward response transform (e.g. log1p) to Y-like variables + in the data. + + Parameters + ---------- + data : NormData + Data object containing response variable arrays (Y, Yhat, + centiles, thrive_Y) to which the transform should be applied. + + """ + if self.y_transform is None: + return + + + # TODO: Check if we need to track if transform has already been + # applied to avoid double-inverting. Normally I dont expect any issues + # as every process() is followed by a postprocess(). The only issues can + # be if users call postprocess() multiple times manually or with + # compute_thrivelines() that has a preprocess() call without a postprocess(). + + if self.y_transform == "log1p": + # Apply transform to all Y-like input variables + for var in "Y": + if var in data.data_vars: + data[var] = np.log1p(data[var]) + + def _invert_y_transform(self, data: NormData) -> None: + """ + Apply the inverse response transform (e.g. expm1) to Y-like variables + in the data. + + Parameters + ---------- + data : NormData + Data object containing response variable arrays (Y, Yhat, + centiles, thrive_Y) to which the inverse transform should be applied. + """ + if self.y_transform is None: + return + + # TODO: Check if we need to track if inverse transform has already been + # applied to avoid double-inverting. Normally I dont expect any issues + # as every process() is followed by a postprocess(). The only issues can + # be if users call postprocess() multiple times manually or with + # compute_thrivelines() that has a preprocess() call without a postprocess(). + + if self.y_transform == "log1p": + for var in ("Y", "centiles", "Yhat", "Y_harmonized", "thrive_Y"): + if var in data.data_vars: + data[var] = np.expm1(data[var]) + def evaluate(self, data: NormData) -> None: """ Evaluates the model performance on the data. @@ -927,6 +1002,7 @@ def to_dict(self): "is_fitted": self.is_fitted, "inscaler": self.inscaler, "outscaler": self.outscaler, + "y_transform": self.y_transform, "ptk_version": importlib.metadata.version("pcntoolkit"), } @@ -986,6 +1062,7 @@ def from_args(cls, **kwargs) -> NormativeModel: inscaler = kwargs.get("inscaler", "none") outscaler = kwargs.get("outscaler", "none") name = kwargs.get("name", None) + y_transform = kwargs.get("y_transform", None) assert "alg" in kwargs, "Algorithm must be specified" if kwargs["alg"] == "blr": template_regression_model = BLR.from_args("template", kwargs) @@ -1004,6 +1081,7 @@ def from_args(cls, **kwargs) -> NormativeModel: save_dir=save_dir, inscaler=inscaler, outscaler=outscaler, + y_transform=y_transform, name=name, ) From 1d2f5360629921b47c6fdb6bf5d0cfeb6e18e235 Mon Sep 17 00:00:00 2001 From: contsili Date: Mon, 2 Mar 2026 18:38:06 +0100 Subject: [PATCH 02/22] enh - rename NormativeModel tests That way it is clear what is tested: main: tests the main normative model functions helper: tests the helper functions --- ..._model.py => test_normative_model_main.py} | 0 test/test_norm/test_normative_model_helper.py | 297 ++++++++++++++++++ 2 files changed, 297 insertions(+) rename test/test_core/{test_normative_model.py => test_normative_model_main.py} (100%) create mode 100644 test/test_norm/test_normative_model_helper.py diff --git a/test/test_core/test_normative_model.py b/test/test_core/test_normative_model_main.py similarity index 100% rename from test/test_core/test_normative_model.py rename to test/test_core/test_normative_model_main.py diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py new file mode 100644 index 00000000..3caf02a6 --- /dev/null +++ b/test/test_norm/test_normative_model_helper.py @@ -0,0 +1,297 @@ +import copy + +import numpy as np +import pytest + +from pcntoolkit.dataio.norm_data import NormData +from pcntoolkit.math_functions.likelihood import NormalLikelihood +from pcntoolkit.math_functions.prior import * +from pcntoolkit.normative_model import NormativeModel +from pcntoolkit.regression_model.blr import * +from pcntoolkit.regression_model.hbr import * +from test import test_norm +from test.fixtures.data_fixtures import * +from test.fixtures.norm_data_fixtures import * +from test.fixtures.path_fixtures import * +from test.fixtures.test_model_fixtures import * + +""" +This file contains tests for the NormBase class in the PCNtoolkit. + +The tests cover the following aspects: +1. Fitting the model with valid data +2. Handling invalid data for fitting +3. Scaling methods for data +4. Polynomial basis expansion +5. B-spline basis expansion +""" + + +def test_fit(new_norm_test_model: NormativeModel, norm_data_from_arrays: NormData): + if os.path.exists(new_norm_test_model.save_dir): + shutil.rmtree(new_norm_test_model.save_dir) + os.makedirs(new_norm_test_model.save_dir, exist_ok=True) + new_norm_test_model.fit(norm_data_from_arrays) + assert new_norm_test_model.is_fitted + + +def test_fit_predict(new_norm_test_model: NormativeModel, norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData): + if os.path.exists(new_norm_test_model.save_dir): + shutil.rmtree(new_norm_test_model.save_dir) + os.makedirs(new_norm_test_model.save_dir, exist_ok=True) + new_norm_test_model.fit_predict(norm_data_from_arrays, test_norm_data_from_arrays) + assert new_norm_test_model.is_fitted + + +def test_predict(fitted_norm_test_model: NormativeModel, test_norm_data_from_arrays: NormData): + os.makedirs(fitted_norm_test_model.save_dir, exist_ok=True) + fitted_norm_test_model.predict(test_norm_data_from_arrays) + assert fitted_norm_test_model.is_fitted + + +def test_harmonize(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData): + fitted_norm_test_model.harmonize(norm_data_from_arrays) + assert fitted_norm_test_model.is_fitted + + +def test_compute_zscores(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData): + fitted_norm_test_model.compute_zscores(norm_data_from_arrays) + assert fitted_norm_test_model.is_fitted + + +def test_compute_centiles(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData): + fitted_norm_test_model.compute_centiles(norm_data_from_arrays) + assert fitted_norm_test_model.is_fitted + + +# Test the fit method of the NormBase class with invalid data +def test_norm_base_fit_invalid_data(new_norm_test_model: NormativeModel): + with pytest.raises(AttributeError): + new_norm_test_model.fit(None) + + with pytest.raises(AttributeError): + new_norm_test_model.fit("invalid_data") + + +# Scaling tests +@pytest.mark.parametrize("scaler", ["standardize", "minmax"]) +def test_scaling( + norm_data_from_arrays, + new_norm_test_model: NormativeModel, + n_covariates, + n_response_vars, + scaler, +): + object.__setattr__(new_norm_test_model, "inscaler", scaler) + object.__setattr__(new_norm_test_model, "outscaler", scaler) + copydata = copy.deepcopy(norm_data_from_arrays) + copydata.attrs["is_scaled"] = False + for s in new_norm_test_model.inscalers: + s.is_fitted = False + for s in new_norm_test_model.outscalers: + s.is_fitted = False + X_bak = norm_data_from_arrays.X.data.copy() + y_bak = norm_data_from_arrays.Y.data.copy() + new_norm_test_model.scale_forward(copydata) + + if scaler == "standardize": + assert_standardized(copydata, n_covariates, n_response_vars) + elif scaler == "minmax": + assert_minmax_scaled(copydata) + + new_norm_test_model.scale_backward(copydata) + assert np.allclose(copydata.X.data, X_bak) + assert np.allclose(copydata.Y.data, y_bak) + + +def assert_standardized(data, n_covariates, n_response_vars): + assert data.X.data.mean(axis=0) == pytest.approx(n_covariates * [0]) + assert data.X.data.std(axis=0) == pytest.approx(n_covariates * [1]) + assert data.Y.data.mean(axis=0) == pytest.approx(n_response_vars * [0]) + assert data.Y.data.std(axis=0) == pytest.approx(n_response_vars * [1]) + + +def assert_minmax_scaled(data): + assert np.allclose(data.X.data.min(axis=0), 0) + assert np.allclose(data.X.data.max(axis=0), 1) + assert np.allclose(data.Y.data.min(axis=0), 0) + assert np.allclose(data.Y.data.max(axis=0), 1) + + +def test_test_model_to_and_from_dict_and_args(test_model_args: dict, norm_data_from_arrays: NormData, save_dir_test_model): + model = NormativeModel.from_args(**test_model_args) + model_dict = model.to_dict() + assert model_dict["template_regression_model"]["success_ratio"] == test_model_args["success_ratio"] + model.fit(norm_data_from_arrays) + assert model.is_fitted + model_dict = model.to_dict() + assert model_dict["template_regression_model"]["success_ratio"] == test_model_args["success_ratio"] + model.predict(norm_data_from_arrays) + assert hasattr(norm_data_from_arrays, "Z") + Z_bak = copy.deepcopy(norm_data_from_arrays["Z"]) + del norm_data_from_arrays["Z"] + assert not hasattr(norm_data_from_arrays, "Z") + model.save(save_dir_test_model) + loaded_model = NormativeModel.load(save_dir_test_model) + assert loaded_model.is_fitted + loaded_model.predict(norm_data_from_arrays) + assert np.allclose(norm_data_from_arrays["Z"], Z_bak) + + +@pytest.fixture +def hbr_model_args(save_dir_hbr): + return { + "savemodel": False, + "saveresults": False, + "evaluate_model": False, + "saveplots": False, + "save_dir": save_dir_hbr, + "inscaler": "standardize", + "outscaler": "standardize", + "name": "hbr_test_model", + "alg": "hbr", + "likelihood": "Normal", + "linear_mu": True, + "random_mu": False, + "random_slope_mu": False, + "random_intercept_mu": True, + "linear_sigma": False, + "random_sigma": True, + "dist_name_sigma_sigma": "LogNormal", + "dist_params_sigma_sigma": (2.5, 1.3), + "draws": 10, + "tune": 10, + "cores": 1, + } + + +def test_hbr_model_to_and_from_dict_and_args(hbr_model_args: dict, norm_data_from_arrays: NormData, save_dir_hbr): + model = NormativeModel.from_args(**hbr_model_args) + model_dict = model.to_dict() + for k in ["savemodel", "saveresults", "evaluate_model", "saveplots", "inscaler", "outscaler", "name"]: + assert model_dict[k] == hbr_model_args[k] + tmplt = model.template_regression_model + assert isinstance(tmplt, HBR) + assert isinstance(tmplt.likelihood, NormalLikelihood) + assert isinstance(tmplt.likelihood.mu, LinearPrior) + assert isinstance(tmplt.likelihood.mu.intercept, RandomPrior) + assert isinstance(tmplt.likelihood.sigma, RandomPrior) + assert isinstance(tmplt.likelihood.sigma.sigma, Prior) + assert tmplt.likelihood.sigma.sigma.dist_name == "LogNormal" + assert tmplt.likelihood.sigma.sigma.dist_params == (2.5, 1.3) + model.fit(norm_data_from_arrays) + assert model.is_fitted + + model1 = model[model.response_vars[0]] + assert isinstance(model1, HBR) + assert isinstance(model1.likelihood, NormalLikelihood) + assert isinstance(model1.likelihood.mu, LinearPrior) + assert isinstance(model1.likelihood.mu.intercept, RandomPrior) + assert isinstance(model1.likelihood.sigma, RandomPrior) + assert isinstance(model1.likelihood.sigma.sigma, Prior) + assert model1.likelihood.sigma.sigma.dist_name == "LogNormal" + assert model1.likelihood.sigma.sigma.dist_params == (2.5, 1.3) + + model.predict(norm_data_from_arrays) + assert hasattr(norm_data_from_arrays, "Z") + Z_bak = copy.deepcopy(norm_data_from_arrays["Z"]) + del norm_data_from_arrays["Z"] + assert not hasattr(norm_data_from_arrays, "Z") + model.save(save_dir_hbr) + del model + loaded_model = NormativeModel.load(save_dir_hbr) + loaded_model.predict(norm_data_from_arrays) + assert np.allclose(norm_data_from_arrays["Z"], Z_bak) + assert loaded_model.is_fitted + model1 = loaded_model[loaded_model.response_vars[0]] + assert isinstance(model1, HBR) + assert isinstance(model1.likelihood, NormalLikelihood) + assert isinstance(model1.likelihood.mu, LinearPrior) + assert isinstance(model1.likelihood.mu.intercept, RandomPrior) + assert isinstance(model1.likelihood.sigma, RandomPrior) + assert isinstance(model1.likelihood.sigma.sigma, Prior) + assert model1.likelihood.sigma.sigma.dist_name == "LogNormal" + assert model1.likelihood.sigma.sigma.dist_params == [2.5, 1.3] + + +@pytest.fixture +def blr_model_args(save_dir_blr): + return { + "savemodel": False, + "saveresults": False, + "evaluate_model": False, + "saveplots": False, + "save_dir": save_dir_blr, + "inscaler": "standardize", + "outscaler": "standardize", + "name": "blr_test_model", + "alg": "blr", + "n_iter": 10, + "tol": 1e-3, + "ard": False, + "optimizer": "l-bfgs-b", + "l_bfgs_b_l": 0.7, + "l_bfgs_b_epsilon": 0.1, + "l_bfgs_b_norm": "l2", + "fixed_effect": True, + "heteroskedastic": False, + "fixed_effect_var": False, + } + + +def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_from_arrays: NormData, save_dir_blr): + model = NormativeModel.from_args(**blr_model_args) + model_dict = model.to_dict() + for k in ["savemodel", "saveresults", "evaluate_model", "saveplots", "inscaler", "outscaler", "name"]: + assert model_dict[k] == blr_model_args[k] + tmplt = model.template_regression_model + assert isinstance(tmplt, BLR) + assert tmplt.n_iter == 10 + assert tmplt.tol == 1e-3 + assert not tmplt.ard + assert tmplt.optimizer == "l-bfgs-b" + assert tmplt.l_bfgs_b_l == 0.7 + assert tmplt.l_bfgs_b_epsilon == 0.1 + assert tmplt.l_bfgs_b_norm == "l2" + assert tmplt.fixed_effect + assert not tmplt.heteroskedastic + assert not tmplt.fixed_effect_var + print("Fitting") + model.fit(norm_data_from_arrays) + print("fitted") + assert model.is_fitted + + model1 = model[model.response_vars[0]] + assert isinstance(model1, BLR) + assert model1.n_iter == 10 + assert model1.tol == 1e-3 + assert not model1.ard + assert model1.optimizer == "l-bfgs-b" + assert model1.l_bfgs_b_l == 0.7 + assert model1.l_bfgs_b_epsilon == 0.1 + assert model1.l_bfgs_b_norm == "l2" + assert model1.fixed_effect + assert not model1.fixed_effect_var + model.predict(norm_data_from_arrays) + assert hasattr(norm_data_from_arrays, "Z") + Z_bak = copy.deepcopy(norm_data_from_arrays["Z"]) + del norm_data_from_arrays["Z"] + assert not hasattr(norm_data_from_arrays, "Z") + model.save(save_dir_blr) + del model + loaded_model = NormativeModel.load(save_dir_blr) + model1 = loaded_model[loaded_model.response_vars[0]] + loaded_model.predict(norm_data_from_arrays) + assert np.allclose(norm_data_from_arrays["Z"], Z_bak) + + assert loaded_model.is_fitted + assert isinstance(model1, BLR) + assert model1.n_iter == 10 + assert model1.tol == 1e-3 + assert not model1.ard + assert model1.optimizer == "l-bfgs-b" + assert model1.l_bfgs_b_l == 0.7 + assert model1.l_bfgs_b_epsilon == 0.1 + assert model1.l_bfgs_b_norm == "l2" + assert model1.fixed_effect + assert not model1.fixed_effect_var From 06f21a37fc97f5049193b6b7a3c06b3eacd55bbd Mon Sep 17 00:00:00 2001 From: contsili Date: Mon, 2 Mar 2026 18:39:00 +0100 Subject: [PATCH 03/22] enh - generate data and model to use for the log transform test --- test/fixtures/test_model_fixtures.py | 88 ++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/test/fixtures/test_model_fixtures.py b/test/fixtures/test_model_fixtures.py index 4a7f26f3..8e7aa8b1 100644 --- a/test/fixtures/test_model_fixtures.py +++ b/test/fixtures/test_model_fixtures.py @@ -1,3 +1,7 @@ +import os +import shutil + +import numpy as np import pytest from pcntoolkit.dataio.norm_data import NormData @@ -43,3 +47,87 @@ def fitted_norm_test_model(new_norm_test_model: NormativeModel, norm_data_from_a os.makedirs(new_norm_test_model.save_dir, exist_ok=True) new_norm_test_model.fit(norm_data_from_arrays) return new_norm_test_model + +@pytest.fixture +def positive_norm_data( + train_arrays: tuple, +) -> NormData: + """Create NormData with strictly positive Y values. + + Takes the standard training arrays and shifts Y so + that every element is strictly greater than zero. + + Returns + ------- + NormData + Dataset whose Y column values are all > 0. + """ + # Unpack the standard training arrays + X, y, batch_effects = train_arrays + # Shift Y so every value is strictly positive + y_positive = np.abs(y) + 1.0 + # Build a NormData from the positive arrays + return NormData.from_ndarrays( + "positive_data", X, y_positive, batch_effects + ) + + +@pytest.fixture +def positive_test_norm_data( + test_arrays: tuple, +) -> NormData: + """Create test NormData with strictly positive Y values. + + Takes the standard test arrays and shifts Y so that + every element is strictly greater than zero. + + Returns + ------- + NormData + Dataset whose Y column values are all > 0. + """ + # Unpack the standard test arrays + X, y, batch_effects = test_arrays + # Shift Y so every value is strictly positive + y_positive = np.abs(y) + 1.0 + # Build a NormData from the positive arrays + return NormData.from_ndarrays( + "positive_test_data", X, y_positive, batch_effects + ) + + +@pytest.fixture +def norm_test_model_with_log_transform( + save_dir_test_model: str, +) -> NormativeModel: + """Create a NormativeModel using TestModel with log1p. + + The model has ``y_transform='log1p'`` so that Y is + log-transformed before fitting and back-transformed + after prediction, ensuring all outputs remain positive. + + Returns + ------- + NormativeModel + Un-fitted normative model with log1p transform. + """ + # Build a fresh save directory + log_dir = os.path.join(save_dir_test_model, "log1p") + if os.path.exists(log_dir): + shutil.rmtree(log_dir) + os.makedirs(log_dir, exist_ok=True) + # Create the underlying test regression model + test_model = TestModel("test_model_log1p") + # Return a NormativeModel with the log1p transform + return NormativeModel( + template_regression_model=test_model, + savemodel=False, + saveresults=False, + evaluate_model=False, + saveplots=False, + save_dir=log_dir, + inscaler="standardize", + outscaler="standardize", + y_transform="log1p", + name="test_model_log1p", + ) From f79717e8270ddc04c6e6ea14d628f68336da656c Mon Sep 17 00:00:00 2001 From: contsili Date: Mon, 2 Mar 2026 18:40:00 +0100 Subject: [PATCH 04/22] enh - add log transform tests for centiles and predictions to ensure non-negativity --- test/test_norm/test_normative_model_helper.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py index 3caf02a6..38a39468 100644 --- a/test/test_norm/test_normative_model_helper.py +++ b/test/test_norm/test_normative_model_helper.py @@ -295,3 +295,55 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro assert model1.l_bfgs_b_norm == "l2" assert model1.fixed_effect assert not model1.fixed_effect_var + + +def test_001_log_transform_centiles_should_beNonNegative( + norm_test_model_with_log_transform: NormativeModel, + positive_norm_data: NormData, + positive_test_norm_data: NormData, +) -> None: + """Centiles must be >= 0 after log1p round-trip. + + Extreme lower-tail centiles may be clipped to exactly 0 + by the positivity enforcement, so we check >= 0. + """ + # Arrange – fit the model, then predict on test data + norm_test_model_with_log_transform.fit_predict( + positive_norm_data, positive_test_norm_data + ) + # Assert – every centile in the back-transformed space >= 0 + assert bool( + np.all(positive_test_norm_data["centiles"].values >= 0) + ) + + +def test_002_log_transform_yhat_should_bePositive( + norm_test_model_with_log_transform: NormativeModel, + positive_norm_data: NormData, + positive_test_norm_data: NormData, +) -> None: + """Yhat must be > 0 after log1p round-trip.""" + # Arrange – fit the model, then predict on test data + norm_test_model_with_log_transform.fit_predict( + positive_norm_data, positive_test_norm_data + ) + # Assert – every Yhat in the back-transformed space > 0 + assert bool( + np.all(positive_test_norm_data["Yhat"].values > 0) + ) + + +def test_003_log_transform_Y_should_bePositive( + norm_test_model_with_log_transform: NormativeModel, + positive_norm_data: NormData, + positive_test_norm_data: NormData, +) -> None: + """Y must remain > 0 after the log1p round-trip.""" + # Arrange – fit the model, then predict on test data + norm_test_model_with_log_transform.fit_predict( + positive_norm_data, positive_test_norm_data + ) + # Assert – every Y value after the round-trip is > 0 + assert bool( + np.all(positive_test_norm_data["Y"].values > 0) + ) From 2774ddfa65f06ea3f9e1d48976311968af7d9e7d Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Tue, 3 Mar 2026 17:53:00 +0100 Subject: [PATCH 05/22] enh - refactor log transformed centiles and yhat tests - Add BLR model fixtures (instead of test model fixtures) - adjust assertions: centiles and yhat should be > -1 --- test/test_norm/test_normative_model_helper.py | 65 ++++++++----------- 1 file changed, 27 insertions(+), 38 deletions(-) diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py index 38a39468..36b221f9 100644 --- a/test/test_norm/test_normative_model_helper.py +++ b/test/test_norm/test_normative_model_helper.py @@ -10,6 +10,7 @@ from pcntoolkit.regression_model.blr import * from pcntoolkit.regression_model.hbr import * from test import test_norm +from test.fixtures.blr_model_fixtures import * from test.fixtures.data_fixtures import * from test.fixtures.norm_data_fixtures import * from test.fixtures.path_fixtures import * @@ -296,54 +297,42 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro assert model1.fixed_effect assert not model1.fixed_effect_var - -def test_001_log_transform_centiles_should_beNonNegative( - norm_test_model_with_log_transform: NormativeModel, - positive_norm_data: NormData, - positive_test_norm_data: NormData, +def test_log_transformed_centiles( + norm_blr_model_with_log_transform: NormativeModel, + norm_data_from_arrays: NormData, + test_norm_data_from_arrays: NormData, ) -> None: - """Centiles must be >= 0 after log1p round-trip. - - Extreme lower-tail centiles may be clipped to exactly 0 - by the positivity enforcement, so we check >= 0. - """ - # Arrange – fit the model, then predict on test data - norm_test_model_with_log_transform.fit_predict( - positive_norm_data, positive_test_norm_data + norm_blr_model_with_log_transform.fit_predict( + norm_data_from_arrays, test_norm_data_from_arrays ) - # Assert – every centile in the back-transformed space >= 0 + + # Check that Y are non-negative in the original data assert bool( - np.all(positive_test_norm_data["centiles"].values >= 0) + np.all(test_norm_data_from_arrays["Y"].values >= 0) ) - - -def test_002_log_transform_yhat_should_bePositive( - norm_test_model_with_log_transform: NormativeModel, - positive_norm_data: NormData, - positive_test_norm_data: NormData, -) -> None: - """Yhat must be > 0 after log1p round-trip.""" - # Arrange – fit the model, then predict on test data - norm_test_model_with_log_transform.fit_predict( - positive_norm_data, positive_test_norm_data + # Both training and test centiles should be bigger or equal to -1 + assert bool( + np.all(norm_data_from_arrays["centiles"].values > -1) ) - # Assert – every Yhat in the back-transformed space > 0 assert bool( - np.all(positive_test_norm_data["Yhat"].values > 0) + np.all(test_norm_data_from_arrays["centiles"].values > -1) ) -def test_003_log_transform_Y_should_bePositive( - norm_test_model_with_log_transform: NormativeModel, - positive_norm_data: NormData, - positive_test_norm_data: NormData, + +def test_log_transformed_yhat( + norm_blr_model_with_log_transform: NormativeModel, + norm_data_from_arrays: NormData, + test_norm_data_from_arrays: NormData, ) -> None: - """Y must remain > 0 after the log1p round-trip.""" - # Arrange – fit the model, then predict on test data - norm_test_model_with_log_transform.fit_predict( - positive_norm_data, positive_test_norm_data + norm_blr_model_with_log_transform.fit_predict( + norm_data_from_arrays, test_norm_data_from_arrays + ) + # We dont expect any negative yhat values in the train dataset + assert bool( + np.all(norm_data_from_arrays["Yhat"].values >= 0) ) - # Assert – every Y value after the round-trip is > 0 + # test dataset: -1 possibility due to extrapolation assert bool( - np.all(positive_test_norm_data["Y"].values > 0) + np.all(test_norm_data_from_arrays["Yhat"].values > -1) ) From c791079b3bfe1e0582290828f4dd0dc8a543ab77 Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Tue, 3 Mar 2026 17:53:58 +0100 Subject: [PATCH 06/22] fix - discard test model fixtures --- test/fixtures/test_model_fixtures.py | 84 ---------------------------- 1 file changed, 84 deletions(-) diff --git a/test/fixtures/test_model_fixtures.py b/test/fixtures/test_model_fixtures.py index 8e7aa8b1..0fa761db 100644 --- a/test/fixtures/test_model_fixtures.py +++ b/test/fixtures/test_model_fixtures.py @@ -47,87 +47,3 @@ def fitted_norm_test_model(new_norm_test_model: NormativeModel, norm_data_from_a os.makedirs(new_norm_test_model.save_dir, exist_ok=True) new_norm_test_model.fit(norm_data_from_arrays) return new_norm_test_model - -@pytest.fixture -def positive_norm_data( - train_arrays: tuple, -) -> NormData: - """Create NormData with strictly positive Y values. - - Takes the standard training arrays and shifts Y so - that every element is strictly greater than zero. - - Returns - ------- - NormData - Dataset whose Y column values are all > 0. - """ - # Unpack the standard training arrays - X, y, batch_effects = train_arrays - # Shift Y so every value is strictly positive - y_positive = np.abs(y) + 1.0 - # Build a NormData from the positive arrays - return NormData.from_ndarrays( - "positive_data", X, y_positive, batch_effects - ) - - -@pytest.fixture -def positive_test_norm_data( - test_arrays: tuple, -) -> NormData: - """Create test NormData with strictly positive Y values. - - Takes the standard test arrays and shifts Y so that - every element is strictly greater than zero. - - Returns - ------- - NormData - Dataset whose Y column values are all > 0. - """ - # Unpack the standard test arrays - X, y, batch_effects = test_arrays - # Shift Y so every value is strictly positive - y_positive = np.abs(y) + 1.0 - # Build a NormData from the positive arrays - return NormData.from_ndarrays( - "positive_test_data", X, y_positive, batch_effects - ) - - -@pytest.fixture -def norm_test_model_with_log_transform( - save_dir_test_model: str, -) -> NormativeModel: - """Create a NormativeModel using TestModel with log1p. - - The model has ``y_transform='log1p'`` so that Y is - log-transformed before fitting and back-transformed - after prediction, ensuring all outputs remain positive. - - Returns - ------- - NormativeModel - Un-fitted normative model with log1p transform. - """ - # Build a fresh save directory - log_dir = os.path.join(save_dir_test_model, "log1p") - if os.path.exists(log_dir): - shutil.rmtree(log_dir) - os.makedirs(log_dir, exist_ok=True) - # Create the underlying test regression model - test_model = TestModel("test_model_log1p") - # Return a NormativeModel with the log1p transform - return NormativeModel( - template_regression_model=test_model, - savemodel=False, - saveresults=False, - evaluate_model=False, - saveplots=False, - save_dir=log_dir, - inscaler="standardize", - outscaler="standardize", - y_transform="log1p", - name="test_model_log1p", - ) From fdc7371d56ddf15683f4f84a6fb0a13f963460fb Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Tue, 3 Mar 2026 17:55:19 +0100 Subject: [PATCH 07/22] enh - add blr model fixtures --- test/fixtures/blr_model_fixtures.py | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/test/fixtures/blr_model_fixtures.py b/test/fixtures/blr_model_fixtures.py index 87129235..88958249 100644 --- a/test/fixtures/blr_model_fixtures.py +++ b/test/fixtures/blr_model_fixtures.py @@ -1,3 +1,5 @@ +from math import log + import pytest from pcntoolkit.dataio.norm_data import NormData @@ -6,6 +8,7 @@ from pcntoolkit.regression_model.blr import BLR from test.fixtures.norm_data_fixtures import * from test.fixtures.path_fixtures import * +import os @pytest.fixture @@ -69,3 +72,37 @@ def fitted_norm_blr_model(new_norm_blr_model: NormativeModel, norm_data_from_arr os.makedirs(new_norm_blr_model.save_dir, exist_ok=True) new_norm_blr_model.fit(norm_data_from_arrays) return new_norm_blr_model + + +@pytest.fixture +def norm_blr_model_with_log_transform( + save_dir_test_model: str +) -> NormativeModel: + """Create a NormativeModel using BLR with log1p. + + Returns + ------- + NormativeModel + Un-fitted normative model with log1p transform. + """ + # Build a fresh save directory + log_dir = os.path.join(save_dir_test_model, "log1p") + if os.path.exists(log_dir): + shutil.rmtree(log_dir) + os.makedirs(log_dir, exist_ok=True) + # Create test regression model + test_model = BLR("name=test_model_log1p") + # Return a NormativeModel with the log1p transform + return NormativeModel( + template_regression_model=test_model, + savemodel=False, + saveresults=False, + evaluate_model=False, + saveplots=True, + save_dir=log_dir, + inscaler="standardize", + outscaler="standardize", + name="test_model_log1p", + y_transform=None, + ) + From fce4ee36b08c22ea775053d7acd294ef965cdfa8 Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Tue, 3 Mar 2026 18:00:26 +0100 Subject: [PATCH 08/22] fix - remove redundancies test_normative_model is the same as test_normative_model_helper --- test/test_norm/test_normative_model.py | 296 ------------------ test/test_norm/test_normative_model_helper.py | 3 +- 2 files changed, 1 insertion(+), 298 deletions(-) delete mode 100644 test/test_norm/test_normative_model.py diff --git a/test/test_norm/test_normative_model.py b/test/test_norm/test_normative_model.py deleted file mode 100644 index 2e50d191..00000000 --- a/test/test_norm/test_normative_model.py +++ /dev/null @@ -1,296 +0,0 @@ -import copy - -import numpy as np -import pytest - -from pcntoolkit.dataio.norm_data import NormData -from pcntoolkit.math_functions.likelihood import NormalLikelihood -from pcntoolkit.math_functions.prior import * -from pcntoolkit.normative_model import NormativeModel -from pcntoolkit.regression_model.blr import * -from pcntoolkit.regression_model.hbr import * -from test.fixtures.data_fixtures import * -from test.fixtures.norm_data_fixtures import * -from test.fixtures.path_fixtures import * -from test.fixtures.test_model_fixtures import * - -""" -This file contains tests for the NormBase class in the PCNtoolkit. - -The tests cover the following aspects: -1. Fitting the model with valid data -2. Handling invalid data for fitting -3. Scaling methods for data -4. Polynomial basis expansion -5. B-spline basis expansion -""" - - -def test_fit(new_norm_test_model: NormativeModel, norm_data_from_arrays: NormData): - if os.path.exists(new_norm_test_model.save_dir): - shutil.rmtree(new_norm_test_model.save_dir) - os.makedirs(new_norm_test_model.save_dir, exist_ok=True) - new_norm_test_model.fit(norm_data_from_arrays) - assert new_norm_test_model.is_fitted - - -def test_fit_predict(new_norm_test_model: NormativeModel, norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData): - if os.path.exists(new_norm_test_model.save_dir): - shutil.rmtree(new_norm_test_model.save_dir) - os.makedirs(new_norm_test_model.save_dir, exist_ok=True) - new_norm_test_model.fit_predict(norm_data_from_arrays, test_norm_data_from_arrays) - assert new_norm_test_model.is_fitted - - -def test_predict(fitted_norm_test_model: NormativeModel, test_norm_data_from_arrays: NormData): - os.makedirs(fitted_norm_test_model.save_dir, exist_ok=True) - fitted_norm_test_model.predict(test_norm_data_from_arrays) - assert fitted_norm_test_model.is_fitted - - -def test_harmonize(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData): - fitted_norm_test_model.harmonize(norm_data_from_arrays) - assert fitted_norm_test_model.is_fitted - - -def test_compute_zscores(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData): - fitted_norm_test_model.compute_zscores(norm_data_from_arrays) - assert fitted_norm_test_model.is_fitted - - -def test_compute_centiles(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData): - fitted_norm_test_model.compute_centiles(norm_data_from_arrays) - assert fitted_norm_test_model.is_fitted - - -# Test the fit method of the NormBase class with invalid data -def test_norm_base_fit_invalid_data(new_norm_test_model: NormativeModel): - with pytest.raises(AttributeError): - new_norm_test_model.fit(None) - - with pytest.raises(AttributeError): - new_norm_test_model.fit("invalid_data") - - -# Scaling tests -@pytest.mark.parametrize("scaler", ["standardize", "minmax"]) -def test_scaling( - norm_data_from_arrays, - new_norm_test_model: NormativeModel, - n_covariates, - n_response_vars, - scaler, -): - object.__setattr__(new_norm_test_model, "inscaler", scaler) - object.__setattr__(new_norm_test_model, "outscaler", scaler) - copydata = copy.deepcopy(norm_data_from_arrays) - copydata.attrs["is_scaled"] = False - for s in new_norm_test_model.inscalers: - s.is_fitted = False - for s in new_norm_test_model.outscalers: - s.is_fitted = False - X_bak = norm_data_from_arrays.X.data.copy() - y_bak = norm_data_from_arrays.Y.data.copy() - new_norm_test_model.scale_forward(copydata) - - if scaler == "standardize": - assert_standardized(copydata, n_covariates, n_response_vars) - elif scaler == "minmax": - assert_minmax_scaled(copydata) - - new_norm_test_model.scale_backward(copydata) - assert np.allclose(copydata.X.data, X_bak) - assert np.allclose(copydata.Y.data, y_bak) - - -def assert_standardized(data, n_covariates, n_response_vars): - assert data.X.data.mean(axis=0) == pytest.approx(n_covariates * [0]) - assert data.X.data.std(axis=0) == pytest.approx(n_covariates * [1]) - assert data.Y.data.mean(axis=0) == pytest.approx(n_response_vars * [0]) - assert data.Y.data.std(axis=0) == pytest.approx(n_response_vars * [1]) - - -def assert_minmax_scaled(data): - assert np.allclose(data.X.data.min(axis=0), 0) - assert np.allclose(data.X.data.max(axis=0), 1) - assert np.allclose(data.Y.data.min(axis=0), 0) - assert np.allclose(data.Y.data.max(axis=0), 1) - - -def test_test_model_to_and_from_dict_and_args(test_model_args: dict, norm_data_from_arrays: NormData, save_dir_test_model): - model = NormativeModel.from_args(**test_model_args) - model_dict = model.to_dict() - assert model_dict["template_regression_model"]["success_ratio"] == test_model_args["success_ratio"] - model.fit(norm_data_from_arrays) - assert model.is_fitted - model_dict = model.to_dict() - assert model_dict["template_regression_model"]["success_ratio"] == test_model_args["success_ratio"] - model.predict(norm_data_from_arrays) - assert hasattr(norm_data_from_arrays, "Z") - Z_bak = copy.deepcopy(norm_data_from_arrays["Z"]) - del norm_data_from_arrays["Z"] - assert not hasattr(norm_data_from_arrays, "Z") - model.save(save_dir_test_model) - loaded_model = NormativeModel.load(save_dir_test_model) - assert loaded_model.is_fitted - loaded_model.predict(norm_data_from_arrays) - assert np.allclose(norm_data_from_arrays["Z"], Z_bak) - - -@pytest.fixture -def hbr_model_args(save_dir_hbr): - return { - "savemodel": False, - "saveresults": False, - "evaluate_model": False, - "saveplots": False, - "save_dir": save_dir_hbr, - "inscaler": "standardize", - "outscaler": "standardize", - "name": "hbr_test_model", - "alg": "hbr", - "likelihood": "Normal", - "linear_mu": True, - "random_mu": False, - "random_slope_mu": False, - "random_intercept_mu": True, - "linear_sigma": False, - "random_sigma": True, - "dist_name_sigma_sigma": "LogNormal", - "dist_params_sigma_sigma": (2.5, 1.3), - "draws": 10, - "tune": 10, - "cores": 1, - } - - -def test_hbr_model_to_and_from_dict_and_args(hbr_model_args: dict, norm_data_from_arrays: NormData, save_dir_hbr): - model = NormativeModel.from_args(**hbr_model_args) - model_dict = model.to_dict() - for k in ["savemodel", "saveresults", "evaluate_model", "saveplots", "inscaler", "outscaler", "name"]: - assert model_dict[k] == hbr_model_args[k] - tmplt = model.template_regression_model - assert isinstance(tmplt, HBR) - assert isinstance(tmplt.likelihood, NormalLikelihood) - assert isinstance(tmplt.likelihood.mu, LinearPrior) - assert isinstance(tmplt.likelihood.mu.intercept, RandomPrior) - assert isinstance(tmplt.likelihood.sigma, RandomPrior) - assert isinstance(tmplt.likelihood.sigma.sigma, Prior) - assert tmplt.likelihood.sigma.sigma.dist_name == "LogNormal" - assert tmplt.likelihood.sigma.sigma.dist_params == (2.5, 1.3) - model.fit(norm_data_from_arrays) - assert model.is_fitted - - model1 = model[model.response_vars[0]] - assert isinstance(model1, HBR) - assert isinstance(model1.likelihood, NormalLikelihood) - assert isinstance(model1.likelihood.mu, LinearPrior) - assert isinstance(model1.likelihood.mu.intercept, RandomPrior) - assert isinstance(model1.likelihood.sigma, RandomPrior) - assert isinstance(model1.likelihood.sigma.sigma, Prior) - assert model1.likelihood.sigma.sigma.dist_name == "LogNormal" - assert model1.likelihood.sigma.sigma.dist_params == (2.5, 1.3) - - model.predict(norm_data_from_arrays) - assert hasattr(norm_data_from_arrays, "Z") - Z_bak = copy.deepcopy(norm_data_from_arrays["Z"]) - del norm_data_from_arrays["Z"] - assert not hasattr(norm_data_from_arrays, "Z") - model.save(save_dir_hbr) - del model - loaded_model = NormativeModel.load(save_dir_hbr) - loaded_model.predict(norm_data_from_arrays) - assert np.allclose(norm_data_from_arrays["Z"], Z_bak) - assert loaded_model.is_fitted - model1 = loaded_model[loaded_model.response_vars[0]] - assert isinstance(model1, HBR) - assert isinstance(model1.likelihood, NormalLikelihood) - assert isinstance(model1.likelihood.mu, LinearPrior) - assert isinstance(model1.likelihood.mu.intercept, RandomPrior) - assert isinstance(model1.likelihood.sigma, RandomPrior) - assert isinstance(model1.likelihood.sigma.sigma, Prior) - assert model1.likelihood.sigma.sigma.dist_name == "LogNormal" - assert model1.likelihood.sigma.sigma.dist_params == [2.5, 1.3] - - -@pytest.fixture -def blr_model_args(save_dir_blr): - return { - "savemodel": False, - "saveresults": False, - "evaluate_model": False, - "saveplots": False, - "save_dir": save_dir_blr, - "inscaler": "standardize", - "outscaler": "standardize", - "name": "blr_test_model", - "alg": "blr", - "n_iter": 10, - "tol": 1e-3, - "ard": False, - "optimizer": "l-bfgs-b", - "l_bfgs_b_l": 0.7, - "l_bfgs_b_epsilon": 0.1, - "l_bfgs_b_norm": "l2", - "fixed_effect": True, - "heteroskedastic": False, - "fixed_effect_var": False, - } - - -def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_from_arrays: NormData, save_dir_blr): - model = NormativeModel.from_args(**blr_model_args) - model_dict = model.to_dict() - for k in ["savemodel", "saveresults", "evaluate_model", "saveplots", "inscaler", "outscaler", "name"]: - assert model_dict[k] == blr_model_args[k] - tmplt = model.template_regression_model - assert isinstance(tmplt, BLR) - assert tmplt.n_iter == 10 - assert tmplt.tol == 1e-3 - assert not tmplt.ard - assert tmplt.optimizer == "l-bfgs-b" - assert tmplt.l_bfgs_b_l == 0.7 - assert tmplt.l_bfgs_b_epsilon == 0.1 - assert tmplt.l_bfgs_b_norm == "l2" - assert tmplt.fixed_effect - assert not tmplt.heteroskedastic - assert not tmplt.fixed_effect_var - print("Fitting") - model.fit(norm_data_from_arrays) - print("fitted") - assert model.is_fitted - - model1 = model[model.response_vars[0]] - assert isinstance(model1, BLR) - assert model1.n_iter == 10 - assert model1.tol == 1e-3 - assert not model1.ard - assert model1.optimizer == "l-bfgs-b" - assert model1.l_bfgs_b_l == 0.7 - assert model1.l_bfgs_b_epsilon == 0.1 - assert model1.l_bfgs_b_norm == "l2" - assert model1.fixed_effect - assert not model1.fixed_effect_var - model.predict(norm_data_from_arrays) - assert hasattr(norm_data_from_arrays, "Z") - Z_bak = copy.deepcopy(norm_data_from_arrays["Z"]) - del norm_data_from_arrays["Z"] - assert not hasattr(norm_data_from_arrays, "Z") - model.save(save_dir_blr) - del model - loaded_model = NormativeModel.load(save_dir_blr) - model1 = loaded_model[loaded_model.response_vars[0]] - loaded_model.predict(norm_data_from_arrays) - assert np.allclose(norm_data_from_arrays["Z"], Z_bak) - - assert loaded_model.is_fitted - assert isinstance(model1, BLR) - assert model1.n_iter == 10 - assert model1.tol == 1e-3 - assert not model1.ard - assert model1.optimizer == "l-bfgs-b" - assert model1.l_bfgs_b_l == 0.7 - assert model1.l_bfgs_b_epsilon == 0.1 - assert model1.l_bfgs_b_norm == "l2" - assert model1.fixed_effect - assert not model1.fixed_effect_var diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py index 36b221f9..91b7c4c2 100644 --- a/test/test_norm/test_normative_model_helper.py +++ b/test/test_norm/test_normative_model_helper.py @@ -9,7 +9,6 @@ from pcntoolkit.normative_model import NormativeModel from pcntoolkit.regression_model.blr import * from pcntoolkit.regression_model.hbr import * -from test import test_norm from test.fixtures.blr_model_fixtures import * from test.fixtures.data_fixtures import * from test.fixtures.norm_data_fixtures import * @@ -297,6 +296,7 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro assert model1.fixed_effect assert not model1.fixed_effect_var + def test_log_transformed_centiles( norm_blr_model_with_log_transform: NormativeModel, norm_data_from_arrays: NormData, @@ -319,7 +319,6 @@ def test_log_transformed_centiles( ) - def test_log_transformed_yhat( norm_blr_model_with_log_transform: NormativeModel, norm_data_from_arrays: NormData, From 2f42207b58e579a348d286a1e4cb687b560258e9 Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 14:35:33 +0100 Subject: [PATCH 09/22] fix - remove redundant import --- test/fixtures/test_model_fixtures.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/fixtures/test_model_fixtures.py b/test/fixtures/test_model_fixtures.py index 0fa761db..63a6bd9b 100644 --- a/test/fixtures/test_model_fixtures.py +++ b/test/fixtures/test_model_fixtures.py @@ -1,7 +1,6 @@ import os import shutil -import numpy as np import pytest from pcntoolkit.dataio.norm_data import NormData From 27201ab770263f649b6ca4908a9428af1f2e1cdb Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 14:35:54 +0100 Subject: [PATCH 10/22] enh - add log transform functionality --- pcntoolkit/normative_model.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pcntoolkit/normative_model.py b/pcntoolkit/normative_model.py index 89942170..facc2f5e 100644 --- a/pcntoolkit/normative_model.py +++ b/pcntoolkit/normative_model.py @@ -655,6 +655,10 @@ def _apply_y_transform(self, data: NormData) -> None: for var in "Y": if var in data.data_vars: data[var] = np.log1p(data[var]) + elif self.y_transform == "log": + for var in "Y": + if var in data.data_vars: + data[var] = np.log(data[var]) def _invert_y_transform(self, data: NormData) -> None: """ @@ -680,6 +684,10 @@ def _invert_y_transform(self, data: NormData) -> None: for var in ("Y", "centiles", "Yhat", "Y_harmonized", "thrive_Y"): if var in data.data_vars: data[var] = np.expm1(data[var]) + elif self.y_transform == "log": + for var in ("Y", "centiles", "Yhat", "Y_harmonized", "thrive_Y"): + if var in data.data_vars: + data[var] = np.exp(data[var]) def evaluate(self, data: NormData) -> None: """ From 87e41153020034765be7102b3666da4a0cda4c76 Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 14:40:14 +0100 Subject: [PATCH 11/22] enh - rename --- test/fixtures/blr_model_fixtures.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/fixtures/blr_model_fixtures.py b/test/fixtures/blr_model_fixtures.py index 8316cc33..4ad6e424 100644 --- a/test/fixtures/blr_model_fixtures.py +++ b/test/fixtures/blr_model_fixtures.py @@ -87,7 +87,7 @@ def fitted_blr_model( @pytest.fixture -def new_norm_blr_model( +def norm_blr_model( blr_model_factory: Callable, save_dir_blr ) -> NormativeModel: @@ -105,15 +105,15 @@ def new_norm_blr_model( @pytest.fixture -def fitted_norm_blr_model(new_norm_blr_model: NormativeModel, +def fitted_norm_blr_model(norm_blr_model: NormativeModel, norm_data_from_arrays: NormData ) -> NormativeModel: print("removing items") - if os.path.exists(new_norm_blr_model.save_dir): - shutil.rmtree(new_norm_blr_model.save_dir) - os.makedirs(new_norm_blr_model.save_dir, exist_ok=True) - new_norm_blr_model.fit(norm_data_from_arrays) - return new_norm_blr_model + if os.path.exists(norm_blr_model.save_dir): + shutil.rmtree(norm_blr_model.save_dir) + os.makedirs(norm_blr_model.save_dir, exist_ok=True) + norm_blr_model.fit(norm_data_from_arrays) + return norm_blr_model @pytest.fixture From 80a442fe8d08f07b8a685fc85510a0401078a7ef Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 14:43:55 +0100 Subject: [PATCH 12/22] enh - rename --- test/fixtures/blr_model_fixtures.py | 2 +- test/test_norm/test_normative_model_helper.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/fixtures/blr_model_fixtures.py b/test/fixtures/blr_model_fixtures.py index 4ad6e424..b9e93aa2 100644 --- a/test/fixtures/blr_model_fixtures.py +++ b/test/fixtures/blr_model_fixtures.py @@ -117,7 +117,7 @@ def fitted_norm_blr_model(norm_blr_model: NormativeModel, @pytest.fixture -def norm_blr_model_with_log_transform( +def log_transform_norm_blr_model( save_dir_test_model: str ) -> NormativeModel: """Create a NormativeModel using BLR with log1p. diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py index 91b7c4c2..423acbda 100644 --- a/test/test_norm/test_normative_model_helper.py +++ b/test/test_norm/test_normative_model_helper.py @@ -298,11 +298,11 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro def test_log_transformed_centiles( - norm_blr_model_with_log_transform: NormativeModel, + log_transform_norm_blr_model: NormativeModel, norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData, ) -> None: - norm_blr_model_with_log_transform.fit_predict( + log_transform_norm_blr_model.fit_predict( norm_data_from_arrays, test_norm_data_from_arrays ) @@ -320,11 +320,11 @@ def test_log_transformed_centiles( def test_log_transformed_yhat( - norm_blr_model_with_log_transform: NormativeModel, + log_transform_norm_blr_model: NormativeModel, norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData, ) -> None: - norm_blr_model_with_log_transform.fit_predict( + log_transform_norm_blr_model.fit_predict( norm_data_from_arrays, test_norm_data_from_arrays ) # We dont expect any negative yhat values in the train dataset From 4d31037c3c7431bf5dcfea6e72f41037f2c38b28 Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 14:53:09 +0100 Subject: [PATCH 13/22] enh - refactor comments --- pcntoolkit/normative_model.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pcntoolkit/normative_model.py b/pcntoolkit/normative_model.py index facc2f5e..84204309 100644 --- a/pcntoolkit/normative_model.py +++ b/pcntoolkit/normative_model.py @@ -651,12 +651,13 @@ def _apply_y_transform(self, data: NormData) -> None: # compute_thrivelines() that has a preprocess() call without a postprocess(). if self.y_transform == "log1p": - # Apply transform to all Y-like input variables - for var in "Y": + # Apply log1p transform to the response variable Y + for var in ["Y"]: if var in data.data_vars: data[var] = np.log1p(data[var]) elif self.y_transform == "log": - for var in "Y": + # Apply natural log transform to the response variable Y + for var in ["Y"]: if var in data.data_vars: data[var] = np.log(data[var]) From 71d5f704fb374a3c9d6a30fd84f3fbfe8b29ba68 Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 14:55:27 +0100 Subject: [PATCH 14/22] enh - add test fixtures for log transform --- test/fixtures/blr_model_fixtures.py | 40 ++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/test/fixtures/blr_model_fixtures.py b/test/fixtures/blr_model_fixtures.py index b9e93aa2..6132ed6d 100644 --- a/test/fixtures/blr_model_fixtures.py +++ b/test/fixtures/blr_model_fixtures.py @@ -117,7 +117,7 @@ def fitted_norm_blr_model(norm_blr_model: NormativeModel, @pytest.fixture -def log_transform_norm_blr_model( +def log1p_transform_norm_blr_model( save_dir_test_model: str ) -> NormativeModel: """Create a NormativeModel using BLR with log1p. @@ -133,7 +133,7 @@ def log_transform_norm_blr_model( shutil.rmtree(log_dir) os.makedirs(log_dir, exist_ok=True) # Create test regression model - test_model = BLR("name=test_model_log1p") + test_model = BLR("test_model_log1p") # Return a NormativeModel with the log1p transform return NormativeModel( template_regression_model=test_model, @@ -145,6 +145,40 @@ def log_transform_norm_blr_model( inscaler="standardize", outscaler="standardize", name="test_model_log1p", - y_transform=None, + y_transform="log1p", + ) + + +@pytest.fixture +def log_transform_norm_blr_model( + save_dir_blr: str, +) -> NormativeModel: + """Create a NormativeModel using BLR with natural log y_transform. + + Returns + ------- + NormativeModel + Un-fitted normative model with natural-log transform. + """ + # Build a fresh save directory for this fixture + log_dir = os.path.join(save_dir_blr, "log") + if os.path.exists(log_dir): + # Remove stale directory from previous test runs + shutil.rmtree(log_dir) + os.makedirs(log_dir, exist_ok=True) + # Create a BLR regression model for the natural-log transform test + test_model = BLR("test_model_log") + # Return NormativeModel with natural-log transform enabled + return NormativeModel( + template_regression_model=test_model, + savemodel=False, + saveresults=False, + evaluate_model=False, + saveplots=True, + save_dir=log_dir, + inscaler="standardize", + outscaler="standardize", + name="test_model_log", + y_transform="log", ) From dd702bdc983ab463d7f1b796b2ba991f3d4d0b5a Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 15:01:10 +0100 Subject: [PATCH 15/22] enh - refactor existing log1p tests --- test/test_norm/test_normative_model_helper.py | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py index 423acbda..35cf1fbf 100644 --- a/test/test_norm/test_normative_model_helper.py +++ b/test/test_norm/test_normative_model_helper.py @@ -297,12 +297,12 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro assert not model1.fixed_effect_var -def test_log_transformed_centiles( - log_transform_norm_blr_model: NormativeModel, +def test_log1p_transformed_centiles( + log1p_transform_norm_blr_model: NormativeModel, norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData, ) -> None: - log_transform_norm_blr_model.fit_predict( + log1p_transform_norm_blr_model.fit_predict( norm_data_from_arrays, test_norm_data_from_arrays ) @@ -310,7 +310,8 @@ def test_log_transformed_centiles( assert bool( np.all(test_norm_data_from_arrays["Y"].values >= 0) ) - # Both training and test centiles should be bigger or equal to -1 + # Both training and test centiles should be bigger than 0 due to the + # exp(Y) - 1 transform assert bool( np.all(norm_data_from_arrays["centiles"].values > -1) ) @@ -319,7 +320,24 @@ def test_log_transformed_centiles( ) -def test_log_transformed_yhat( +def test_log1p_transformed_yhat( + log1p_transform_norm_blr_model: NormativeModel, + norm_data_from_arrays: NormData, + test_norm_data_from_arrays: NormData, +) -> None: + log1p_transform_norm_blr_model.fit_predict( + norm_data_from_arrays, test_norm_data_from_arrays + ) + # We dont expect any negative yhat values in the train and test dataset + assert bool( + np.all(norm_data_from_arrays["Yhat"].values >= 0) + ) + assert bool( + np.all(test_norm_data_from_arrays["Yhat"].values >= 0) + ) + + +def test_log_transformed_centiles( log_transform_norm_blr_model: NormativeModel, norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData, From ba5844e117e33575bac4cf006bb43114ac1e3403 Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 15:02:18 +0100 Subject: [PATCH 16/22] enh - add log transform tests --- test/test_norm/test_normative_model_helper.py | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py index 35cf1fbf..eb6905b6 100644 --- a/test/test_norm/test_normative_model_helper.py +++ b/test/test_norm/test_normative_model_helper.py @@ -345,11 +345,34 @@ def test_log_transformed_centiles( log_transform_norm_blr_model.fit_predict( norm_data_from_arrays, test_norm_data_from_arrays ) - # We dont expect any negative yhat values in the train dataset + + # Check that Y are positive in the original data assert bool( - np.all(norm_data_from_arrays["Yhat"].values >= 0) + np.all(test_norm_data_from_arrays["Y"].values > 0) + ) + # Both training and test centiles should be bigger than 0 due to the exp(Y) + # transform + assert bool( + np.all(norm_data_from_arrays["centiles"].values > 0) + ) + assert bool( + np.all(test_norm_data_from_arrays["centiles"].values > 0) + ) + + +def test_log_transformed_yhat( + log_transform_norm_blr_model: NormativeModel, + norm_data_from_arrays: NormData, + test_norm_data_from_arrays: NormData, +) -> None: + log_transform_norm_blr_model.fit_predict( + norm_data_from_arrays, test_norm_data_from_arrays ) - # test dataset: -1 possibility due to extrapolation + + # We dont expect any negative yhat values in the train and test dataset assert bool( - np.all(test_norm_data_from_arrays["Yhat"].values > -1) + np.all(norm_data_from_arrays["Yhat"].values > 0) ) + assert bool( + np.all(test_norm_data_from_arrays["Yhat"].values > 0) + ) \ No newline at end of file From e13a09a1960e40ad7df56f3740e0c14253cd27a6 Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 15:04:12 +0100 Subject: [PATCH 17/22] enh - group tests for faster execution --- test/test_norm/test_normative_model_helper.py | 31 +++++-------------- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py index eb6905b6..69cd67d9 100644 --- a/test/test_norm/test_normative_model_helper.py +++ b/test/test_norm/test_normative_model_helper.py @@ -297,7 +297,7 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro assert not model1.fixed_effect_var -def test_log1p_transformed_centiles( +def test_log1p_transform( log1p_transform_norm_blr_model: NormativeModel, norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData, @@ -305,11 +305,12 @@ def test_log1p_transformed_centiles( log1p_transform_norm_blr_model.fit_predict( norm_data_from_arrays, test_norm_data_from_arrays ) - + # Check that Y are non-negative in the original data assert bool( - np.all(test_norm_data_from_arrays["Y"].values >= 0) + np.all(test_norm_data_from_arrays["Y"].values > 0) ) + # Both training and test centiles should be bigger than 0 due to the # exp(Y) - 1 transform assert bool( @@ -319,15 +320,6 @@ def test_log1p_transformed_centiles( np.all(test_norm_data_from_arrays["centiles"].values > -1) ) - -def test_log1p_transformed_yhat( - log1p_transform_norm_blr_model: NormativeModel, - norm_data_from_arrays: NormData, - test_norm_data_from_arrays: NormData, -) -> None: - log1p_transform_norm_blr_model.fit_predict( - norm_data_from_arrays, test_norm_data_from_arrays - ) # We dont expect any negative yhat values in the train and test dataset assert bool( np.all(norm_data_from_arrays["Yhat"].values >= 0) @@ -337,7 +329,7 @@ def test_log1p_transformed_yhat( ) -def test_log_transformed_centiles( +def test_log_transformed( log_transform_norm_blr_model: NormativeModel, norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData, @@ -350,6 +342,7 @@ def test_log_transformed_centiles( assert bool( np.all(test_norm_data_from_arrays["Y"].values > 0) ) + # Both training and test centiles should be bigger than 0 due to the exp(Y) # transform assert bool( @@ -359,20 +352,10 @@ def test_log_transformed_centiles( np.all(test_norm_data_from_arrays["centiles"].values > 0) ) - -def test_log_transformed_yhat( - log_transform_norm_blr_model: NormativeModel, - norm_data_from_arrays: NormData, - test_norm_data_from_arrays: NormData, -) -> None: - log_transform_norm_blr_model.fit_predict( - norm_data_from_arrays, test_norm_data_from_arrays - ) - # We dont expect any negative yhat values in the train and test dataset assert bool( np.all(norm_data_from_arrays["Yhat"].values > 0) ) assert bool( np.all(test_norm_data_from_arrays["Yhat"].values > 0) - ) \ No newline at end of file + ) From 9b0ec3b00fc32f7f080f90b8eac0b69d636d4e2d Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 17:04:18 +0100 Subject: [PATCH 18/22] enh - check that the log transform can be performed --- pcntoolkit/normative_model.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/pcntoolkit/normative_model.py b/pcntoolkit/normative_model.py index 84204309..05f16559 100644 --- a/pcntoolkit/normative_model.py +++ b/pcntoolkit/normative_model.py @@ -56,9 +56,9 @@ class NormativeModel: Output (Y/response_vars) scaler to use. y_transform : str or None Optional transform applied to Y before fitting and inverted - after prediction. Currently supported: ``"log"`` applies - log(Y+1) before fitting and then inverts centiles of the resulting - model back in the original space. + after prediction. Currently supported: + - ``"log1p"`` applies log(Y+1) + - ``"log"`` applies natural log(Y) This is useful for phenotypes that cannot be negative. Default is ``None`` (no transform). name: str @@ -642,8 +642,7 @@ def _apply_y_transform(self, data: NormData) -> None: """ if self.y_transform is None: return - - + # TODO: Check if we need to track if transform has already been # applied to avoid double-inverting. Normally I dont expect any issues # as every process() is followed by a postprocess(). The only issues can @@ -653,12 +652,25 @@ def _apply_y_transform(self, data: NormData) -> None: if self.y_transform == "log1p": # Apply log1p transform to the response variable Y for var in ["Y"]: - if var in data.data_vars: + if (data[var] < -1).any(): + raise ValueError("Cannot apply log1p transform to variable " + f"'{var}' because it contains values less " + "than -1." + ) + else: data[var] = np.log1p(data[var]) + elif self.y_transform == "log": # Apply natural log transform to the response variable Y for var in ["Y"]: - if var in data.data_vars: + if (data[var] <= 0).any(): + raise ValueError( + f"Cannot apply log transform to variable '{var}' " + "because it contains non-positive values. " + "Consider using 'log1p' transform or ensuring " + "all values are positive." + ) + else: data[var] = np.log(data[var]) def _invert_y_transform(self, data: NormData) -> None: From 235d13e4f73918c3b054d4e66e20c520e24b5d15 Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 17:04:44 +0100 Subject: [PATCH 19/22] enh - plotter should assign positive response vars --- pcntoolkit/util/plotter.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pcntoolkit/util/plotter.py b/pcntoolkit/util/plotter.py index 2a675474..d9f2ccc9 100644 --- a/pcntoolkit/util/plotter.py +++ b/pcntoolkit/util/plotter.py @@ -101,9 +101,10 @@ def plot_centiles( # Batch effects are the first ones in the highlighted batch effects for be, v in batch_effects.items(): centile_df[be] = v - # Response vars are all 0, we don't need them + # Assign random values for response vars because they are not needed. + # They must be > 0 to satisfy later checks that require response_vars > 0. for rv in response_vars: - centile_df[rv] = 0 + centile_df[rv] = 1e-6 centile_data = NormData.from_dataframe( "centile", @@ -352,9 +353,10 @@ def plot_centiles_advanced( # Batch effects are the first ones in the highlighted batch effects for be, v in batch_effects.items(): centile_df[be] = v[0] - # Response vars are all 0, we don't need them + # Assign random values for response vars because they are not needed. + # They must be > 0 to satisfy later checks that require response_vars > 0. for rv in model.response_vars: - centile_df[rv] = 0 + centile_df[rv] = 1e-6 centile_data = NormData.from_dataframe( "centile", dataframe=centile_df, From bc734f065e33b43e6d102042ba1de5160d3ea0d5 Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 17:05:53 +0100 Subject: [PATCH 20/22] enh - last improvements to the tests --- test/fixtures/blr_model_fixtures.py | 16 ++++++++-------- test/test_norm/test_normative_model_helper.py | 6 ++++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/test/fixtures/blr_model_fixtures.py b/test/fixtures/blr_model_fixtures.py index 6132ed6d..c6dfebdf 100644 --- a/test/fixtures/blr_model_fixtures.py +++ b/test/fixtures/blr_model_fixtures.py @@ -133,14 +133,14 @@ def log1p_transform_norm_blr_model( shutil.rmtree(log_dir) os.makedirs(log_dir, exist_ok=True) # Create test regression model - test_model = BLR("test_model_log1p") + blr_model = BLR("test_model_log1p") # Return a NormativeModel with the log1p transform return NormativeModel( - template_regression_model=test_model, + template_regression_model=blr_model, savemodel=False, saveresults=False, evaluate_model=False, - saveplots=True, + saveplots=False, save_dir=log_dir, inscaler="standardize", outscaler="standardize", @@ -151,7 +151,7 @@ def log1p_transform_norm_blr_model( @pytest.fixture def log_transform_norm_blr_model( - save_dir_blr: str, + save_dir_test_model: str, ) -> NormativeModel: """Create a NormativeModel using BLR with natural log y_transform. @@ -161,20 +161,20 @@ def log_transform_norm_blr_model( Un-fitted normative model with natural-log transform. """ # Build a fresh save directory for this fixture - log_dir = os.path.join(save_dir_blr, "log") + log_dir = os.path.join(save_dir_test_model, "log") if os.path.exists(log_dir): # Remove stale directory from previous test runs shutil.rmtree(log_dir) os.makedirs(log_dir, exist_ok=True) # Create a BLR regression model for the natural-log transform test - test_model = BLR("test_model_log") + blr_model = BLR("test_model_log") # Return NormativeModel with natural-log transform enabled return NormativeModel( - template_regression_model=test_model, + template_regression_model=blr_model, savemodel=False, saveresults=False, evaluate_model=False, - saveplots=True, + saveplots=False, save_dir=log_dir, inscaler="standardize", outscaler="standardize", diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py index 69cd67d9..8ddc72d2 100644 --- a/test/test_norm/test_normative_model_helper.py +++ b/test/test_norm/test_normative_model_helper.py @@ -308,7 +308,7 @@ def test_log1p_transform( # Check that Y are non-negative in the original data assert bool( - np.all(test_norm_data_from_arrays["Y"].values > 0) + np.all(test_norm_data_from_arrays["Y"].values >= 0) ) # Both training and test centiles should be bigger than 0 due to the @@ -337,7 +337,9 @@ def test_log_transformed( log_transform_norm_blr_model.fit_predict( norm_data_from_arrays, test_norm_data_from_arrays ) - + # Force the data to be >= 1e-6 + test_norm_data_from_arrays["Y"].values.clip(min=1e-6) + # Check that Y are positive in the original data assert bool( np.all(test_norm_data_from_arrays["Y"].values > 0) From 60e5ea6fbe1f682aba898194f5d24095cc0922bf Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 17:09:47 +0100 Subject: [PATCH 21/22] fix - clip before fitting --- test/test_norm/test_normative_model_helper.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py index 8ddc72d2..6e5a5549 100644 --- a/test/test_norm/test_normative_model_helper.py +++ b/test/test_norm/test_normative_model_helper.py @@ -334,12 +334,13 @@ def test_log_transformed( norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData, ) -> None: + # Force the data to be >= 1e-6 + test_norm_data_from_arrays["Y"].values.clip(min=1e-6) + log_transform_norm_blr_model.fit_predict( norm_data_from_arrays, test_norm_data_from_arrays ) - # Force the data to be >= 1e-6 - test_norm_data_from_arrays["Y"].values.clip(min=1e-6) - + # Check that Y are positive in the original data assert bool( np.all(test_norm_data_from_arrays["Y"].values > 0) From 19301942897649bf0f6eb56021e93188f30b27da Mon Sep 17 00:00:00 2001 From: Konstantinos Tsilimparis Date: Thu, 5 Mar 2026 17:20:35 +0100 Subject: [PATCH 22/22] enh - comment --- pcntoolkit/regression_model/blr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pcntoolkit/regression_model/blr.py b/pcntoolkit/regression_model/blr.py index 306fb702..d5a9a197 100644 --- a/pcntoolkit/regression_model/blr.py +++ b/pcntoolkit/regression_model/blr.py @@ -347,6 +347,7 @@ def backward(self, X: xr.DataArray, be: xr.DataArray, Z: xr.DataArray) -> xr.Dat self.ys[mask] = self.ys[mask] + residual_mean self.s2[mask] = np.square(np.sqrt(self.s2[mask]) * correction_factor) + # Compute the centiles in the original Y space: centiles = Z * std + mean centiles = np_Z * np.sqrt(self.s2) + self.ys if self.warp: centiles = self.warp.invf(centiles, self.gamma)