From a81df5a5443f593517fa10c2d4252534b1441a5b Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 12 Feb 2026 18:14:25 +0100
Subject: [PATCH 01/22] enh - add  Y transformation keyword argument in
 NormativeModel to enforce positivity

---
 pcntoolkit/normative_model.py | 78 +++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/pcntoolkit/normative_model.py b/pcntoolkit/normative_model.py
index 3928a11a..5bee8bfb 100644
--- a/pcntoolkit/normative_model.py
+++ b/pcntoolkit/normative_model.py
@@ -54,6 +54,13 @@ class NormativeModel:
         Input (X/covariates) scaler to use.
     outscaler: str
         Output (Y/response_vars) scaler to use.
+    y_transform : str or None
+        Optional transform applied to Y before fitting and inverted
+        after prediction. Currently supported: ``"log"`` applies
+        log(Y+1) before fitting and then inverts centiles of the resulting 
+        model back in the original space.
+        This is useful for phenotypes that cannot be negative.
+        Default is ``None`` (no transform).
     name: str
         Name of the model
     """
@@ -68,6 +75,7 @@ def __init__(
         save_dir: Optional[str] = None,
         inscaler: str = "standardize",
         outscaler: str = "standardize",
+        y_transform: Optional[str] = None,
         name: Optional[str] = None,
     ):
         self.savemodel: bool = savemodel
@@ -77,6 +85,7 @@ def __init__(
         self._save_dir = save_dir if save_dir is not None else get_default_save_dir()
         self.inscaler: str = inscaler
         self.outscaler: str = outscaler
+        self.y_transform: Optional[str] = y_transform
         self.name: Optional[str] = name
         self.response_vars: list[str] = None  # type: ignore
         self.template_regression_model: RegressionModel = template_regression_model
@@ -180,6 +189,7 @@ def transfer(self, transfer_data: NormData, save_dir: str | None = None, **kwarg
             saveplots=True,
             inscaler=self.inscaler,
             outscaler=self.outscaler,
+            y_transform=self.y_transform,
             save_dir=self.save_dir,
         )
         if save_dir is not None:
@@ -239,6 +249,7 @@ def extend(self, data: NormData, save_dir: str | None = None, n_synth_samples: i
             saveplots=True,
             inscaler=self.inscaler,
             outscaler=self.outscaler,
+            y_transform=self.y_transform,
             save_dir=save_dir,
         )
 
@@ -459,6 +470,7 @@ def load(cls, path: str, into: NormativeModel | None = None) -> NormativeModel:
         outscaler = metadata["outscaler"]
         saveplots = metadata["saveplots"]
         evaluate_model = metadata["evaluate_model"]
+        y_transform = metadata.get("y_transform", None)
         name = metadata["name"]
 
         response_vars = []
@@ -491,6 +503,7 @@ def load(cls, path: str, into: NormativeModel | None = None) -> NormativeModel:
                 save_dir=save_dir,
                 inscaler=inscaler,
                 outscaler=outscaler,
+                y_transform=y_transform,
                 name=name,
             )
         else:
@@ -548,9 +561,13 @@ def preprocess(self, data: NormData) -> None:
         """
         Applies preprocessing transformations to the input data.
 
+        First applies an optional response transform (e.g. log1p), then scales.
+
         Args:
             data (NormData): Data to preprocess.
         """
+        # Enforce positivity if necessary
+        self._apply_y_transform(data)
         self.scale_forward(data)
 
     def scale_forward(self, data: NormData, overwrite: bool = False) -> None:
@@ -585,10 +602,15 @@ def scale_forward(self, data: NormData, overwrite: bool = False) -> None:
     def postprocess(self, data: NormData) -> None:
         """Apply postprocessing to the data.
 
+        First unscales, then applies the inverse response transform (e.g. expm1).
+
         Args:
             data (NormData): Data to postprocess.
         """
         self.scale_backward(data)
+        # Invert Y to its original space if positivity was enforced during 
+        # preprocessing
+        self._invert_y_transform(data)
 
     def scale_backward(self, data: NormData) -> None:
         """
@@ -605,6 +627,59 @@ def scale_backward(self, data: NormData) -> None:
         """
         data.scale_backward(self.inscalers, self.outscalers)
 
+    def _apply_y_transform(self, data: NormData) -> None:
+        """
+        Apply the forward response transform (e.g. log1p) to Y-like variables 
+        in the data.
+        
+        Parameters
+        ----------
+        data : NormData
+            Data object containing response variable arrays (Y, Yhat, 
+            centiles, thrive_Y) to which the transform should be applied.
+        
+        """
+        if self.y_transform is None:
+            return
+        
+                
+        # TODO: Check if we need to track if transform has already been 
+        # applied to avoid double-inverting. Normally I dont expect any issues 
+        # as every process() is followed by a postprocess(). The only issues can
+        # be if users call postprocess() multiple times manually or with
+        # compute_thrivelines() that has a preprocess() call without a postprocess().
+            
+        if self.y_transform == "log1p":
+            # Apply transform to all Y-like input variables
+            for var in "Y":
+                if var in data.data_vars:
+                    data[var] = np.log1p(data[var])
+
+    def _invert_y_transform(self, data: NormData) -> None:
+        """
+        Apply the inverse response transform (e.g. expm1) to Y-like variables
+        in the data.
+        
+        Parameters
+        ----------
+        data : NormData
+            Data object containing response variable arrays (Y, Yhat,
+            centiles, thrive_Y) to which the inverse transform should be applied.
+        """
+        if self.y_transform is None:
+            return
+        
+        # TODO: Check if we need to track if inverse transform has already been 
+        # applied to avoid double-inverting. Normally I dont expect any issues 
+        # as every process() is followed by a postprocess(). The only issues can
+        # be if users call postprocess() multiple times manually or with
+        # compute_thrivelines() that has a preprocess() call without a postprocess().
+            
+        if self.y_transform == "log1p":
+            for var in ("Y", "centiles", "Yhat", "Y_harmonized", "thrive_Y"):
+                if var in data.data_vars:
+                    data[var] = np.expm1(data[var])
+            
     def evaluate(self, data: NormData) -> None:
         """
         Evaluates the model performance on the data.
@@ -927,6 +1002,7 @@ def to_dict(self):
             "is_fitted": self.is_fitted,
             "inscaler": self.inscaler,
             "outscaler": self.outscaler,
+            "y_transform": self.y_transform,
             "ptk_version": importlib.metadata.version("pcntoolkit"),
         }
 
@@ -986,6 +1062,7 @@ def from_args(cls, **kwargs) -> NormativeModel:
         inscaler = kwargs.get("inscaler", "none")
         outscaler = kwargs.get("outscaler", "none")
         name = kwargs.get("name", None)
+        y_transform = kwargs.get("y_transform", None)
         assert "alg" in kwargs, "Algorithm must be specified"
         if kwargs["alg"] == "blr":
             template_regression_model = BLR.from_args("template", kwargs)
@@ -1004,6 +1081,7 @@ def from_args(cls, **kwargs) -> NormativeModel:
             save_dir=save_dir,
             inscaler=inscaler,
             outscaler=outscaler,
+            y_transform=y_transform,
             name=name,
         )
 

From 1d2f5360629921b47c6fdb6bf5d0cfeb6e18e235 Mon Sep 17 00:00:00 2001
From: contsili <konstantinos.tsilimparis@outlook.com>
Date: Mon, 2 Mar 2026 18:38:06 +0100
Subject: [PATCH 02/22] enh - rename NormativeModel tests

That way it is clear what is tested:
main: tests the main normative model functions
helper: tests the helper functions
---
 ..._model.py => test_normative_model_main.py} |   0
 test/test_norm/test_normative_model_helper.py | 297 ++++++++++++++++++
 2 files changed, 297 insertions(+)
 rename test/test_core/{test_normative_model.py => test_normative_model_main.py} (100%)
 create mode 100644 test/test_norm/test_normative_model_helper.py

diff --git a/test/test_core/test_normative_model.py b/test/test_core/test_normative_model_main.py
similarity index 100%
rename from test/test_core/test_normative_model.py
rename to test/test_core/test_normative_model_main.py
diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py
new file mode 100644
index 00000000..3caf02a6
--- /dev/null
+++ b/test/test_norm/test_normative_model_helper.py
@@ -0,0 +1,297 @@
+import copy
+
+import numpy as np
+import pytest
+
+from pcntoolkit.dataio.norm_data import NormData
+from pcntoolkit.math_functions.likelihood import NormalLikelihood
+from pcntoolkit.math_functions.prior import *
+from pcntoolkit.normative_model import NormativeModel
+from pcntoolkit.regression_model.blr import *
+from pcntoolkit.regression_model.hbr import *
+from test import test_norm
+from test.fixtures.data_fixtures import *
+from test.fixtures.norm_data_fixtures import *
+from test.fixtures.path_fixtures import *
+from test.fixtures.test_model_fixtures import *
+
+"""
+This file contains tests for the NormBase class in the PCNtoolkit.
+
+The tests cover the following aspects:
+1. Fitting the model with valid data
+2. Handling invalid data for fitting
+3. Scaling methods for data
+4. Polynomial basis expansion
+5. B-spline basis expansion
+"""
+
+
+def test_fit(new_norm_test_model: NormativeModel, norm_data_from_arrays: NormData):
+    if os.path.exists(new_norm_test_model.save_dir):
+        shutil.rmtree(new_norm_test_model.save_dir)
+    os.makedirs(new_norm_test_model.save_dir, exist_ok=True)
+    new_norm_test_model.fit(norm_data_from_arrays)
+    assert new_norm_test_model.is_fitted
+
+
+def test_fit_predict(new_norm_test_model: NormativeModel, norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData):
+    if os.path.exists(new_norm_test_model.save_dir):
+        shutil.rmtree(new_norm_test_model.save_dir)
+    os.makedirs(new_norm_test_model.save_dir, exist_ok=True)
+    new_norm_test_model.fit_predict(norm_data_from_arrays, test_norm_data_from_arrays)
+    assert new_norm_test_model.is_fitted
+
+
+def test_predict(fitted_norm_test_model: NormativeModel, test_norm_data_from_arrays: NormData):
+    os.makedirs(fitted_norm_test_model.save_dir, exist_ok=True)
+    fitted_norm_test_model.predict(test_norm_data_from_arrays)
+    assert fitted_norm_test_model.is_fitted
+
+
+def test_harmonize(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData):
+    fitted_norm_test_model.harmonize(norm_data_from_arrays)
+    assert fitted_norm_test_model.is_fitted
+
+
+def test_compute_zscores(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData):
+    fitted_norm_test_model.compute_zscores(norm_data_from_arrays)
+    assert fitted_norm_test_model.is_fitted
+
+
+def test_compute_centiles(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData):
+    fitted_norm_test_model.compute_centiles(norm_data_from_arrays)
+    assert fitted_norm_test_model.is_fitted
+
+
+# Test the fit method of the NormBase class with invalid data
+def test_norm_base_fit_invalid_data(new_norm_test_model: NormativeModel):
+    with pytest.raises(AttributeError):
+        new_norm_test_model.fit(None)
+
+    with pytest.raises(AttributeError):
+        new_norm_test_model.fit("invalid_data")
+
+
+# Scaling tests
+@pytest.mark.parametrize("scaler", ["standardize", "minmax"])
+def test_scaling(
+    norm_data_from_arrays,
+    new_norm_test_model: NormativeModel,
+    n_covariates,
+    n_response_vars,
+    scaler,
+):
+    object.__setattr__(new_norm_test_model, "inscaler", scaler)
+    object.__setattr__(new_norm_test_model, "outscaler", scaler)
+    copydata = copy.deepcopy(norm_data_from_arrays)
+    copydata.attrs["is_scaled"] = False
+    for s in new_norm_test_model.inscalers:
+        s.is_fitted = False
+    for s in new_norm_test_model.outscalers:
+        s.is_fitted = False
+    X_bak = norm_data_from_arrays.X.data.copy()
+    y_bak = norm_data_from_arrays.Y.data.copy()
+    new_norm_test_model.scale_forward(copydata)
+
+    if scaler == "standardize":
+        assert_standardized(copydata, n_covariates, n_response_vars)
+    elif scaler == "minmax":
+        assert_minmax_scaled(copydata)
+
+    new_norm_test_model.scale_backward(copydata)
+    assert np.allclose(copydata.X.data, X_bak)
+    assert np.allclose(copydata.Y.data, y_bak)
+
+
+def assert_standardized(data, n_covariates, n_response_vars):
+    assert data.X.data.mean(axis=0) == pytest.approx(n_covariates * [0])
+    assert data.X.data.std(axis=0) == pytest.approx(n_covariates * [1])
+    assert data.Y.data.mean(axis=0) == pytest.approx(n_response_vars * [0])
+    assert data.Y.data.std(axis=0) == pytest.approx(n_response_vars * [1])
+
+
+def assert_minmax_scaled(data):
+    assert np.allclose(data.X.data.min(axis=0), 0)
+    assert np.allclose(data.X.data.max(axis=0), 1)
+    assert np.allclose(data.Y.data.min(axis=0), 0)
+    assert np.allclose(data.Y.data.max(axis=0), 1)
+
+
+def test_test_model_to_and_from_dict_and_args(test_model_args: dict, norm_data_from_arrays: NormData, save_dir_test_model):
+    model = NormativeModel.from_args(**test_model_args)
+    model_dict = model.to_dict()
+    assert model_dict["template_regression_model"]["success_ratio"] == test_model_args["success_ratio"]
+    model.fit(norm_data_from_arrays)
+    assert model.is_fitted
+    model_dict = model.to_dict()
+    assert model_dict["template_regression_model"]["success_ratio"] == test_model_args["success_ratio"]
+    model.predict(norm_data_from_arrays)
+    assert hasattr(norm_data_from_arrays, "Z")
+    Z_bak = copy.deepcopy(norm_data_from_arrays["Z"])
+    del norm_data_from_arrays["Z"]
+    assert not hasattr(norm_data_from_arrays, "Z")
+    model.save(save_dir_test_model)
+    loaded_model = NormativeModel.load(save_dir_test_model)
+    assert loaded_model.is_fitted
+    loaded_model.predict(norm_data_from_arrays)
+    assert np.allclose(norm_data_from_arrays["Z"], Z_bak)
+
+
+@pytest.fixture
+def hbr_model_args(save_dir_hbr):
+    return {
+        "savemodel": False,
+        "saveresults": False,
+        "evaluate_model": False,
+        "saveplots": False,
+        "save_dir": save_dir_hbr,
+        "inscaler": "standardize",
+        "outscaler": "standardize",
+        "name": "hbr_test_model",
+        "alg": "hbr",
+        "likelihood": "Normal",
+        "linear_mu": True,
+        "random_mu": False,
+        "random_slope_mu": False,
+        "random_intercept_mu": True,
+        "linear_sigma": False,
+        "random_sigma": True,
+        "dist_name_sigma_sigma": "LogNormal",
+        "dist_params_sigma_sigma": (2.5, 1.3),
+        "draws": 10,
+        "tune": 10,
+        "cores": 1,
+    }
+
+
+def test_hbr_model_to_and_from_dict_and_args(hbr_model_args: dict, norm_data_from_arrays: NormData, save_dir_hbr):
+    model = NormativeModel.from_args(**hbr_model_args)
+    model_dict = model.to_dict()
+    for k in ["savemodel", "saveresults", "evaluate_model", "saveplots", "inscaler", "outscaler", "name"]:
+        assert model_dict[k] == hbr_model_args[k]
+    tmplt = model.template_regression_model
+    assert isinstance(tmplt, HBR)
+    assert isinstance(tmplt.likelihood, NormalLikelihood)
+    assert isinstance(tmplt.likelihood.mu, LinearPrior)
+    assert isinstance(tmplt.likelihood.mu.intercept, RandomPrior)
+    assert isinstance(tmplt.likelihood.sigma, RandomPrior)
+    assert isinstance(tmplt.likelihood.sigma.sigma, Prior)
+    assert tmplt.likelihood.sigma.sigma.dist_name == "LogNormal"
+    assert tmplt.likelihood.sigma.sigma.dist_params == (2.5, 1.3)
+    model.fit(norm_data_from_arrays)
+    assert model.is_fitted
+
+    model1 = model[model.response_vars[0]]
+    assert isinstance(model1, HBR)
+    assert isinstance(model1.likelihood, NormalLikelihood)
+    assert isinstance(model1.likelihood.mu, LinearPrior)
+    assert isinstance(model1.likelihood.mu.intercept, RandomPrior)
+    assert isinstance(model1.likelihood.sigma, RandomPrior)
+    assert isinstance(model1.likelihood.sigma.sigma, Prior)
+    assert model1.likelihood.sigma.sigma.dist_name == "LogNormal"
+    assert model1.likelihood.sigma.sigma.dist_params == (2.5, 1.3)
+
+    model.predict(norm_data_from_arrays)
+    assert hasattr(norm_data_from_arrays, "Z")
+    Z_bak = copy.deepcopy(norm_data_from_arrays["Z"])
+    del norm_data_from_arrays["Z"]
+    assert not hasattr(norm_data_from_arrays, "Z")
+    model.save(save_dir_hbr)
+    del model
+    loaded_model = NormativeModel.load(save_dir_hbr)
+    loaded_model.predict(norm_data_from_arrays)
+    assert np.allclose(norm_data_from_arrays["Z"], Z_bak)
+    assert loaded_model.is_fitted
+    model1 = loaded_model[loaded_model.response_vars[0]]
+    assert isinstance(model1, HBR)
+    assert isinstance(model1.likelihood, NormalLikelihood)
+    assert isinstance(model1.likelihood.mu, LinearPrior)
+    assert isinstance(model1.likelihood.mu.intercept, RandomPrior)
+    assert isinstance(model1.likelihood.sigma, RandomPrior)
+    assert isinstance(model1.likelihood.sigma.sigma, Prior)
+    assert model1.likelihood.sigma.sigma.dist_name == "LogNormal"
+    assert model1.likelihood.sigma.sigma.dist_params == [2.5, 1.3]
+
+
+@pytest.fixture
+def blr_model_args(save_dir_blr):
+    return {
+        "savemodel": False,
+        "saveresults": False,
+        "evaluate_model": False,
+        "saveplots": False,
+        "save_dir": save_dir_blr,
+        "inscaler": "standardize",
+        "outscaler": "standardize",
+        "name": "blr_test_model",
+        "alg": "blr",
+        "n_iter": 10,
+        "tol": 1e-3,
+        "ard": False,
+        "optimizer": "l-bfgs-b",
+        "l_bfgs_b_l": 0.7,
+        "l_bfgs_b_epsilon": 0.1,
+        "l_bfgs_b_norm": "l2",
+        "fixed_effect": True,
+        "heteroskedastic": False,
+        "fixed_effect_var": False,
+    }
+
+
+def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_from_arrays: NormData, save_dir_blr):
+    model = NormativeModel.from_args(**blr_model_args)
+    model_dict = model.to_dict()
+    for k in ["savemodel", "saveresults", "evaluate_model", "saveplots", "inscaler", "outscaler", "name"]:
+        assert model_dict[k] == blr_model_args[k]
+    tmplt = model.template_regression_model
+    assert isinstance(tmplt, BLR)
+    assert tmplt.n_iter == 10
+    assert tmplt.tol == 1e-3
+    assert not tmplt.ard
+    assert tmplt.optimizer == "l-bfgs-b"
+    assert tmplt.l_bfgs_b_l == 0.7
+    assert tmplt.l_bfgs_b_epsilon == 0.1
+    assert tmplt.l_bfgs_b_norm == "l2"
+    assert tmplt.fixed_effect
+    assert not tmplt.heteroskedastic
+    assert not tmplt.fixed_effect_var
+    print("Fitting")
+    model.fit(norm_data_from_arrays)
+    print("fitted")
+    assert model.is_fitted
+
+    model1 = model[model.response_vars[0]]
+    assert isinstance(model1, BLR)
+    assert model1.n_iter == 10
+    assert model1.tol == 1e-3
+    assert not model1.ard
+    assert model1.optimizer == "l-bfgs-b"
+    assert model1.l_bfgs_b_l == 0.7
+    assert model1.l_bfgs_b_epsilon == 0.1
+    assert model1.l_bfgs_b_norm == "l2"
+    assert model1.fixed_effect
+    assert not model1.fixed_effect_var
+    model.predict(norm_data_from_arrays)
+    assert hasattr(norm_data_from_arrays, "Z")
+    Z_bak = copy.deepcopy(norm_data_from_arrays["Z"])
+    del norm_data_from_arrays["Z"]
+    assert not hasattr(norm_data_from_arrays, "Z")
+    model.save(save_dir_blr)
+    del model
+    loaded_model = NormativeModel.load(save_dir_blr)
+    model1 = loaded_model[loaded_model.response_vars[0]]
+    loaded_model.predict(norm_data_from_arrays)
+    assert np.allclose(norm_data_from_arrays["Z"], Z_bak)
+
+    assert loaded_model.is_fitted
+    assert isinstance(model1, BLR)
+    assert model1.n_iter == 10
+    assert model1.tol == 1e-3
+    assert not model1.ard
+    assert model1.optimizer == "l-bfgs-b"
+    assert model1.l_bfgs_b_l == 0.7
+    assert model1.l_bfgs_b_epsilon == 0.1
+    assert model1.l_bfgs_b_norm == "l2"
+    assert model1.fixed_effect
+    assert not model1.fixed_effect_var

From 06f21a37fc97f5049193b6b7a3c06b3eacd55bbd Mon Sep 17 00:00:00 2001
From: contsili <konstantinos.tsilimparis@outlook.com>
Date: Mon, 2 Mar 2026 18:39:00 +0100
Subject: [PATCH 03/22] enh - generate data and model to use for the log
 transform test

---
 test/fixtures/test_model_fixtures.py | 88 ++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/test/fixtures/test_model_fixtures.py b/test/fixtures/test_model_fixtures.py
index 4a7f26f3..8e7aa8b1 100644
--- a/test/fixtures/test_model_fixtures.py
+++ b/test/fixtures/test_model_fixtures.py
@@ -1,3 +1,7 @@
+import os
+import shutil
+
+import numpy as np
 import pytest
 
 from pcntoolkit.dataio.norm_data import NormData
@@ -43,3 +47,87 @@ def fitted_norm_test_model(new_norm_test_model: NormativeModel, norm_data_from_a
     os.makedirs(new_norm_test_model.save_dir, exist_ok=True)
     new_norm_test_model.fit(norm_data_from_arrays)
     return new_norm_test_model
+
+@pytest.fixture
+def positive_norm_data(
+    train_arrays: tuple,
+) -> NormData:
+    """Create NormData with strictly positive Y values.
+
+    Takes the standard training arrays and shifts Y so
+    that every element is strictly greater than zero.
+
+    Returns
+    -------
+    NormData
+        Dataset whose Y column values are all > 0.
+    """
+    # Unpack the standard training arrays
+    X, y, batch_effects = train_arrays
+    # Shift Y so every value is strictly positive
+    y_positive = np.abs(y) + 1.0
+    # Build a NormData from the positive arrays
+    return NormData.from_ndarrays(
+        "positive_data", X, y_positive, batch_effects
+    )
+
+
+@pytest.fixture
+def positive_test_norm_data(
+    test_arrays: tuple,
+) -> NormData:
+    """Create test NormData with strictly positive Y values.
+
+    Takes the standard test arrays and shifts Y so that
+    every element is strictly greater than zero.
+
+    Returns
+    -------
+    NormData
+        Dataset whose Y column values are all > 0.
+    """
+    # Unpack the standard test arrays
+    X, y, batch_effects = test_arrays
+    # Shift Y so every value is strictly positive
+    y_positive = np.abs(y) + 1.0
+    # Build a NormData from the positive arrays
+    return NormData.from_ndarrays(
+        "positive_test_data", X, y_positive, batch_effects
+    )
+
+
+@pytest.fixture
+def norm_test_model_with_log_transform(
+    save_dir_test_model: str,
+) -> NormativeModel:
+    """Create a NormativeModel using TestModel with log1p.
+
+    The model has ``y_transform='log1p'`` so that Y is
+    log-transformed before fitting and back-transformed
+    after prediction, ensuring all outputs remain positive.
+
+    Returns
+    -------
+    NormativeModel
+        Un-fitted normative model with log1p transform.
+    """
+    # Build a fresh save directory
+    log_dir = os.path.join(save_dir_test_model, "log1p")
+    if os.path.exists(log_dir):
+        shutil.rmtree(log_dir)
+    os.makedirs(log_dir, exist_ok=True)
+    # Create the underlying test regression model
+    test_model = TestModel("test_model_log1p")
+    # Return a NormativeModel with the log1p transform
+    return NormativeModel(
+        template_regression_model=test_model,
+        savemodel=False,
+        saveresults=False,
+        evaluate_model=False,
+        saveplots=False,
+        save_dir=log_dir,
+        inscaler="standardize",
+        outscaler="standardize",
+        y_transform="log1p",
+        name="test_model_log1p",
+    )

From f79717e8270ddc04c6e6ea14d628f68336da656c Mon Sep 17 00:00:00 2001
From: contsili <konstantinos.tsilimparis@outlook.com>
Date: Mon, 2 Mar 2026 18:40:00 +0100
Subject: [PATCH 04/22] enh - add log transform tests for centiles and
 predictions to ensure non-negativity

---
 test/test_norm/test_normative_model_helper.py | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py
index 3caf02a6..38a39468 100644
--- a/test/test_norm/test_normative_model_helper.py
+++ b/test/test_norm/test_normative_model_helper.py
@@ -295,3 +295,55 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro
     assert model1.l_bfgs_b_norm == "l2"
     assert model1.fixed_effect
     assert not model1.fixed_effect_var
+
+
+def test_001_log_transform_centiles_should_beNonNegative(
+    norm_test_model_with_log_transform: NormativeModel,
+    positive_norm_data: NormData,
+    positive_test_norm_data: NormData,
+) -> None:
+    """Centiles must be >= 0 after log1p round-trip.
+
+    Extreme lower-tail centiles may be clipped to exactly 0
+    by the positivity enforcement, so we check >= 0.
+    """
+    # Arrange – fit the model, then predict on test data
+    norm_test_model_with_log_transform.fit_predict(
+        positive_norm_data, positive_test_norm_data
+    )
+    # Assert – every centile in the back-transformed space >= 0
+    assert bool(
+        np.all(positive_test_norm_data["centiles"].values >= 0)
+    )
+
+
+def test_002_log_transform_yhat_should_bePositive(
+    norm_test_model_with_log_transform: NormativeModel,
+    positive_norm_data: NormData,
+    positive_test_norm_data: NormData,
+) -> None:
+    """Yhat must be > 0 after log1p round-trip."""
+    # Arrange – fit the model, then predict on test data
+    norm_test_model_with_log_transform.fit_predict(
+        positive_norm_data, positive_test_norm_data
+    )
+    # Assert – every Yhat in the back-transformed space > 0
+    assert bool(
+        np.all(positive_test_norm_data["Yhat"].values > 0)
+    )
+
+
+def test_003_log_transform_Y_should_bePositive(
+    norm_test_model_with_log_transform: NormativeModel,
+    positive_norm_data: NormData,
+    positive_test_norm_data: NormData,
+) -> None:
+    """Y must remain > 0 after the log1p round-trip."""
+    # Arrange – fit the model, then predict on test data
+    norm_test_model_with_log_transform.fit_predict(
+        positive_norm_data, positive_test_norm_data
+    )
+    # Assert – every Y value after the round-trip is > 0
+    assert bool(
+        np.all(positive_test_norm_data["Y"].values > 0)
+    )

From 2774ddfa65f06ea3f9e1d48976311968af7d9e7d Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Tue, 3 Mar 2026 17:53:00 +0100
Subject: [PATCH 05/22] enh - refactor log transformed centiles and yhat tests

- Add BLR model fixtures (instead of test model fixtures)
- adjust assertions: centiles and yhat should be > -1
---
 test/test_norm/test_normative_model_helper.py | 65 ++++++++-----------
 1 file changed, 27 insertions(+), 38 deletions(-)

diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py
index 38a39468..36b221f9 100644
--- a/test/test_norm/test_normative_model_helper.py
+++ b/test/test_norm/test_normative_model_helper.py
@@ -10,6 +10,7 @@
 from pcntoolkit.regression_model.blr import *
 from pcntoolkit.regression_model.hbr import *
 from test import test_norm
+from test.fixtures.blr_model_fixtures import *
 from test.fixtures.data_fixtures import *
 from test.fixtures.norm_data_fixtures import *
 from test.fixtures.path_fixtures import *
@@ -296,54 +297,42 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro
     assert model1.fixed_effect
     assert not model1.fixed_effect_var
 
-
-def test_001_log_transform_centiles_should_beNonNegative(
-    norm_test_model_with_log_transform: NormativeModel,
-    positive_norm_data: NormData,
-    positive_test_norm_data: NormData,
+def test_log_transformed_centiles(
+    norm_blr_model_with_log_transform: NormativeModel,
+    norm_data_from_arrays: NormData,
+    test_norm_data_from_arrays: NormData,
 ) -> None:
-    """Centiles must be >= 0 after log1p round-trip.
-
-    Extreme lower-tail centiles may be clipped to exactly 0
-    by the positivity enforcement, so we check >= 0.
-    """
-    # Arrange – fit the model, then predict on test data
-    norm_test_model_with_log_transform.fit_predict(
-        positive_norm_data, positive_test_norm_data
+    norm_blr_model_with_log_transform.fit_predict(
+        norm_data_from_arrays, test_norm_data_from_arrays
     )
-    # Assert – every centile in the back-transformed space >= 0
+    
+    # Check that Y are non-negative in the original data
     assert bool(
-        np.all(positive_test_norm_data["centiles"].values >= 0)
+        np.all(test_norm_data_from_arrays["Y"].values >= 0)
     )
-
-
-def test_002_log_transform_yhat_should_bePositive(
-    norm_test_model_with_log_transform: NormativeModel,
-    positive_norm_data: NormData,
-    positive_test_norm_data: NormData,
-) -> None:
-    """Yhat must be > 0 after log1p round-trip."""
-    # Arrange – fit the model, then predict on test data
-    norm_test_model_with_log_transform.fit_predict(
-        positive_norm_data, positive_test_norm_data
+    # Both training and test centiles should be bigger or equal to -1
+    assert bool(
+        np.all(norm_data_from_arrays["centiles"].values > -1)
     )
-    # Assert – every Yhat in the back-transformed space > 0
     assert bool(
-        np.all(positive_test_norm_data["Yhat"].values > 0)
+        np.all(test_norm_data_from_arrays["centiles"].values > -1)
     )
 
 
-def test_003_log_transform_Y_should_bePositive(
-    norm_test_model_with_log_transform: NormativeModel,
-    positive_norm_data: NormData,
-    positive_test_norm_data: NormData,
+
+def test_log_transformed_yhat(
+    norm_blr_model_with_log_transform: NormativeModel,
+    norm_data_from_arrays: NormData,
+    test_norm_data_from_arrays: NormData,
 ) -> None:
-    """Y must remain > 0 after the log1p round-trip."""
-    # Arrange – fit the model, then predict on test data
-    norm_test_model_with_log_transform.fit_predict(
-        positive_norm_data, positive_test_norm_data
+    norm_blr_model_with_log_transform.fit_predict(
+        norm_data_from_arrays, test_norm_data_from_arrays
+    )
+    # We dont expect any negative yhat values in the train dataset
+    assert bool(
+        np.all(norm_data_from_arrays["Yhat"].values >= 0)
     )
-    # Assert – every Y value after the round-trip is > 0
+    # test dataset: -1 possibility due to extrapolation
     assert bool(
-        np.all(positive_test_norm_data["Y"].values > 0)
+        np.all(test_norm_data_from_arrays["Yhat"].values > -1)
     )

From c791079b3bfe1e0582290828f4dd0dc8a543ab77 Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Tue, 3 Mar 2026 17:53:58 +0100
Subject: [PATCH 06/22] fix - discard test model fixtures

---
 test/fixtures/test_model_fixtures.py | 84 ----------------------------
 1 file changed, 84 deletions(-)

diff --git a/test/fixtures/test_model_fixtures.py b/test/fixtures/test_model_fixtures.py
index 8e7aa8b1..0fa761db 100644
--- a/test/fixtures/test_model_fixtures.py
+++ b/test/fixtures/test_model_fixtures.py
@@ -47,87 +47,3 @@ def fitted_norm_test_model(new_norm_test_model: NormativeModel, norm_data_from_a
     os.makedirs(new_norm_test_model.save_dir, exist_ok=True)
     new_norm_test_model.fit(norm_data_from_arrays)
     return new_norm_test_model
-
-@pytest.fixture
-def positive_norm_data(
-    train_arrays: tuple,
-) -> NormData:
-    """Create NormData with strictly positive Y values.
-
-    Takes the standard training arrays and shifts Y so
-    that every element is strictly greater than zero.
-
-    Returns
-    -------
-    NormData
-        Dataset whose Y column values are all > 0.
-    """
-    # Unpack the standard training arrays
-    X, y, batch_effects = train_arrays
-    # Shift Y so every value is strictly positive
-    y_positive = np.abs(y) + 1.0
-    # Build a NormData from the positive arrays
-    return NormData.from_ndarrays(
-        "positive_data", X, y_positive, batch_effects
-    )
-
-
-@pytest.fixture
-def positive_test_norm_data(
-    test_arrays: tuple,
-) -> NormData:
-    """Create test NormData with strictly positive Y values.
-
-    Takes the standard test arrays and shifts Y so that
-    every element is strictly greater than zero.
-
-    Returns
-    -------
-    NormData
-        Dataset whose Y column values are all > 0.
-    """
-    # Unpack the standard test arrays
-    X, y, batch_effects = test_arrays
-    # Shift Y so every value is strictly positive
-    y_positive = np.abs(y) + 1.0
-    # Build a NormData from the positive arrays
-    return NormData.from_ndarrays(
-        "positive_test_data", X, y_positive, batch_effects
-    )
-
-
-@pytest.fixture
-def norm_test_model_with_log_transform(
-    save_dir_test_model: str,
-) -> NormativeModel:
-    """Create a NormativeModel using TestModel with log1p.
-
-    The model has ``y_transform='log1p'`` so that Y is
-    log-transformed before fitting and back-transformed
-    after prediction, ensuring all outputs remain positive.
-
-    Returns
-    -------
-    NormativeModel
-        Un-fitted normative model with log1p transform.
-    """
-    # Build a fresh save directory
-    log_dir = os.path.join(save_dir_test_model, "log1p")
-    if os.path.exists(log_dir):
-        shutil.rmtree(log_dir)
-    os.makedirs(log_dir, exist_ok=True)
-    # Create the underlying test regression model
-    test_model = TestModel("test_model_log1p")
-    # Return a NormativeModel with the log1p transform
-    return NormativeModel(
-        template_regression_model=test_model,
-        savemodel=False,
-        saveresults=False,
-        evaluate_model=False,
-        saveplots=False,
-        save_dir=log_dir,
-        inscaler="standardize",
-        outscaler="standardize",
-        y_transform="log1p",
-        name="test_model_log1p",
-    )

From fdc7371d56ddf15683f4f84a6fb0a13f963460fb Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Tue, 3 Mar 2026 17:55:19 +0100
Subject: [PATCH 07/22] enh -  add blr model fixtures

---
 test/fixtures/blr_model_fixtures.py | 37 +++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/test/fixtures/blr_model_fixtures.py b/test/fixtures/blr_model_fixtures.py
index 87129235..88958249 100644
--- a/test/fixtures/blr_model_fixtures.py
+++ b/test/fixtures/blr_model_fixtures.py
@@ -1,3 +1,5 @@
+from math import log
+
 import pytest
 
 from pcntoolkit.dataio.norm_data import NormData
@@ -6,6 +8,7 @@
 from pcntoolkit.regression_model.blr import BLR
 from test.fixtures.norm_data_fixtures import *
 from test.fixtures.path_fixtures import *
+import os
 
 
 @pytest.fixture
@@ -69,3 +72,37 @@ def fitted_norm_blr_model(new_norm_blr_model: NormativeModel, norm_data_from_arr
     os.makedirs(new_norm_blr_model.save_dir, exist_ok=True)
     new_norm_blr_model.fit(norm_data_from_arrays)
     return new_norm_blr_model
+
+
+@pytest.fixture
+def norm_blr_model_with_log_transform(
+    save_dir_test_model: str
+) -> NormativeModel:
+    """Create a NormativeModel using BLR with log1p.
+
+    Returns
+    -------
+    NormativeModel
+        Un-fitted normative model with log1p transform.
+    """
+    # Build a fresh save directory
+    log_dir = os.path.join(save_dir_test_model, "log1p")
+    if os.path.exists(log_dir):
+        shutil.rmtree(log_dir)
+    os.makedirs(log_dir, exist_ok=True)
+    # Create  test regression model
+    test_model = BLR("name=test_model_log1p")
+    # Return a NormativeModel with the log1p transform
+    return NormativeModel(
+        template_regression_model=test_model,
+        savemodel=False,
+        saveresults=False,
+        evaluate_model=False,
+        saveplots=True,
+        save_dir=log_dir,
+        inscaler="standardize",
+        outscaler="standardize",
+        name="test_model_log1p",
+        y_transform=None,
+    )
+

From fce4ee36b08c22ea775053d7acd294ef965cdfa8 Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Tue, 3 Mar 2026 18:00:26 +0100
Subject: [PATCH 08/22] fix - remove redundancies

test_normative_model is the same as test_normative_model_helper
---
 test/test_norm/test_normative_model.py        | 296 ------------------
 test/test_norm/test_normative_model_helper.py |   3 +-
 2 files changed, 1 insertion(+), 298 deletions(-)
 delete mode 100644 test/test_norm/test_normative_model.py

diff --git a/test/test_norm/test_normative_model.py b/test/test_norm/test_normative_model.py
deleted file mode 100644
index 2e50d191..00000000
--- a/test/test_norm/test_normative_model.py
+++ /dev/null
@@ -1,296 +0,0 @@
-import copy
-
-import numpy as np
-import pytest
-
-from pcntoolkit.dataio.norm_data import NormData
-from pcntoolkit.math_functions.likelihood import NormalLikelihood
-from pcntoolkit.math_functions.prior import *
-from pcntoolkit.normative_model import NormativeModel
-from pcntoolkit.regression_model.blr import *
-from pcntoolkit.regression_model.hbr import *
-from test.fixtures.data_fixtures import *
-from test.fixtures.norm_data_fixtures import *
-from test.fixtures.path_fixtures import *
-from test.fixtures.test_model_fixtures import *
-
-"""
-This file contains tests for the NormBase class in the PCNtoolkit.
-
-The tests cover the following aspects:
-1. Fitting the model with valid data
-2. Handling invalid data for fitting
-3. Scaling methods for data
-4. Polynomial basis expansion
-5. B-spline basis expansion
-"""
-
-
-def test_fit(new_norm_test_model: NormativeModel, norm_data_from_arrays: NormData):
-    if os.path.exists(new_norm_test_model.save_dir):
-        shutil.rmtree(new_norm_test_model.save_dir)
-    os.makedirs(new_norm_test_model.save_dir, exist_ok=True)
-    new_norm_test_model.fit(norm_data_from_arrays)
-    assert new_norm_test_model.is_fitted
-
-
-def test_fit_predict(new_norm_test_model: NormativeModel, norm_data_from_arrays: NormData, test_norm_data_from_arrays: NormData):
-    if os.path.exists(new_norm_test_model.save_dir):
-        shutil.rmtree(new_norm_test_model.save_dir)
-    os.makedirs(new_norm_test_model.save_dir, exist_ok=True)
-    new_norm_test_model.fit_predict(norm_data_from_arrays, test_norm_data_from_arrays)
-    assert new_norm_test_model.is_fitted
-
-
-def test_predict(fitted_norm_test_model: NormativeModel, test_norm_data_from_arrays: NormData):
-    os.makedirs(fitted_norm_test_model.save_dir, exist_ok=True)
-    fitted_norm_test_model.predict(test_norm_data_from_arrays)
-    assert fitted_norm_test_model.is_fitted
-
-
-def test_harmonize(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData):
-    fitted_norm_test_model.harmonize(norm_data_from_arrays)
-    assert fitted_norm_test_model.is_fitted
-
-
-def test_compute_zscores(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData):
-    fitted_norm_test_model.compute_zscores(norm_data_from_arrays)
-    assert fitted_norm_test_model.is_fitted
-
-
-def test_compute_centiles(fitted_norm_test_model: NormativeModel, norm_data_from_arrays: NormData):
-    fitted_norm_test_model.compute_centiles(norm_data_from_arrays)
-    assert fitted_norm_test_model.is_fitted
-
-
-# Test the fit method of the NormBase class with invalid data
-def test_norm_base_fit_invalid_data(new_norm_test_model: NormativeModel):
-    with pytest.raises(AttributeError):
-        new_norm_test_model.fit(None)
-
-    with pytest.raises(AttributeError):
-        new_norm_test_model.fit("invalid_data")
-
-
-# Scaling tests
-@pytest.mark.parametrize("scaler", ["standardize", "minmax"])
-def test_scaling(
-    norm_data_from_arrays,
-    new_norm_test_model: NormativeModel,
-    n_covariates,
-    n_response_vars,
-    scaler,
-):
-    object.__setattr__(new_norm_test_model, "inscaler", scaler)
-    object.__setattr__(new_norm_test_model, "outscaler", scaler)
-    copydata = copy.deepcopy(norm_data_from_arrays)
-    copydata.attrs["is_scaled"] = False
-    for s in new_norm_test_model.inscalers:
-        s.is_fitted = False
-    for s in new_norm_test_model.outscalers:
-        s.is_fitted = False
-    X_bak = norm_data_from_arrays.X.data.copy()
-    y_bak = norm_data_from_arrays.Y.data.copy()
-    new_norm_test_model.scale_forward(copydata)
-
-    if scaler == "standardize":
-        assert_standardized(copydata, n_covariates, n_response_vars)
-    elif scaler == "minmax":
-        assert_minmax_scaled(copydata)
-
-    new_norm_test_model.scale_backward(copydata)
-    assert np.allclose(copydata.X.data, X_bak)
-    assert np.allclose(copydata.Y.data, y_bak)
-
-
-def assert_standardized(data, n_covariates, n_response_vars):
-    assert data.X.data.mean(axis=0) == pytest.approx(n_covariates * [0])
-    assert data.X.data.std(axis=0) == pytest.approx(n_covariates * [1])
-    assert data.Y.data.mean(axis=0) == pytest.approx(n_response_vars * [0])
-    assert data.Y.data.std(axis=0) == pytest.approx(n_response_vars * [1])
-
-
-def assert_minmax_scaled(data):
-    assert np.allclose(data.X.data.min(axis=0), 0)
-    assert np.allclose(data.X.data.max(axis=0), 1)
-    assert np.allclose(data.Y.data.min(axis=0), 0)
-    assert np.allclose(data.Y.data.max(axis=0), 1)
-
-
-def test_test_model_to_and_from_dict_and_args(test_model_args: dict, norm_data_from_arrays: NormData, save_dir_test_model):
-    model = NormativeModel.from_args(**test_model_args)
-    model_dict = model.to_dict()
-    assert model_dict["template_regression_model"]["success_ratio"] == test_model_args["success_ratio"]
-    model.fit(norm_data_from_arrays)
-    assert model.is_fitted
-    model_dict = model.to_dict()
-    assert model_dict["template_regression_model"]["success_ratio"] == test_model_args["success_ratio"]
-    model.predict(norm_data_from_arrays)
-    assert hasattr(norm_data_from_arrays, "Z")
-    Z_bak = copy.deepcopy(norm_data_from_arrays["Z"])
-    del norm_data_from_arrays["Z"]
-    assert not hasattr(norm_data_from_arrays, "Z")
-    model.save(save_dir_test_model)
-    loaded_model = NormativeModel.load(save_dir_test_model)
-    assert loaded_model.is_fitted
-    loaded_model.predict(norm_data_from_arrays)
-    assert np.allclose(norm_data_from_arrays["Z"], Z_bak)
-
-
-@pytest.fixture
-def hbr_model_args(save_dir_hbr):
-    return {
-        "savemodel": False,
-        "saveresults": False,
-        "evaluate_model": False,
-        "saveplots": False,
-        "save_dir": save_dir_hbr,
-        "inscaler": "standardize",
-        "outscaler": "standardize",
-        "name": "hbr_test_model",
-        "alg": "hbr",
-        "likelihood": "Normal",
-        "linear_mu": True,
-        "random_mu": False,
-        "random_slope_mu": False,
-        "random_intercept_mu": True,
-        "linear_sigma": False,
-        "random_sigma": True,
-        "dist_name_sigma_sigma": "LogNormal",
-        "dist_params_sigma_sigma": (2.5, 1.3),
-        "draws": 10,
-        "tune": 10,
-        "cores": 1,
-    }
-
-
-def test_hbr_model_to_and_from_dict_and_args(hbr_model_args: dict, norm_data_from_arrays: NormData, save_dir_hbr):
-    model = NormativeModel.from_args(**hbr_model_args)
-    model_dict = model.to_dict()
-    for k in ["savemodel", "saveresults", "evaluate_model", "saveplots", "inscaler", "outscaler", "name"]:
-        assert model_dict[k] == hbr_model_args[k]
-    tmplt = model.template_regression_model
-    assert isinstance(tmplt, HBR)
-    assert isinstance(tmplt.likelihood, NormalLikelihood)
-    assert isinstance(tmplt.likelihood.mu, LinearPrior)
-    assert isinstance(tmplt.likelihood.mu.intercept, RandomPrior)
-    assert isinstance(tmplt.likelihood.sigma, RandomPrior)
-    assert isinstance(tmplt.likelihood.sigma.sigma, Prior)
-    assert tmplt.likelihood.sigma.sigma.dist_name == "LogNormal"
-    assert tmplt.likelihood.sigma.sigma.dist_params == (2.5, 1.3)
-    model.fit(norm_data_from_arrays)
-    assert model.is_fitted
-
-    model1 = model[model.response_vars[0]]
-    assert isinstance(model1, HBR)
-    assert isinstance(model1.likelihood, NormalLikelihood)
-    assert isinstance(model1.likelihood.mu, LinearPrior)
-    assert isinstance(model1.likelihood.mu.intercept, RandomPrior)
-    assert isinstance(model1.likelihood.sigma, RandomPrior)
-    assert isinstance(model1.likelihood.sigma.sigma, Prior)
-    assert model1.likelihood.sigma.sigma.dist_name == "LogNormal"
-    assert model1.likelihood.sigma.sigma.dist_params == (2.5, 1.3)
-
-    model.predict(norm_data_from_arrays)
-    assert hasattr(norm_data_from_arrays, "Z")
-    Z_bak = copy.deepcopy(norm_data_from_arrays["Z"])
-    del norm_data_from_arrays["Z"]
-    assert not hasattr(norm_data_from_arrays, "Z")
-    model.save(save_dir_hbr)
-    del model
-    loaded_model = NormativeModel.load(save_dir_hbr)
-    loaded_model.predict(norm_data_from_arrays)
-    assert np.allclose(norm_data_from_arrays["Z"], Z_bak)
-    assert loaded_model.is_fitted
-    model1 = loaded_model[loaded_model.response_vars[0]]
-    assert isinstance(model1, HBR)
-    assert isinstance(model1.likelihood, NormalLikelihood)
-    assert isinstance(model1.likelihood.mu, LinearPrior)
-    assert isinstance(model1.likelihood.mu.intercept, RandomPrior)
-    assert isinstance(model1.likelihood.sigma, RandomPrior)
-    assert isinstance(model1.likelihood.sigma.sigma, Prior)
-    assert model1.likelihood.sigma.sigma.dist_name == "LogNormal"
-    assert model1.likelihood.sigma.sigma.dist_params == [2.5, 1.3]
-
-
-@pytest.fixture
-def blr_model_args(save_dir_blr):
-    return {
-        "savemodel": False,
-        "saveresults": False,
-        "evaluate_model": False,
-        "saveplots": False,
-        "save_dir": save_dir_blr,
-        "inscaler": "standardize",
-        "outscaler": "standardize",
-        "name": "blr_test_model",
-        "alg": "blr",
-        "n_iter": 10,
-        "tol": 1e-3,
-        "ard": False,
-        "optimizer": "l-bfgs-b",
-        "l_bfgs_b_l": 0.7,
-        "l_bfgs_b_epsilon": 0.1,
-        "l_bfgs_b_norm": "l2",
-        "fixed_effect": True,
-        "heteroskedastic": False,
-        "fixed_effect_var": False,
-    }
-
-
-def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_from_arrays: NormData, save_dir_blr):
-    model = NormativeModel.from_args(**blr_model_args)
-    model_dict = model.to_dict()
-    for k in ["savemodel", "saveresults", "evaluate_model", "saveplots", "inscaler", "outscaler", "name"]:
-        assert model_dict[k] == blr_model_args[k]
-    tmplt = model.template_regression_model
-    assert isinstance(tmplt, BLR)
-    assert tmplt.n_iter == 10
-    assert tmplt.tol == 1e-3
-    assert not tmplt.ard
-    assert tmplt.optimizer == "l-bfgs-b"
-    assert tmplt.l_bfgs_b_l == 0.7
-    assert tmplt.l_bfgs_b_epsilon == 0.1
-    assert tmplt.l_bfgs_b_norm == "l2"
-    assert tmplt.fixed_effect
-    assert not tmplt.heteroskedastic
-    assert not tmplt.fixed_effect_var
-    print("Fitting")
-    model.fit(norm_data_from_arrays)
-    print("fitted")
-    assert model.is_fitted
-
-    model1 = model[model.response_vars[0]]
-    assert isinstance(model1, BLR)
-    assert model1.n_iter == 10
-    assert model1.tol == 1e-3
-    assert not model1.ard
-    assert model1.optimizer == "l-bfgs-b"
-    assert model1.l_bfgs_b_l == 0.7
-    assert model1.l_bfgs_b_epsilon == 0.1
-    assert model1.l_bfgs_b_norm == "l2"
-    assert model1.fixed_effect
-    assert not model1.fixed_effect_var
-    model.predict(norm_data_from_arrays)
-    assert hasattr(norm_data_from_arrays, "Z")
-    Z_bak = copy.deepcopy(norm_data_from_arrays["Z"])
-    del norm_data_from_arrays["Z"]
-    assert not hasattr(norm_data_from_arrays, "Z")
-    model.save(save_dir_blr)
-    del model
-    loaded_model = NormativeModel.load(save_dir_blr)
-    model1 = loaded_model[loaded_model.response_vars[0]]
-    loaded_model.predict(norm_data_from_arrays)
-    assert np.allclose(norm_data_from_arrays["Z"], Z_bak)
-
-    assert loaded_model.is_fitted
-    assert isinstance(model1, BLR)
-    assert model1.n_iter == 10
-    assert model1.tol == 1e-3
-    assert not model1.ard
-    assert model1.optimizer == "l-bfgs-b"
-    assert model1.l_bfgs_b_l == 0.7
-    assert model1.l_bfgs_b_epsilon == 0.1
-    assert model1.l_bfgs_b_norm == "l2"
-    assert model1.fixed_effect
-    assert not model1.fixed_effect_var
diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py
index 36b221f9..91b7c4c2 100644
--- a/test/test_norm/test_normative_model_helper.py
+++ b/test/test_norm/test_normative_model_helper.py
@@ -9,7 +9,6 @@
 from pcntoolkit.normative_model import NormativeModel
 from pcntoolkit.regression_model.blr import *
 from pcntoolkit.regression_model.hbr import *
-from test import test_norm
 from test.fixtures.blr_model_fixtures import *
 from test.fixtures.data_fixtures import *
 from test.fixtures.norm_data_fixtures import *
@@ -297,6 +296,7 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro
     assert model1.fixed_effect
     assert not model1.fixed_effect_var
 
+
 def test_log_transformed_centiles(
     norm_blr_model_with_log_transform: NormativeModel,
     norm_data_from_arrays: NormData,
@@ -319,7 +319,6 @@ def test_log_transformed_centiles(
     )
 
 
-
 def test_log_transformed_yhat(
     norm_blr_model_with_log_transform: NormativeModel,
     norm_data_from_arrays: NormData,

From 2f42207b58e579a348d286a1e4cb687b560258e9 Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 14:35:33 +0100
Subject: [PATCH 09/22] fix - remove redundant import

---
 test/fixtures/test_model_fixtures.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/fixtures/test_model_fixtures.py b/test/fixtures/test_model_fixtures.py
index 0fa761db..63a6bd9b 100644
--- a/test/fixtures/test_model_fixtures.py
+++ b/test/fixtures/test_model_fixtures.py
@@ -1,7 +1,6 @@
 import os
 import shutil
 
-import numpy as np
 import pytest
 
 from pcntoolkit.dataio.norm_data import NormData

From 27201ab770263f649b6ca4908a9428af1f2e1cdb Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 14:35:54 +0100
Subject: [PATCH 10/22] enh - add log transform functionality

---
 pcntoolkit/normative_model.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pcntoolkit/normative_model.py b/pcntoolkit/normative_model.py
index 89942170..facc2f5e 100644
--- a/pcntoolkit/normative_model.py
+++ b/pcntoolkit/normative_model.py
@@ -655,6 +655,10 @@ def _apply_y_transform(self, data: NormData) -> None:
             for var in "Y":
                 if var in data.data_vars:
                     data[var] = np.log1p(data[var])
+        elif self.y_transform == "log":
+            for var in "Y":
+                if var in data.data_vars:
+                    data[var] = np.log(data[var])
 
     def _invert_y_transform(self, data: NormData) -> None:
         """
@@ -680,6 +684,10 @@ def _invert_y_transform(self, data: NormData) -> None:
             for var in ("Y", "centiles", "Yhat", "Y_harmonized", "thrive_Y"):
                 if var in data.data_vars:
                     data[var] = np.expm1(data[var])
+        elif self.y_transform == "log":
+            for var in ("Y", "centiles", "Yhat", "Y_harmonized", "thrive_Y"):
+                if var in data.data_vars:
+                    data[var] = np.exp(data[var])
             
     def evaluate(self, data: NormData) -> None:
         """

From 87e41153020034765be7102b3666da4a0cda4c76 Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 14:40:14 +0100
Subject: [PATCH 11/22] enh - rename

---
 test/fixtures/blr_model_fixtures.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/test/fixtures/blr_model_fixtures.py b/test/fixtures/blr_model_fixtures.py
index 8316cc33..4ad6e424 100644
--- a/test/fixtures/blr_model_fixtures.py
+++ b/test/fixtures/blr_model_fixtures.py
@@ -87,7 +87,7 @@ def fitted_blr_model(
 
 
 @pytest.fixture
-def new_norm_blr_model(
+def norm_blr_model(
     blr_model_factory: Callable,
     save_dir_blr
 ) -> NormativeModel:
@@ -105,15 +105,15 @@ def new_norm_blr_model(
 
 
 @pytest.fixture
-def fitted_norm_blr_model(new_norm_blr_model: NormativeModel,
+def fitted_norm_blr_model(norm_blr_model: NormativeModel,
                           norm_data_from_arrays: NormData
                           ) -> NormativeModel:
     print("removing items")
-    if os.path.exists(new_norm_blr_model.save_dir):
-        shutil.rmtree(new_norm_blr_model.save_dir)
-    os.makedirs(new_norm_blr_model.save_dir, exist_ok=True)
-    new_norm_blr_model.fit(norm_data_from_arrays)
-    return new_norm_blr_model
+    if os.path.exists(norm_blr_model.save_dir):
+        shutil.rmtree(norm_blr_model.save_dir)
+    os.makedirs(norm_blr_model.save_dir, exist_ok=True)
+    norm_blr_model.fit(norm_data_from_arrays)
+    return norm_blr_model
 
 
 @pytest.fixture

From 80a442fe8d08f07b8a685fc85510a0401078a7ef Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 14:43:55 +0100
Subject: [PATCH 12/22] enh - rename

---
 test/fixtures/blr_model_fixtures.py           | 2 +-
 test/test_norm/test_normative_model_helper.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/fixtures/blr_model_fixtures.py b/test/fixtures/blr_model_fixtures.py
index 4ad6e424..b9e93aa2 100644
--- a/test/fixtures/blr_model_fixtures.py
+++ b/test/fixtures/blr_model_fixtures.py
@@ -117,7 +117,7 @@ def fitted_norm_blr_model(norm_blr_model: NormativeModel,
 
 
 @pytest.fixture
-def norm_blr_model_with_log_transform(
+def log_transform_norm_blr_model(
     save_dir_test_model: str
 ) -> NormativeModel:
     """Create a NormativeModel using BLR with log1p.
diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py
index 91b7c4c2..423acbda 100644
--- a/test/test_norm/test_normative_model_helper.py
+++ b/test/test_norm/test_normative_model_helper.py
@@ -298,11 +298,11 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro
 
 
 def test_log_transformed_centiles(
-    norm_blr_model_with_log_transform: NormativeModel,
+    log_transform_norm_blr_model: NormativeModel,
     norm_data_from_arrays: NormData,
     test_norm_data_from_arrays: NormData,
 ) -> None:
-    norm_blr_model_with_log_transform.fit_predict(
+    log_transform_norm_blr_model.fit_predict(
         norm_data_from_arrays, test_norm_data_from_arrays
     )
     
@@ -320,11 +320,11 @@ def test_log_transformed_centiles(
 
 
 def test_log_transformed_yhat(
-    norm_blr_model_with_log_transform: NormativeModel,
+    log_transform_norm_blr_model: NormativeModel,
     norm_data_from_arrays: NormData,
     test_norm_data_from_arrays: NormData,
 ) -> None:
-    norm_blr_model_with_log_transform.fit_predict(
+    log_transform_norm_blr_model.fit_predict(
         norm_data_from_arrays, test_norm_data_from_arrays
     )
     # We dont expect any negative yhat values in the train dataset

From 4d31037c3c7431bf5dcfea6e72f41037f2c38b28 Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 14:53:09 +0100
Subject: [PATCH 13/22] enh - refactor comments

---
 pcntoolkit/normative_model.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pcntoolkit/normative_model.py b/pcntoolkit/normative_model.py
index facc2f5e..84204309 100644
--- a/pcntoolkit/normative_model.py
+++ b/pcntoolkit/normative_model.py
@@ -651,12 +651,13 @@ def _apply_y_transform(self, data: NormData) -> None:
         # compute_thrivelines() that has a preprocess() call without a postprocess().
             
         if self.y_transform == "log1p":
-            # Apply transform to all Y-like input variables
-            for var in "Y":
+            # Apply log1p transform to the response variable Y
+            for var in ["Y"]:
                 if var in data.data_vars:
                     data[var] = np.log1p(data[var])
         elif self.y_transform == "log":
-            for var in "Y":
+            # Apply natural log transform to the response variable Y
+            for var in ["Y"]:
                 if var in data.data_vars:
                     data[var] = np.log(data[var])
 

From 71d5f704fb374a3c9d6a30fd84f3fbfe8b29ba68 Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 14:55:27 +0100
Subject: [PATCH 14/22] enh - add test fixtures for log transform

---
 test/fixtures/blr_model_fixtures.py | 40 ++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/test/fixtures/blr_model_fixtures.py b/test/fixtures/blr_model_fixtures.py
index b9e93aa2..6132ed6d 100644
--- a/test/fixtures/blr_model_fixtures.py
+++ b/test/fixtures/blr_model_fixtures.py
@@ -117,7 +117,7 @@ def fitted_norm_blr_model(norm_blr_model: NormativeModel,
 
 
 @pytest.fixture
-def log_transform_norm_blr_model(
+def log1p_transform_norm_blr_model(
     save_dir_test_model: str
 ) -> NormativeModel:
     """Create a NormativeModel using BLR with log1p.
@@ -133,7 +133,7 @@ def log_transform_norm_blr_model(
         shutil.rmtree(log_dir)
     os.makedirs(log_dir, exist_ok=True)
     # Create  test regression model
-    test_model = BLR("name=test_model_log1p")
+    test_model = BLR("test_model_log1p")
     # Return a NormativeModel with the log1p transform
     return NormativeModel(
         template_regression_model=test_model,
@@ -145,6 +145,40 @@ def log_transform_norm_blr_model(
         inscaler="standardize",
         outscaler="standardize",
         name="test_model_log1p",
-        y_transform=None,
+        y_transform="log1p",
+    )
+
+
+@pytest.fixture
+def log_transform_norm_blr_model(
+    save_dir_blr: str,
+) -> NormativeModel:
+    """Create a NormativeModel using BLR with natural log y_transform.
+
+    Returns
+    -------
+    NormativeModel
+        Un-fitted normative model with natural-log transform.
+    """
+    # Build a fresh save directory for this fixture
+    log_dir = os.path.join(save_dir_blr, "log")
+    if os.path.exists(log_dir):
+        # Remove stale directory from previous test runs
+        shutil.rmtree(log_dir)
+    os.makedirs(log_dir, exist_ok=True)
+    # Create a BLR regression model for the natural-log transform test
+    test_model = BLR("test_model_log")
+    # Return NormativeModel with natural-log transform enabled
+    return NormativeModel(
+        template_regression_model=test_model,
+        savemodel=False,
+        saveresults=False,
+        evaluate_model=False,
+        saveplots=True,
+        save_dir=log_dir,
+        inscaler="standardize",
+        outscaler="standardize",
+        name="test_model_log",
+        y_transform="log",
     )
 

From dd702bdc983ab463d7f1b796b2ba991f3d4d0b5a Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 15:01:10 +0100
Subject: [PATCH 15/22] enh - refactor existing log1p tests

---
 test/test_norm/test_normative_model_helper.py | 28 +++++++++++++++----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py
index 423acbda..35cf1fbf 100644
--- a/test/test_norm/test_normative_model_helper.py
+++ b/test/test_norm/test_normative_model_helper.py
@@ -297,12 +297,12 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro
     assert not model1.fixed_effect_var
 
 
-def test_log_transformed_centiles(
-    log_transform_norm_blr_model: NormativeModel,
+def test_log1p_transformed_centiles(
+    log1p_transform_norm_blr_model: NormativeModel,
     norm_data_from_arrays: NormData,
     test_norm_data_from_arrays: NormData,
 ) -> None:
-    log_transform_norm_blr_model.fit_predict(
+    log1p_transform_norm_blr_model.fit_predict(
         norm_data_from_arrays, test_norm_data_from_arrays
     )
     
@@ -310,7 +310,8 @@ def test_log_transformed_centiles(
     assert bool(
         np.all(test_norm_data_from_arrays["Y"].values >= 0)
     )
-    # Both training and test centiles should be bigger or equal to -1
+    # Both training and test centiles should be bigger than 0 due to the 
+    # exp(Y) - 1 transform
     assert bool(
         np.all(norm_data_from_arrays["centiles"].values > -1)
     )
@@ -319,7 +320,24 @@ def test_log_transformed_centiles(
     )
 
 
-def test_log_transformed_yhat(
+def test_log1p_transformed_yhat(
+    log1p_transform_norm_blr_model: NormativeModel,
+    norm_data_from_arrays: NormData,
+    test_norm_data_from_arrays: NormData,
+) -> None:
+    log1p_transform_norm_blr_model.fit_predict(
+        norm_data_from_arrays, test_norm_data_from_arrays
+    )
+    # We dont expect any negative yhat values in the train and test dataset
+    assert bool(
+        np.all(norm_data_from_arrays["Yhat"].values >= 0)
+    )
+    assert bool(
+        np.all(test_norm_data_from_arrays["Yhat"].values >= 0)
+    )
+
+
+def test_log_transformed_centiles(
     log_transform_norm_blr_model: NormativeModel,
     norm_data_from_arrays: NormData,
     test_norm_data_from_arrays: NormData,

From ba5844e117e33575bac4cf006bb43114ac1e3403 Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 15:02:18 +0100
Subject: [PATCH 16/22] enh - add log transform tests

---
 test/test_norm/test_normative_model_helper.py | 31 ++++++++++++++++---
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py
index 35cf1fbf..eb6905b6 100644
--- a/test/test_norm/test_normative_model_helper.py
+++ b/test/test_norm/test_normative_model_helper.py
@@ -345,11 +345,34 @@ def test_log_transformed_centiles(
     log_transform_norm_blr_model.fit_predict(
         norm_data_from_arrays, test_norm_data_from_arrays
     )
-    # We dont expect any negative yhat values in the train dataset
+
+    # Check that Y are positive in the original data
     assert bool(
-        np.all(norm_data_from_arrays["Yhat"].values >= 0)
+        np.all(test_norm_data_from_arrays["Y"].values > 0)
+    )
+    # Both training and test centiles should be bigger than 0 due to the exp(Y)
+    # transform
+    assert bool(
+        np.all(norm_data_from_arrays["centiles"].values > 0)
+    )
+    assert bool(
+        np.all(test_norm_data_from_arrays["centiles"].values > 0)
+    )
+
+
+def test_log_transformed_yhat(
+    log_transform_norm_blr_model: NormativeModel,
+    norm_data_from_arrays: NormData,
+    test_norm_data_from_arrays: NormData,
+) -> None:
+    log_transform_norm_blr_model.fit_predict(
+        norm_data_from_arrays, test_norm_data_from_arrays
     )
-    # test dataset: -1 possibility due to extrapolation
+    
+    # We dont expect any negative yhat values in the train and test dataset
     assert bool(
-        np.all(test_norm_data_from_arrays["Yhat"].values > -1)
+        np.all(norm_data_from_arrays["Yhat"].values > 0)
     )
+    assert bool(
+        np.all(test_norm_data_from_arrays["Yhat"].values > 0)
+    )
\ No newline at end of file

From e13a09a1960e40ad7df56f3740e0c14253cd27a6 Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 15:04:12 +0100
Subject: [PATCH 17/22] enh - group tests for faster execution

---
 test/test_norm/test_normative_model_helper.py | 31 +++++--------------
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py
index eb6905b6..69cd67d9 100644
--- a/test/test_norm/test_normative_model_helper.py
+++ b/test/test_norm/test_normative_model_helper.py
@@ -297,7 +297,7 @@ def test_blr_model_to_and_from_dict_and_args(blr_model_args: dict, norm_data_fro
     assert not model1.fixed_effect_var
 
 
-def test_log1p_transformed_centiles(
+def test_log1p_transform(
     log1p_transform_norm_blr_model: NormativeModel,
     norm_data_from_arrays: NormData,
     test_norm_data_from_arrays: NormData,
@@ -305,11 +305,12 @@ def test_log1p_transformed_centiles(
     log1p_transform_norm_blr_model.fit_predict(
         norm_data_from_arrays, test_norm_data_from_arrays
     )
-    
+
     # Check that Y are non-negative in the original data
     assert bool(
-        np.all(test_norm_data_from_arrays["Y"].values >= 0)
+        np.all(test_norm_data_from_arrays["Y"].values > 0)
     )
+
     # Both training and test centiles should be bigger than 0 due to the 
     # exp(Y) - 1 transform
     assert bool(
@@ -319,15 +320,6 @@ def test_log1p_transformed_centiles(
         np.all(test_norm_data_from_arrays["centiles"].values > -1)
     )
 
-
-def test_log1p_transformed_yhat(
-    log1p_transform_norm_blr_model: NormativeModel,
-    norm_data_from_arrays: NormData,
-    test_norm_data_from_arrays: NormData,
-) -> None:
-    log1p_transform_norm_blr_model.fit_predict(
-        norm_data_from_arrays, test_norm_data_from_arrays
-    )
     # We dont expect any negative yhat values in the train and test dataset
     assert bool(
         np.all(norm_data_from_arrays["Yhat"].values >= 0)
@@ -337,7 +329,7 @@ def test_log1p_transformed_yhat(
     )
 
 
-def test_log_transformed_centiles(
+def test_log_transformed(
     log_transform_norm_blr_model: NormativeModel,
     norm_data_from_arrays: NormData,
     test_norm_data_from_arrays: NormData,
@@ -350,6 +342,7 @@ def test_log_transformed_centiles(
     assert bool(
         np.all(test_norm_data_from_arrays["Y"].values > 0)
     )
+
     # Both training and test centiles should be bigger than 0 due to the exp(Y)
     # transform
     assert bool(
@@ -359,20 +352,10 @@ def test_log_transformed_centiles(
         np.all(test_norm_data_from_arrays["centiles"].values > 0)
     )
 
-
-def test_log_transformed_yhat(
-    log_transform_norm_blr_model: NormativeModel,
-    norm_data_from_arrays: NormData,
-    test_norm_data_from_arrays: NormData,
-) -> None:
-    log_transform_norm_blr_model.fit_predict(
-        norm_data_from_arrays, test_norm_data_from_arrays
-    )
-    
     # We dont expect any negative yhat values in the train and test dataset
     assert bool(
         np.all(norm_data_from_arrays["Yhat"].values > 0)
     )
     assert bool(
         np.all(test_norm_data_from_arrays["Yhat"].values > 0)
-    )
\ No newline at end of file
+    )

From 9b0ec3b00fc32f7f080f90b8eac0b69d636d4e2d Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 17:04:18 +0100
Subject: [PATCH 18/22] enh - check that the log transform can be performed

---
 pcntoolkit/normative_model.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/pcntoolkit/normative_model.py b/pcntoolkit/normative_model.py
index 84204309..05f16559 100644
--- a/pcntoolkit/normative_model.py
+++ b/pcntoolkit/normative_model.py
@@ -56,9 +56,9 @@ class NormativeModel:
         Output (Y/response_vars) scaler to use.
     y_transform : str or None
         Optional transform applied to Y before fitting and inverted
-        after prediction. Currently supported: ``"log"`` applies
-        log(Y+1) before fitting and then inverts centiles of the resulting 
-        model back in the original space.
+        after prediction. Currently supported:
+        - ``"log1p"`` applies log(Y+1)
+        - ``"log"`` applies natural log(Y)
         This is useful for phenotypes that cannot be negative.
         Default is ``None`` (no transform).
     name: str
@@ -642,8 +642,7 @@ def _apply_y_transform(self, data: NormData) -> None:
         """
         if self.y_transform is None:
             return
-        
-                
+      
         # TODO: Check if we need to track if transform has already been 
         # applied to avoid double-inverting. Normally I dont expect any issues 
         # as every process() is followed by a postprocess(). The only issues can
@@ -653,12 +652,25 @@ def _apply_y_transform(self, data: NormData) -> None:
         if self.y_transform == "log1p":
             # Apply log1p transform to the response variable Y
             for var in ["Y"]:
-                if var in data.data_vars:
+                if (data[var] < -1).any():
+                    raise ValueError("Cannot apply log1p transform to variable "
+                                     f"'{var}' because it contains values less "
+                                     "than -1."
+                                     )
+                else:
                     data[var] = np.log1p(data[var])
+                    
         elif self.y_transform == "log":
             # Apply natural log transform to the response variable Y
             for var in ["Y"]:
-                if var in data.data_vars:
+                if (data[var] <= 0).any():
+                    raise ValueError(
+                        f"Cannot apply log transform to variable '{var}' "
+                        "because it contains non-positive values. "
+                        "Consider using 'log1p' transform or ensuring "
+                        "all values are positive."
+                    )
+                else:
                     data[var] = np.log(data[var])
 
     def _invert_y_transform(self, data: NormData) -> None:

From 235d13e4f73918c3b054d4e66e20c520e24b5d15 Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 17:04:44 +0100
Subject: [PATCH 19/22] enh - plotter should assign positive response vars

---
 pcntoolkit/util/plotter.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pcntoolkit/util/plotter.py b/pcntoolkit/util/plotter.py
index 2a675474..d9f2ccc9 100644
--- a/pcntoolkit/util/plotter.py
+++ b/pcntoolkit/util/plotter.py
@@ -101,9 +101,10 @@ def plot_centiles(
     # Batch effects are the first ones in the highlighted batch effects
     for be, v in batch_effects.items():
         centile_df[be] = v
-    # Response vars are all 0, we don't need them
+    # Assign random values for response vars because they are not needed.
+    # They must be > 0 to satisfy later checks that require response_vars > 0.
     for rv in response_vars:
-        centile_df[rv] = 0
+        centile_df[rv] = 1e-6
 
     centile_data = NormData.from_dataframe(
         "centile",
@@ -352,9 +353,10 @@ def plot_centiles_advanced(
     # Batch effects are the first ones in the highlighted batch effects
     for be, v in batch_effects.items():
         centile_df[be] = v[0]
-    # Response vars are all 0, we don't need them
+    # Assign random values for response vars because they are not needed.
+    # They must be > 0 to satisfy later checks that require response_vars > 0.
     for rv in model.response_vars:
-        centile_df[rv] = 0
+        centile_df[rv] = 1e-6
     centile_data = NormData.from_dataframe(
         "centile",
         dataframe=centile_df,

From bc734f065e33b43e6d102042ba1de5160d3ea0d5 Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 17:05:53 +0100
Subject: [PATCH 20/22] enh - last improvements to the tests

---
 test/fixtures/blr_model_fixtures.py           | 16 ++++++++--------
 test/test_norm/test_normative_model_helper.py |  6 ++++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/test/fixtures/blr_model_fixtures.py b/test/fixtures/blr_model_fixtures.py
index 6132ed6d..c6dfebdf 100644
--- a/test/fixtures/blr_model_fixtures.py
+++ b/test/fixtures/blr_model_fixtures.py
@@ -133,14 +133,14 @@ def log1p_transform_norm_blr_model(
         shutil.rmtree(log_dir)
     os.makedirs(log_dir, exist_ok=True)
     # Create  test regression model
-    test_model = BLR("test_model_log1p")
+    blr_model = BLR("test_model_log1p")
     # Return a NormativeModel with the log1p transform
     return NormativeModel(
-        template_regression_model=test_model,
+        template_regression_model=blr_model,
         savemodel=False,
         saveresults=False,
         evaluate_model=False,
-        saveplots=True,
+        saveplots=False,
         save_dir=log_dir,
         inscaler="standardize",
         outscaler="standardize",
@@ -151,7 +151,7 @@ def log1p_transform_norm_blr_model(
 
 @pytest.fixture
 def log_transform_norm_blr_model(
-    save_dir_blr: str,
+    save_dir_test_model: str,
 ) -> NormativeModel:
     """Create a NormativeModel using BLR with natural log y_transform.
 
@@ -161,20 +161,20 @@ def log_transform_norm_blr_model(
         Un-fitted normative model with natural-log transform.
     """
     # Build a fresh save directory for this fixture
-    log_dir = os.path.join(save_dir_blr, "log")
+    log_dir = os.path.join(save_dir_test_model, "log")
     if os.path.exists(log_dir):
         # Remove stale directory from previous test runs
         shutil.rmtree(log_dir)
     os.makedirs(log_dir, exist_ok=True)
     # Create a BLR regression model for the natural-log transform test
-    test_model = BLR("test_model_log")
+    blr_model = BLR("test_model_log")
     # Return NormativeModel with natural-log transform enabled
     return NormativeModel(
-        template_regression_model=test_model,
+        template_regression_model=blr_model,
         savemodel=False,
         saveresults=False,
         evaluate_model=False,
-        saveplots=True,
+        saveplots=False,
         save_dir=log_dir,
         inscaler="standardize",
         outscaler="standardize",
diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py
index 69cd67d9..8ddc72d2 100644
--- a/test/test_norm/test_normative_model_helper.py
+++ b/test/test_norm/test_normative_model_helper.py
@@ -308,7 +308,7 @@ def test_log1p_transform(
 
     # Check that Y are non-negative in the original data
     assert bool(
-        np.all(test_norm_data_from_arrays["Y"].values > 0)
+        np.all(test_norm_data_from_arrays["Y"].values >= 0)
     )
 
     # Both training and test centiles should be bigger than 0 due to the 
@@ -337,7 +337,9 @@ def test_log_transformed(
     log_transform_norm_blr_model.fit_predict(
         norm_data_from_arrays, test_norm_data_from_arrays
     )
-
+    # Force the data to be >= 1e-6
+    test_norm_data_from_arrays["Y"].values.clip(min=1e-6)
+    
     # Check that Y are positive in the original data
     assert bool(
         np.all(test_norm_data_from_arrays["Y"].values > 0)

From 60e5ea6fbe1f682aba898194f5d24095cc0922bf Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 17:09:47 +0100
Subject: [PATCH 21/22] fix - clip before fitting

---
 test/test_norm/test_normative_model_helper.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/test/test_norm/test_normative_model_helper.py b/test/test_norm/test_normative_model_helper.py
index 8ddc72d2..6e5a5549 100644
--- a/test/test_norm/test_normative_model_helper.py
+++ b/test/test_norm/test_normative_model_helper.py
@@ -334,12 +334,13 @@ def test_log_transformed(
     norm_data_from_arrays: NormData,
     test_norm_data_from_arrays: NormData,
 ) -> None:
+    # Force the data to be >= 1e-6
+    test_norm_data_from_arrays["Y"].values.clip(min=1e-6)
+
     log_transform_norm_blr_model.fit_predict(
         norm_data_from_arrays, test_norm_data_from_arrays
     )
-    # Force the data to be >= 1e-6
-    test_norm_data_from_arrays["Y"].values.clip(min=1e-6)
-    
+
     # Check that Y are positive in the original data
     assert bool(
         np.all(test_norm_data_from_arrays["Y"].values > 0)

From 19301942897649bf0f6eb56021e93188f30b27da Mon Sep 17 00:00:00 2001
From: Konstantinos Tsilimparis <konstantinos.tsilimparis@outlook.com>
Date: Thu, 5 Mar 2026 17:20:35 +0100
Subject: [PATCH 22/22] enh - comment

---
 pcntoolkit/regression_model/blr.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pcntoolkit/regression_model/blr.py b/pcntoolkit/regression_model/blr.py
index 306fb702..d5a9a197 100644
--- a/pcntoolkit/regression_model/blr.py
+++ b/pcntoolkit/regression_model/blr.py
@@ -347,6 +347,7 @@ def backward(self, X: xr.DataArray, be: xr.DataArray, Z: xr.DataArray) -> xr.Dat
                 self.ys[mask] = self.ys[mask] + residual_mean
                 self.s2[mask] = np.square(np.sqrt(self.s2[mask]) * correction_factor)
 
+        # Compute the centiles in the original Y space: centiles = Z * std + mean
         centiles = np_Z * np.sqrt(self.s2) + self.ys
         if self.warp:
             centiles = self.warp.invf(centiles, self.gamma)