From cae59a83de5db33d8fc99d74c1e7a502c6d53bce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81ngel=20Sevilla=20Molina?= <angelsevillamol@gmail.com>
Date: Fri, 4 Jul 2025 09:15:05 +0200
Subject: [PATCH 1/4] ENH: Initialize preprocessing module

---
 orca_python/preprocessing/__init__.py | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 orca_python/preprocessing/__init__.py

diff --git a/orca_python/preprocessing/__init__.py b/orca_python/preprocessing/__init__.py
new file mode 100644
index 0000000..0ba2ad5
--- /dev/null
+++ b/orca_python/preprocessing/__init__.py
@@ -0,0 +1 @@
+"""Preprocessing module."""

From 3bd5ff767c9b24fd906d7dfc4efdece1313d006a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81ngel=20Sevilla=20Molina?= <angelsevillamol@gmail.com>
Date: Fri, 4 Jul 2025 09:22:11 +0200
Subject: [PATCH 2/4] ENH: Add preprocessing module with core functions

---
 orca_python/preprocessing/__init__.py      |  12 +++
 orca_python/preprocessing/preprocessing.py | 107 +++++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 orca_python/preprocessing/preprocessing.py

diff --git a/orca_python/preprocessing/__init__.py b/orca_python/preprocessing/__init__.py
index 0ba2ad5..5144fcc 100644
--- a/orca_python/preprocessing/__init__.py
+++ b/orca_python/preprocessing/__init__.py
@@ -1 +1,13 @@
 """Preprocessing module."""
+
+from .preprocessing import (
+    normalize,
+    preprocess_input,
+    standardize,
+)
+
+__all__ = [
+    "normalize",
+    "preprocess_input",
+    "standardize",
+]
diff --git a/orca_python/preprocessing/preprocessing.py b/orca_python/preprocessing/preprocessing.py
new file mode 100644
index 0000000..2265ce2
--- /dev/null
+++ b/orca_python/preprocessing/preprocessing.py
@@ -0,0 +1,107 @@
+"""Preprocessing module."""
+
+from sklearn import preprocessing
+
+
+def preprocess_input(X_train, X_test=None, input_preprocessing=None):
+    """Apply normalization or standardization to the input data.
+
+    The scaler is fit on the training data only and then applied to both the
+    training and the test data (if provided).
+
+    Parameters
+    ----------
+    X_train : np.ndarray
+        Feature matrix used to fit the scaler and train the model.
+
+    X_test : np.ndarray, optional
+        Feature matrix used for model evaluation and prediction.
+
+    input_preprocessing : str, optional
+        Case-insensitive name of the scaling strategy:
+        - "norm": Min-max scaling (see ``normalize``)
+        - "std": Standardization (see ``standardize``)
+        - None: No preprocessing, inputs are returned unchanged
+
+    Returns
+    -------
+    X_train_scaled : np.ndarray
+        Scaled training data.
+
+    X_test_scaled : np.ndarray or None
+        Scaled test data, or None if ``X_test`` was not provided.
+
+    Raises
+    ------
+    ValueError
+        If ``input_preprocessing`` is not None, "norm" or "std" (any case).
+
+    """
+    if input_preprocessing is None:
+        return X_train, X_test
+    # Non-string values get the documented ValueError, not an AttributeError.
+    if not isinstance(input_preprocessing, str):
+        raise ValueError(f"Input preprocessing named '{input_preprocessing}' unknown")
+
+    input_preprocessing = input_preprocessing.lower()
+    if input_preprocessing == "norm":
+        return normalize(X_train, X_test)
+    if input_preprocessing == "std":
+        return standardize(X_train, X_test)
+    raise ValueError(f"Input preprocessing named '{input_preprocessing}' unknown")
+
+
+def normalize(X_train, X_test=None):
+    """Scale each feature to the [0, 1] range observed in the training data.
+
+    The min-max scaler is fit on ``X_train`` only and reused for ``X_test``.
+
+    Parameters
+    ----------
+    X_train : np.ndarray
+        Feature matrix used to fit the scaler and train the model.
+
+    X_test : np.ndarray, optional
+        Feature matrix used for model evaluation and prediction.
+
+    Returns
+    -------
+    X_train_normalized : np.ndarray
+        Normalized training data.
+
+    X_test_normalized : np.ndarray or None
+        Normalized test data, or None if ``X_test`` was not provided.
+
+    """
+    scaler = preprocessing.MinMaxScaler()
+    X_train_normalized = scaler.fit_transform(X_train)
+    X_test_normalized = scaler.transform(X_test) if X_test is not None else None
+    return X_train_normalized, X_test_normalized
+
+
+def standardize(X_train, X_test=None):
+    """Standardize each feature using training-data statistics.
+
+    The standard scaler is fit on ``X_train`` only and reused for ``X_test``.
+
+    Parameters
+    ----------
+    X_train : np.ndarray
+        Feature matrix used to fit the scaler and train the model.
+
+    X_test : np.ndarray, optional
+        Feature matrix used for model evaluation and prediction.
+
+    Returns
+    -------
+    X_train_standardized : np.ndarray
+        Standardized training data.
+
+    X_test_standardized : np.ndarray or None
+        Standardized test data, or None if ``X_test`` was not provided.
+
+    """
+    scaler = preprocessing.StandardScaler()
+    X_train_standardized = scaler.fit_transform(X_train)
+    X_test_standardized = scaler.transform(X_test) if X_test is not None else None
+    return X_train_standardized, X_test_standardized

From 645115525e46f78cd643a8d5643e6d31905ceeb0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81ngel=20Sevilla=20Molina?= <angelsevillamol@gmail.com>
Date: Fri, 4 Jul 2025 09:29:51 +0200
Subject: [PATCH 3/4] TST: Add preprocessing unit tests

---
 orca_python/preprocessing/tests/__init__.py   |  3 +
 .../preprocessing/tests/test_preprocessing.py | 71 +++++++++++++++++++
 2 files changed, 74 insertions(+)
 create mode 100644 orca_python/preprocessing/tests/__init__.py
 create mode 100644 orca_python/preprocessing/tests/test_preprocessing.py

diff --git a/orca_python/preprocessing/tests/__init__.py b/orca_python/preprocessing/tests/__init__.py
new file mode 100644
index 0000000..6d8ff4b
--- /dev/null
+++ b/orca_python/preprocessing/tests/__init__.py
@@ -0,0 +1,3 @@
+"""Tests for preprocessing module."""
+
+__all__ = []
diff --git a/orca_python/preprocessing/tests/test_preprocessing.py b/orca_python/preprocessing/tests/test_preprocessing.py
new file mode 100644
index 0000000..43a1727
--- /dev/null
+++ b/orca_python/preprocessing/tests/test_preprocessing.py
@@ -0,0 +1,71 @@
+"""Tests for the preprocessing module."""
+
+import numpy as np
+import numpy.testing as npt
+import pytest
+
+from orca_python.preprocessing import normalize, preprocess_input, standardize
+
+
+@pytest.fixture
+def dataset():
+    """Create a reproducible synthetic dataset for the preprocessing tests."""
+    # A fixed seed keeps any test failure reproducible across runs.
+    rng = np.random.default_rng(0)
+    return rng.standard_normal((100, 5)), rng.standard_normal((50, 5))
+
+
+def test_normalize_data(dataset):
+    """Test that normalize scales every training feature to span [0, 1]."""
+    norm_X_train, _ = normalize(*dataset)
+    npt.assert_allclose(norm_X_train.min(axis=0), 0, atol=1e-12)
+    npt.assert_allclose(norm_X_train.max(axis=0), 1)
+
+
+def test_standardize_data(dataset):
+    """Test that standardize gives every feature zero mean and unit variance."""
+    X_train, X_test = dataset
+    std_X_train, _ = standardize(X_train, X_test)
+    npt.assert_almost_equal(np.mean(std_X_train, axis=0), 0, decimal=6)
+    npt.assert_almost_equal(np.std(std_X_train, axis=0), 1, decimal=6)
+
+
+@pytest.mark.parametrize(
+    "input_preprocessing, method_func",
+    [
+        ("norm", normalize),
+        ("std", standardize),
+    ],
+)
+def test_input_preprocessing(dataset, input_preprocessing, method_func):
+    """Test that preprocess_input matches the scaling function it dispatches to."""
+    X_train, X_test = dataset
+    post_X_train, post_X_test = preprocess_input(X_train, X_test, input_preprocessing)
+    expected_X_train, expected_X_test = method_func(X_train, X_test)
+    npt.assert_array_almost_equal(post_X_train, expected_X_train)
+    npt.assert_array_almost_equal(post_X_test, expected_X_test)
+
+
+def test_none_input_preprocessing(dataset):
+    """Test that preprocess_input returns the inputs unchanged when method is None."""
+    X_train, X_test = dataset
+    post_X_train, post_X_test = preprocess_input(X_train, X_test, None)
+    npt.assert_array_equal(post_X_train, X_train)
+    npt.assert_array_equal(post_X_test, X_test)
+
+
+def test_input_preprocessing_unknown_method(dataset):
+    """Test that an unknown preprocessing method raises a ValueError."""
+    X_train, X_test = dataset
+    error_msg = "Input preprocessing named 'esc' unknown"
+    with pytest.raises(ValueError, match=error_msg):
+        preprocess_input(X_train, X_test, "esc")
+
+
+def test_input_preprocessing_inconsistent_features(dataset):
+    """Test that preprocessing with inconsistent feature dimensions raises error."""
+    X_train, X_test = dataset
+    X_test = X_test[:, :-1]
+    for method in ("norm", "std"):
+        with pytest.raises(ValueError):
+            preprocess_input(X_train, X_test, method)

From f47577b93cf6eb0213a0a3fed1f971058c473f2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81ngel=20Sevilla=20Molina?= <angelsevillamol@gmail.com>
Date: Fri, 4 Jul 2025 09:30:31 +0200
Subject: [PATCH 4/4] STY: Pre-commit fixes

---
 .github/ISSUE_TEMPLATE/bug_report.yml      | 2 +-
 .github/ISSUE_TEMPLATE/doc_improvement.yml | 2 +-
 .github/ISSUE_TEMPLATE/feature_request.yml | 2 +-
 .github/ISSUE_TEMPLATE/other_issue.yml     | 2 +-
 .github/PULL_REQUEST_TEMPLATE.md           | 2 +-
 .github/workflows/pr_pytest.yml            | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index c669748..899afa4 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -48,4 +48,4 @@ body:
       Place traceback error here if applicable. If your issue has no traceback, please describe the observed output without formatting.
       ```
   validations:
-    required: true
\ No newline at end of file
+    required: true
diff --git a/.github/ISSUE_TEMPLATE/doc_improvement.yml b/.github/ISSUE_TEMPLATE/doc_improvement.yml
index cd1121d..002692d 100644
--- a/.github/ISSUE_TEMPLATE/doc_improvement.yml
+++ b/.github/ISSUE_TEMPLATE/doc_improvement.yml
@@ -15,4 +15,4 @@ body:
   attributes:
     label: Suggest a potential alternative/fix
     description: >
-      Tell us how you think the documentation could be improved.
\ No newline at end of file
+      Tell us how you think the documentation could be improved.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
index c2111a5..2210df1 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -27,4 +27,4 @@ body:
   attributes:
     label: Additional context
     description: >
-      Add any other context about the problem here.
\ No newline at end of file
+      Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/other_issue.yml b/.github/ISSUE_TEMPLATE/other_issue.yml
index ad364e9..95ff6d1 100644
--- a/.github/ISSUE_TEMPLATE/other_issue.yml
+++ b/.github/ISSUE_TEMPLATE/other_issue.yml
@@ -18,4 +18,4 @@ body:
     label: Suggest a potential alternative/fix
 - type: textarea
   attributes:
-    label: Additional context
\ No newline at end of file
+    label: Additional context
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 3947512..0a8b9a4 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -19,4 +19,4 @@ A clear and concise description of what you have implemented.
 
 <!--
 Please be aware that we are a team of volunteers so patience is necessary.
--->
\ No newline at end of file
+-->
diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml
index a4ab8f2..85fca86 100644
--- a/.github/workflows/pr_pytest.yml
+++ b/.github/workflows/pr_pytest.yml
@@ -2,7 +2,7 @@ name: Run Tests
 
 on:
   push:
-    branches: 
+    branches:
       - main
     paths:
       - "orca_python/**"