From cd9e40e3d0a01288c898e265cefaed2090a2cbc1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81ngel=20Sevilla=20Molina?= <angelsevillamol@gmail.com>
Date: Tue, 1 Jul 2025 10:08:24 +0200
Subject: [PATCH 1/4] TST: Update imports, add docstrings, and improve variable
 naming

---
 orca_python/metrics/tests/test_metrics.py | 86 ++++++++++++++---------
 1 file changed, 52 insertions(+), 34 deletions(-)

diff --git a/orca_python/metrics/tests/test_metrics.py b/orca_python/metrics/tests/test_metrics.py
index 3959197..dcb7500 100644
--- a/orca_python/metrics/tests/test_metrics.py
+++ b/orca_python/metrics/tests/test_metrics.py
@@ -4,10 +4,18 @@
 import pytest
 from numpy import array
 
-# import metrics
-import orca_python.metrics as metrics
-
-# path.append('..')
+from orca_python.metrics import (
+    ccr,
+    gm,
+    mae,
+    mmae,
+    amae,
+    ms,
+    mze,
+    tkendall,
+    wkappa,
+    spearman,
+)
 
 
 @pytest.fixture
@@ -21,60 +29,70 @@ def predicted_y():
 
 
 def test_ccr(real_y, predicted_y):
-    real_ccr = 0.8000
-    predicted_ccr = metrics.ccr(real_y, predicted_y)
-    npt.assert_almost_equal(real_ccr, predicted_ccr, decimal=4)
+    """Test the Correctly Classified Ratio (CCR) metric."""
+    expected = 0.8000
+    actual = ccr(real_y, predicted_y)
+    npt.assert_almost_equal(expected, actual, decimal=4)
 
 
 def test_amae(real_y, predicted_y):
-    real_amae = 0.2937
-    predicted_amae = metrics.amae(real_y, predicted_y)
-    npt.assert_almost_equal(real_amae, predicted_amae, decimal=4)
+    """Test the Average Mean Absolute Error (AMAE) metric."""
+    expected = 0.2937
+    actual = amae(real_y, predicted_y)
+    npt.assert_almost_equal(expected, actual, decimal=4)
 
 
 def test_gm(real_y, predicted_y):
-    real_gm = 0.7991
-    predicted_gm = metrics.gm(real_y, predicted_y)
-    npt.assert_almost_equal(real_gm, predicted_gm, decimal=4)
+    """Test the Geometric Mean (GM) metric."""
+    expected = 0.7991
+    actual = gm(real_y, predicted_y)
+    npt.assert_almost_equal(expected, actual, decimal=4)
 
 
 def test_mae(real_y, predicted_y):
-    real_mae = 0.3000
-    predicted_mae = metrics.mae(real_y, predicted_y)
-    npt.assert_almost_equal(real_mae, predicted_mae, decimal=4)
+    """Test the Mean Absolute Error (MAE) metric."""
+    expected = 0.3000
+    actual = mae(real_y, predicted_y)
+    npt.assert_almost_equal(expected, actual, decimal=4)
 
 
 def test_mmae(real_y, predicted_y):
-    real_mmae = 0.4286
-    predicted_mmae = metrics.mmae(real_y, predicted_y)
-    npt.assert_almost_equal(real_mmae, predicted_mmae, decimal=4)
+    """Test the Mean Mean Absolute Error (MMAE) metric."""
+    expected = 0.4286
+    actual = mmae(real_y, predicted_y)
+    npt.assert_almost_equal(expected, actual, decimal=4)
 
 
 def test_ms(real_y, predicted_y):
-    real_ms = 0.7143
-    predicted_ms = metrics.ms(real_y, predicted_y)
-    npt.assert_almost_equal(real_ms, predicted_ms, decimal=4)
+    """Test the Mean Sensitivity (MS) metric."""
+    expected = 0.7143
+    actual = ms(real_y, predicted_y)
+    npt.assert_almost_equal(expected, actual, decimal=4)
 
 
 def test_mze(real_y, predicted_y):
-    real_mze = 0.2000
-    predicted_mze = metrics.mze(real_y, predicted_y)
-    npt.assert_almost_equal(real_mze, predicted_mze, decimal=4)
+    """Test the Mean Zero-one Error (MZE) metric."""
+    expected = 0.2000
+    actual = mze(real_y, predicted_y)
+    npt.assert_almost_equal(expected, actual, decimal=4)
 
 
 def test_tkendall(real_y, predicted_y):
-    real_tkendall = 0.6240
-    predicted_tkendall = metrics.tkendall(real_y, predicted_y)
-    npt.assert_almost_equal(real_tkendall, predicted_tkendall, decimal=4)
+    """Test the Kendall's Tau metric."""
+    expected = 0.6240
+    actual = tkendall(real_y, predicted_y)
+    npt.assert_almost_equal(expected, actual, decimal=4)
 
 
 def test_wkappa(real_y, predicted_y):
-    real_wkappa = 0.6703
-    predicted_wkappa = metrics.wkappa(real_y, predicted_y)
-    npt.assert_almost_equal(real_wkappa, predicted_wkappa, decimal=4)
+    """Test the Weighted Kappa metric."""
+    expected = 0.6703
+    actual = wkappa(real_y, predicted_y)
+    npt.assert_almost_equal(expected, actual, decimal=4)
 
 
 def test_spearman(real_y, predicted_y):
-    real_spearman = 0.6429
-    predicted_spearman = metrics.spearman(real_y, predicted_y)
-    npt.assert_almost_equal(real_spearman, predicted_spearman, decimal=4)
+    """Test the Spearman's rank correlation coefficient metric."""
+    expected = 0.6429
+    actual = spearman(real_y, predicted_y)
+    npt.assert_almost_equal(expected, actual, decimal=4)

From ca57d13fbd8c40efc5da7ffdd5d88cc4b78aa397 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81ngel=20Sevilla=20Molina?= <angelsevillamol@gmail.com>
Date: Tue, 1 Jul 2025 10:13:34 +0200
Subject: [PATCH 2/4] TST: Add new test cases for all metrics

---
 orca_python/metrics/tests/test_metrics.py | 213 +++++++++++++++++-----
 1 file changed, 167 insertions(+), 46 deletions(-)

diff --git a/orca_python/metrics/tests/test_metrics.py b/orca_python/metrics/tests/test_metrics.py
index dcb7500..8bdf941 100644
--- a/orca_python/metrics/tests/test_metrics.py
+++ b/orca_python/metrics/tests/test_metrics.py
@@ -1,8 +1,7 @@
 """Tests for the metrics module."""
 
+import numpy as np
 import numpy.testing as npt
-import pytest
-from numpy import array
 
 from orca_python.metrics import (
     ccr,
@@ -18,81 +17,203 @@
 )
 
 
-@pytest.fixture
-def real_y():
-    return array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
-
-
-@pytest.fixture
-def predicted_y():
-    return array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
-
-
-def test_ccr(real_y, predicted_y):
+def test_ccr():
     """Test the Correctly Classified Ratio (CCR) metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
     expected = 0.8000
-    actual = ccr(real_y, predicted_y)
+    actual = ccr(y_true, y_pred)
     npt.assert_almost_equal(expected, actual, decimal=4)
 
-
-def test_amae(real_y, predicted_y):
-    """Test the Average Mean Absolute Error (AMAE) metric."""
-    expected = 0.2937
-    actual = amae(real_y, predicted_y)
-    npt.assert_almost_equal(expected, actual, decimal=4)
+    # Test using one-hot and probabilities
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = ccr(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
 
 
-def test_gm(real_y, predicted_y):
+def test_amae():
+    """Test the Average Mean Absolute Error (AMAE) metric.
+
+    Each case checks ``amae`` against a fixed expected value, first
+    with integer-coded labels and finally with 2-D one-hot arrays.
+    The computed value goes first because numpy's signature is
+    ``assert_almost_equal(actual, desired, decimal)``.
+    """
+    # Two classes; one of the two samples in each class is wrong.
+    y_true = np.array([0, 0, 1, 1])
+    y_pred = np.array([0, 1, 0, 1])
+    npt.assert_almost_equal(amae(y_true, y_pred), 0.5, decimal=6)
+
+    # Three classes; every prediction matches its target.
+    y_true = np.array([0, 0, 1, 1, 2, 2])
+    y_pred = np.array([0, 0, 1, 1, 2, 2])
+    npt.assert_almost_equal(amae(y_true, y_pred), 0.0, decimal=6)
+
+    # Two of the four predictions differ from their target by 2.
+    y_true = np.array([0, 0, 2, 1])
+    y_pred = np.array([0, 2, 0, 1])
+    npt.assert_almost_equal(amae(y_true, y_pred), 1.0, decimal=6)
+
+    # Every prediction differs from its target by exactly 2.
+    y_true = np.array([0, 0, 2, 1, 3])
+    y_pred = np.array([2, 2, 0, 3, 1])
+    npt.assert_almost_equal(amae(y_true, y_pred), 2.0, decimal=6)
+
+    # Test using one-hot and probabilities
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    npt.assert_almost_equal(amae(y_true, y_pred), 0.5, decimal=6)
+
+
+def test_gm():
     """Test the Geometric Mean (GM) metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
     expected = 0.7991
-    actual = gm(real_y, predicted_y)
+    actual = gm(y_true, y_pred)
     npt.assert_almost_equal(expected, actual, decimal=4)
 
+    # Test using one-hot and probabilities
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = gm(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
 
-def test_mae(real_y, predicted_y):
+
+def test_mae():
     """Test the Mean Absolute Error (MAE) metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
     expected = 0.3000
-    actual = mae(real_y, predicted_y)
-    npt.assert_almost_equal(expected, actual, decimal=4)
-
-
-def test_mmae(real_y, predicted_y):
-    """Test the Mean Mean Absolute Error (MMAE) metric."""
-    expected = 0.4286
-    actual = mmae(real_y, predicted_y)
+    actual = mae(y_true, y_pred)
     npt.assert_almost_equal(expected, actual, decimal=4)
 
-
-def test_ms(real_y, predicted_y):
+    # Test using one-hot and probabilities
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = mae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_mmae():
+    """Test the Maximum Mean Absolute Error (MMAE) metric.
+
+    Each case checks ``mmae`` against a fixed expected value, first
+    with integer-coded labels and finally with 2-D one-hot arrays.
+    The computed value goes first because numpy's signature is
+    ``assert_almost_equal(actual, desired, decimal)``.
+    """
+    # Two classes; one of the two samples in each class is wrong.
+    y_true = np.array([0, 0, 1, 1])
+    y_pred = np.array([0, 1, 0, 1])
+    npt.assert_almost_equal(mmae(y_true, y_pred), 0.5, decimal=6)
+
+    # Three classes; every prediction matches its target.
+    y_true = np.array([0, 0, 1, 1, 2, 2])
+    y_pred = np.array([0, 0, 1, 1, 2, 2])
+    npt.assert_almost_equal(mmae(y_true, y_pred), 0.0, decimal=6)
+
+    # Two of the four predictions differ from their target by 2.
+    y_true = np.array([0, 0, 2, 1])
+    y_pred = np.array([0, 2, 0, 1])
+    npt.assert_almost_equal(mmae(y_true, y_pred), 2.0, decimal=6)
+
+    # Every prediction differs from its target by exactly 2.
+    y_true = np.array([0, 0, 2, 1, 3])
+    y_pred = np.array([2, 2, 0, 3, 1])
+    npt.assert_almost_equal(mmae(y_true, y_pred), 2.0, decimal=6)
+
+    # Test using one-hot and probabilities
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    npt.assert_almost_equal(mmae(y_true, y_pred), 0.5, decimal=6)
+
+
+def test_ms():
     """Test the Mean Sensitivity (MS) metric."""
-    expected = 0.7143
-    actual = ms(real_y, predicted_y)
-    npt.assert_almost_equal(expected, actual, decimal=4)
-
-
-def test_mze(real_y, predicted_y):
+    y_true = np.array([0, 0, 1, 1])
+    y_pred = np.array([0, 1, 0, 1])
+    expected = 0.5
+    actual = ms(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    y_true = np.array([0, 0, 1, 1, 2, 2])
+    y_pred = np.array([0, 0, 1, 1, 2, 2])
+    expected = 1.0
+    actual = ms(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    # Test using one-hot and probabilities
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = ms(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_mze():
     """Test the Mean Zero-one Error (MZE) metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
     expected = 0.2000
-    actual = mze(real_y, predicted_y)
+    actual = mze(y_true, y_pred)
     npt.assert_almost_equal(expected, actual, decimal=4)
 
+    # Test using one-hot and probabilities
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = mze(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
 
-def test_tkendall(real_y, predicted_y):
+def test_tkendall():
     """Test the Kendall's Tau metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
     expected = 0.6240
-    actual = tkendall(real_y, predicted_y)
+    actual = tkendall(y_true, y_pred)
     npt.assert_almost_equal(expected, actual, decimal=4)
 
+    # Test using one-hot and probabilities
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.0
+    actual = tkendall(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
 
-def test_wkappa(real_y, predicted_y):
+def test_wkappa():
     """Test the Weighted Kappa metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
     expected = 0.6703
-    actual = wkappa(real_y, predicted_y)
+    actual = wkappa(y_true, y_pred)
     npt.assert_almost_equal(expected, actual, decimal=4)
 
+    # Test using one-hot and probabilities
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.0
+    actual = wkappa(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
 
-def test_spearman(real_y, predicted_y):
+
+def test_spearman():
     """Test the Spearman's rank correlation coefficient metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
     expected = 0.6429
-    actual = spearman(real_y, predicted_y)
+    actual = spearman(y_true, y_pred)
     npt.assert_almost_equal(expected, actual, decimal=4)
+
+    # Test using one-hot and probabilities
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.0
+    actual = spearman(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)

From 67f172ec0ad3a6aed0055e77c66e58a4d7cab02f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81ngel=20Sevilla=20Molina?= <angelsevillamol@gmail.com>
Date: Tue, 1 Jul 2025 10:14:56 +0200
Subject: [PATCH 3/4] TST: Add greater_is_better function test coverage

---
 orca_python/metrics/tests/test_metrics.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/orca_python/metrics/tests/test_metrics.py b/orca_python/metrics/tests/test_metrics.py
index 8bdf941..0a867da 100644
--- a/orca_python/metrics/tests/test_metrics.py
+++ b/orca_python/metrics/tests/test_metrics.py
@@ -4,6 +4,7 @@
 import numpy.testing as npt
 
 from orca_python.metrics import (
+    greater_is_better,
     ccr,
     gm,
     mae,
@@ -17,6 +18,20 @@
 )
 
 
+def test_greater_is_better():
+    """Check the direction ``greater_is_better`` reports per metric name.
+
+    Names in the first group must be reported as greater-is-better;
+    names in the second group must not.
+    """
+    maximized = ("ccr", "gm", "ms", "tkendall", "wkappa", "spearman")
+    minimized = ("mae", "mmae", "amae", "mze")
+    for metric_name in maximized:
+        assert greater_is_better(metric_name)
+    for metric_name in minimized:
+        assert not greater_is_better(metric_name)
+
+
 def test_ccr():
     """Test the Correctly Classified Ratio (CCR) metric."""
     y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])

From a6444ae1a84970b2d2fb1e94b0edbc1cdcd62864 Mon Sep 17 00:00:00 2001
From: angelsevillamol <26467846+angelsevillamol@users.noreply.github.com>
Date: Thu, 3 Jul 2025 09:33:03 +0000
Subject: [PATCH 4/4] Automatic `pre-commit` fixes

---
 orca_python/metrics/tests/test_metrics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/orca_python/metrics/tests/test_metrics.py b/orca_python/metrics/tests/test_metrics.py
index 0a867da..d7dba23 100644
--- a/orca_python/metrics/tests/test_metrics.py
+++ b/orca_python/metrics/tests/test_metrics.py
@@ -4,17 +4,17 @@
 import numpy.testing as npt
 
 from orca_python.metrics import (
-    greater_is_better,
+    amae,
     ccr,
     gm,
+    greater_is_better,
     mae,
     mmae,
-    amae,
     ms,
     mze,
+    spearman,
     tkendall,
     wkappa,
-    spearman,
 )