diff --git a/orca_python/metrics/tests/test_metrics.py b/orca_python/metrics/tests/test_metrics.py
index 3959197..d7dba23 100644
--- a/orca_python/metrics/tests/test_metrics.py
+++ b/orca_python/metrics/tests/test_metrics.py
@@ -1,80 +1,234 @@
 """Tests for the metrics module."""
 
+import numpy as np
 import numpy.testing as npt
-import pytest
-from numpy import array
 
-# import metrics
-import orca_python.metrics as metrics
-
-# path.append('..')
-
-
-@pytest.fixture
-def real_y():
-    return array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
-
-
-@pytest.fixture
-def predicted_y():
-    return array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
-
-
-def test_ccr(real_y, predicted_y):
-    real_ccr = 0.8000
-    predicted_ccr = metrics.ccr(real_y, predicted_y)
-    npt.assert_almost_equal(real_ccr, predicted_ccr, decimal=4)
-
-
-def test_amae(real_y, predicted_y):
-    real_amae = 0.2937
-    predicted_amae = metrics.amae(real_y, predicted_y)
-    npt.assert_almost_equal(real_amae, predicted_amae, decimal=4)
-
-
-def test_gm(real_y, predicted_y):
-    real_gm = 0.7991
-    predicted_gm = metrics.gm(real_y, predicted_y)
-    npt.assert_almost_equal(real_gm, predicted_gm, decimal=4)
-
-
-def test_mae(real_y, predicted_y):
-    real_mae = 0.3000
-    predicted_mae = metrics.mae(real_y, predicted_y)
-    npt.assert_almost_equal(real_mae, predicted_mae, decimal=4)
-
-
-def test_mmae(real_y, predicted_y):
-    real_mmae = 0.4286
-    predicted_mmae = metrics.mmae(real_y, predicted_y)
-    npt.assert_almost_equal(real_mmae, predicted_mmae, decimal=4)
-
-
-def test_ms(real_y, predicted_y):
-    real_ms = 0.7143
-    predicted_ms = metrics.ms(real_y, predicted_y)
-    npt.assert_almost_equal(real_ms, predicted_ms, decimal=4)
-
-
-def test_mze(real_y, predicted_y):
-    real_mze = 0.2000
-    predicted_mze = metrics.mze(real_y, predicted_y)
-    npt.assert_almost_equal(real_mze, predicted_mze, decimal=4)
-
-
-def test_tkendall(real_y, predicted_y):
-    real_tkendall = 0.6240
-    predicted_tkendall = metrics.tkendall(real_y, predicted_y)
-    npt.assert_almost_equal(real_tkendall, predicted_tkendall, decimal=4)
-
-
-def test_wkappa(real_y, predicted_y):
-    real_wkappa = 0.6703
-    predicted_wkappa = metrics.wkappa(real_y, predicted_y)
-    npt.assert_almost_equal(real_wkappa, predicted_wkappa, decimal=4)
-
-
-def test_spearman(real_y, predicted_y):
-    real_spearman = 0.6429
-    predicted_spearman = metrics.spearman(real_y, predicted_y)
-    npt.assert_almost_equal(real_spearman, predicted_spearman, decimal=4)
+from orca_python.metrics import (
+    amae,
+    ccr,
+    gm,
+    greater_is_better,
+    mae,
+    mmae,
+    ms,
+    mze,
+    spearman,
+    tkendall,
+    wkappa,
+)
+
+
+def test_greater_is_better():
+    """Test the greater_is_better function."""
+    assert greater_is_better("ccr")
+    assert greater_is_better("gm")
+    assert not greater_is_better("mae")
+    assert not greater_is_better("mmae")
+    assert not greater_is_better("amae")
+    assert greater_is_better("ms")
+    assert not greater_is_better("mze")
+    assert greater_is_better("tkendall")
+    assert greater_is_better("wkappa")
+    assert greater_is_better("spearman")
+
+
+def test_ccr():
+    """Test the Correctly Classified Ratio (CCR) metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
+    expected = 0.8000
+    actual = ccr(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=4)
+
+    # Test with 2-D inputs: one-hot labels and per-class probability rows
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = ccr(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_amae():
+    """Test the Average Mean Absolute Error (AMAE) metric."""
+    y_true = np.array([0, 0, 1, 1])
+    y_pred = np.array([0, 1, 0, 1])
+    expected = 0.5
+    actual = amae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    y_true = np.array([0, 0, 1, 1, 2, 2])
+    y_pred = np.array([0, 0, 1, 1, 2, 2])
+    expected = 0.0
+    actual = amae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    y_true = np.array([0, 0, 2, 1])
+    y_pred = np.array([0, 2, 0, 1])
+    expected = 1.0
+    actual = amae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    y_true = np.array([0, 0, 2, 1, 3])
+    y_pred = np.array([2, 2, 0, 3, 1])
+    expected = 2.0
+    actual = amae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    # Test with 2-D inputs: one-hot labels and per-class probability rows
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = amae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_gm():
+    """Test the Geometric Mean (GM) metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
+    expected = 0.7991
+    actual = gm(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=4)
+
+    # Test with 2-D inputs: one-hot labels and per-class probability rows
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = gm(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_mae():
+    """Test the Mean Absolute Error (MAE) metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
+    expected = 0.3000
+    actual = mae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=4)
+
+    # Test with 2-D inputs: one-hot labels and per-class probability rows
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = mae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_mmae():
+    """Test the Maximum Mean Absolute Error (MMAE) metric."""
+    y_true = np.array([0, 0, 1, 1])
+    y_pred = np.array([0, 1, 0, 1])
+    expected = 0.5
+    actual = mmae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    y_true = np.array([0, 0, 1, 1, 2, 2])
+    y_pred = np.array([0, 0, 1, 1, 2, 2])
+    expected = 0.0
+    actual = mmae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    y_true = np.array([0, 0, 2, 1])
+    y_pred = np.array([0, 2, 0, 1])
+    expected = 2.0
+    actual = mmae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    y_true = np.array([0, 0, 2, 1, 3])
+    y_pred = np.array([2, 2, 0, 3, 1])
+    expected = 2.0
+    actual = mmae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    # Test with 2-D inputs: one-hot labels and per-class probability rows
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = mmae(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_ms():
+    """Test the Minimum Sensitivity (MS) metric."""
+    y_true = np.array([0, 0, 1, 1])
+    y_pred = np.array([0, 1, 0, 1])
+    expected = 0.5
+    actual = ms(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    y_true = np.array([0, 0, 1, 1, 2, 2])
+    y_pred = np.array([0, 0, 1, 1, 2, 2])
+    expected = 1.0
+    actual = ms(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+    # Test with 2-D inputs: one-hot labels and per-class probability rows
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = ms(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_mze():
+    """Test the Mean Zero-one Error (MZE) metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
+    expected = 0.2000
+    actual = mze(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=4)
+
+    # Test with 2-D inputs: one-hot labels and per-class probability rows
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.5
+    actual = mze(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_tkendall():
+    """Test Kendall's Tau rank correlation metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
+    expected = 0.6240
+    actual = tkendall(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=4)
+
+    # Test with 2-D inputs: one-hot labels and per-class probability rows
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.0
+    actual = tkendall(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_wkappa():
+    """Test the Weighted Kappa metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
+    expected = 0.6703
+    actual = wkappa(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=4)
+
+    # Test with 2-D inputs: one-hot labels and per-class probability rows
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.0
+    actual = wkappa(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)
+
+
+def test_spearman():
+    """Test Spearman's rank correlation coefficient metric."""
+    y_true = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
+    y_pred = np.array([1, 3, 3, 1, 2, 3, 1, 2, 2, 1, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3])
+    expected = 0.6429
+    actual = spearman(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=4)
+
+    # Test with 2-D inputs: one-hot labels and per-class probability rows
+    y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
+    y_pred = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
+    expected = 0.0
+    actual = spearman(y_true, y_pred)
+    npt.assert_almost_equal(expected, actual, decimal=6)