From 96f06cbd8785f677b65f0d53ddf283dbe29bb9da Mon Sep 17 00:00:00 2001
From: hmacdope <hugomacdermott@gmail.com>
Date: Mon, 20 Apr 2026 12:41:24 +1000
Subject: [PATCH 1/4] Add return_members option to CommitteeRegressor.predict()

Adds return_members=False to _predict() and predict(). When True,
the raw per-member predictions, with shape (n_samples, n_tasks,
n_members), are returned as the last element of the result tuple.
Composes cleanly with return_std: the mean is always returned first,
and callers can additionally request std, member predictions, or both.

Closes #464

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 openadmet/models/active_learning/committee.py | 45 ++++++++++++-------
 .../active_learning/test_active_learning.py   | 33 ++++++++++++++
 2 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/openadmet/models/active_learning/committee.py b/openadmet/models/active_learning/committee.py
index 4f8bf7fd..96ecc6a9 100644
--- a/openadmet/models/active_learning/committee.py
+++ b/openadmet/models/active_learning/committee.py
@@ -353,7 +353,7 @@ def query(self, X, query_strategy: str = None, **kwargs):
 
         return _ACQUISITION_FUNCTIONS[query_strategy](mean, std, **kwargs)
 
-    def _predict(self, X, return_std=False, **kwargs):
+    def _predict(self, X, return_std=False, return_members=False, **kwargs):
         """
         Make predictions using the committee model.
 
@@ -363,35 +363,43 @@ def _predict(self, X, return_std=False, **kwargs):
             The input samples to predict.
         return_std : bool, optional
             Whether to return the standard deviation of the predictions.
+        return_members : bool, optional
+            Whether to return the raw per-member predictions of shape
+            (n_samples, n_tasks, n_members). When True, returned as the
+            last element of the tuple.
         **kwargs : dict
             Additional keyword arguments to pass to the committee's predict method.
 
         Returns
         -------
-        array-like
-            Predicted values or probabilities, depending on the committee's implementation.
+        array-like or tuple
+            mean, or (mean, std), or (mean, members), or (mean, std, members)
+            depending on the values of return_std and return_members.
 
         """
-        # Make predictions
+        # Make predictions: (n_samples, n_tasks, n_members)
         preds = np.stack([model.predict(X, **kwargs) for model in self.models], axis=-1)
 
         # Compute mean
         mean = np.mean(preds, axis=-1)
 
-        # Skip std if not requested
-        if return_std is False:
+        if not return_std and not return_members:
             return mean
 
-        # Compute standard deviation
-        std = np.std(preds, axis=-1)
+        result = (mean,)
 
-        # Calibrate std if calibration model is available
-        if self.calibrated:
-            std = self._get_calibration_function()(std)
+        if return_std:
+            std = np.std(preds, axis=-1)
+            if self.calibrated:
+                std = self._get_calibration_function()(std)
+            result += (std,)
+
+        if return_members:
+            result += (preds,)
 
-        return mean, std
+        return result
 
-    def predict(self, X, return_std=False, **kwargs):
+    def predict(self, X, return_std=False, return_members=False, **kwargs):
         """
         Make predictions using the committee model.
 
@@ -401,13 +409,18 @@ def predict(self, X, return_std=False, **kwargs):
             The input samples to predict.
         return_std : bool, optional
             Whether to return the standard deviation of the predictions.
+        return_members : bool, optional
+            Whether to return the raw per-member predictions of shape
+            (n_samples, n_tasks, n_members). When True, returned as the
+            last element of the tuple.
         **kwargs : dict
             Additional keyword arguments to pass to the committee's predict method.
 
         Returns
         -------
-        array-like
-            Predicted values or probabilities, depending on the committee's implementation.
+        array-like or tuple
+            mean, or (mean, std), or (mean, members), or (mean, std, members)
+            depending on the values of return_std and return_members.
 
         """
         if return_std is True and not self.calibrated:
@@ -415,7 +428,7 @@ def predict(self, X, return_std=False, **kwargs):
                 "Standard deviation not calibrated: consider calling `calibrate_uncertainty`."
             )
 
-        return self._predict(X, return_std=return_std, **kwargs)
+        return self._predict(X, return_std=return_std, return_members=return_members, **kwargs)
 
     def _save_calibration_model(self, path: PathLike = "calibration_model.pkl"):
         # Save calibration model
diff --git a/openadmet/models/tests/unit/active_learning/test_active_learning.py b/openadmet/models/tests/unit/active_learning/test_active_learning.py
index 362d20a6..90c759eb 100644
--- a/openadmet/models/tests/unit/active_learning/test_active_learning.py
+++ b/openadmet/models/tests/unit/active_learning/test_active_learning.py
@@ -155,6 +155,39 @@ def deserialize(self, param_path, serial_path):
         pass
 
 
+def test_return_members(toy_data):
+    """Test that return_members exposes per-member predictions with correct shape."""
+    X_train, _, X_test, y_train, _, _ = toy_data
+    n_members = 4
+    n_tasks = 1
+
+    committee = CommitteeRegressor.train(
+        X_train,
+        y_train,
+        mod_class=MockCommitteeModel,
+        mod_params={},
+        n_models=n_members,
+        use_bagging=False,
+    )
+
+    # return_members only
+    mean, members = committee.predict(X_test, return_members=True)
+    assert members.shape == (X_test.shape[0], n_tasks, n_members)
+    assert mean.shape == (X_test.shape[0], n_tasks)
+    assert_allclose(mean, np.mean(members, axis=-1))
+
+    # return_members + return_std
+    mean2, std, members2 = committee.predict(X_test, return_std=True, return_members=True)
+    assert members2.shape == (X_test.shape[0], n_tasks, n_members)
+    assert std.shape == (X_test.shape[0], n_tasks)
+    assert_allclose(mean2, np.mean(members2, axis=-1))
+    assert_allclose(std, np.std(members2, axis=-1))
+
+    # Neither flag — plain mean returned (not a tuple)
+    result = committee.predict(X_test)
+    assert isinstance(result, np.ndarray)
+
+
 def test_committee_bagging_logic(toy_data):
     """Test that use_bagging flag correctly controls bootstrap aggregation."""
     X_train, _, _, y_train, _, _ = toy_data

From e0128d107a2576fd0d09d0e81911a6963a5565b0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 20 Apr 2026 02:41:44 +0000
Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 openadmet/models/active_learning/committee.py                 | 4 +++-
 .../models/tests/unit/active_learning/test_active_learning.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/openadmet/models/active_learning/committee.py b/openadmet/models/active_learning/committee.py
index 96ecc6a9..5fbc615f 100644
--- a/openadmet/models/active_learning/committee.py
+++ b/openadmet/models/active_learning/committee.py
@@ -428,7 +428,9 @@ def predict(self, X, return_std=False, return_members=False, **kwargs):
                 "Standard deviation not calibrated: consider calling `calibrate_uncertainty`."
             )
 
-        return self._predict(X, return_std=return_std, return_members=return_members, **kwargs)
+        return self._predict(
+            X, return_std=return_std, return_members=return_members, **kwargs
+        )
 
     def _save_calibration_model(self, path: PathLike = "calibration_model.pkl"):
         # Save calibration model
diff --git a/openadmet/models/tests/unit/active_learning/test_active_learning.py b/openadmet/models/tests/unit/active_learning/test_active_learning.py
index 90c759eb..60c415b1 100644
--- a/openadmet/models/tests/unit/active_learning/test_active_learning.py
+++ b/openadmet/models/tests/unit/active_learning/test_active_learning.py
@@ -177,7 +177,9 @@ def test_return_members(toy_data):
     assert_allclose(mean, np.mean(members, axis=-1))
 
     # return_members + return_std
-    mean2, std, members2 = committee.predict(X_test, return_std=True, return_members=True)
+    mean2, std, members2 = committee.predict(
+        X_test, return_std=True, return_members=True
+    )
     assert members2.shape == (X_test.shape[0], n_tasks, n_members)
     assert std.shape == (X_test.shape[0], n_tasks)
     assert_allclose(mean2, np.mean(members2, axis=-1))

From a3769162ce793555f883012b50e32d7e71a7e34b Mon Sep 17 00:00:00 2001
From: Hugo MacDermott-Opeskin <hugomacdermott@gmail.com>
Date: Wed, 20 May 2026 20:36:20 +1000
Subject: [PATCH 3/4] Rename return_members to return_all, make mutually
 exclusive with return_std
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- return_all=True now returns only the raw (n_samples, n_tasks, n_members)
  array; callers derive mean/std themselves
- return_std and return_all are mutually exclusive; raises ValueError if both set
- Update tests: test_return_members → test_return_all, combined flag test
  becomes a ValueError assertion

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 openadmet/models/active_learning/committee.py | 55 ++++++++++---------
 .../active_learning/test_active_learning.py   | 26 ++++-----
 2 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/openadmet/models/active_learning/committee.py b/openadmet/models/active_learning/committee.py
index 5fbc615f..6e2e249a 100644
--- a/openadmet/models/active_learning/committee.py
+++ b/openadmet/models/active_learning/committee.py
@@ -353,7 +353,7 @@ def query(self, X, query_strategy: str = None, **kwargs):
 
         return _ACQUISITION_FUNCTIONS[query_strategy](mean, std, **kwargs)
 
-    def _predict(self, X, return_std=False, return_members=False, **kwargs):
+    def _predict(self, X, return_std=False, return_all=False, **kwargs):
         """
         Make predictions using the committee model.
 
@@ -363,43 +363,45 @@ def _predict(self, X, return_std=False, return_members=False, **kwargs):
             The input samples to predict.
         return_std : bool, optional
             Whether to return the standard deviation of the predictions.
-        return_members : bool, optional
+            Mutually exclusive with ``return_all``.
+        return_all : bool, optional
             Whether to return the raw per-member predictions of shape
-            (n_samples, n_tasks, n_members). When True, returned as the
-            last element of the tuple.
+            (n_samples, n_tasks, n_members) instead of the mean (and std).
+            Mutually exclusive with ``return_std``.
         **kwargs : dict
             Additional keyword arguments to pass to the committee's predict method.
 
         Returns
         -------
         array-like or tuple
-            mean, or (mean, std), or (mean, members), or (mean, std, members)
-            depending on the values of return_std and return_members.
+            mean, or (mean, std), or ndarray of shape (n_samples, n_tasks, n_members)
+            depending on the values of return_std and return_all.
 
         """
+        if return_std and return_all:
+            raise ValueError(
+                "return_std and return_all are mutually exclusive. "
+                "When return_all=True, compute mean and std from the returned array as needed."
+            )
+
         # Make predictions: (n_samples, n_tasks, n_members)
         preds = np.stack([model.predict(X, **kwargs) for model in self.models], axis=-1)
 
+        if return_all:
+            return preds
+
         # Compute mean
         mean = np.mean(preds, axis=-1)
 
-        if not return_std and not return_members:
+        if not return_std:
             return mean
 
-        result = (mean,)
-
-        if return_std:
-            std = np.std(preds, axis=-1)
-            if self.calibrated:
-                std = self._get_calibration_function()(std)
-            result += (std,)
-
-        if return_members:
-            result += (preds,)
-
-        return result
+        std = np.std(preds, axis=-1)
+        if self.calibrated:
+            std = self._get_calibration_function()(std)
+        return mean, std
 
-    def predict(self, X, return_std=False, return_members=False, **kwargs):
+    def predict(self, X, return_std=False, return_all=False, **kwargs):
         """
         Make predictions using the committee model.
 
@@ -409,18 +411,19 @@ def predict(self, X, return_std=False, return_members=False, **kwargs):
             The input samples to predict.
         return_std : bool, optional
             Whether to return the standard deviation of the predictions.
-        return_members : bool, optional
+            Mutually exclusive with ``return_all``.
+        return_all : bool, optional
             Whether to return the raw per-member predictions of shape
-            (n_samples, n_tasks, n_members). When True, returned as the
-            last element of the tuple.
+            (n_samples, n_tasks, n_members) instead of the mean (and std).
+            Mutually exclusive with ``return_std``.
         **kwargs : dict
             Additional keyword arguments to pass to the committee's predict method.
 
         Returns
         -------
         array-like or tuple
-            mean, or (mean, std), or (mean, members), or (mean, std, members)
-            depending on the values of return_std and return_members.
+            mean, or (mean, std), or ndarray of shape (n_samples, n_tasks, n_members)
+            depending on the values of return_std and return_all.
 
         """
         if return_std is True and not self.calibrated:
@@ -429,7 +432,7 @@ def predict(self, X, return_std=False, return_members=False, **kwargs):
             )
 
         return self._predict(
-            X, return_std=return_std, return_members=return_members, **kwargs
+            X, return_std=return_std, return_all=return_all, **kwargs
         )
 
     def _save_calibration_model(self, path: PathLike = "calibration_model.pkl"):
diff --git a/openadmet/models/tests/unit/active_learning/test_active_learning.py b/openadmet/models/tests/unit/active_learning/test_active_learning.py
index 60c415b1..75cc3ffd 100644
--- a/openadmet/models/tests/unit/active_learning/test_active_learning.py
+++ b/openadmet/models/tests/unit/active_learning/test_active_learning.py
@@ -155,8 +155,8 @@ def deserialize(self, param_path, serial_path):
         pass
 
 
-def test_return_members(toy_data):
-    """Test that return_members exposes per-member predictions with correct shape."""
+def test_return_all(toy_data):
+    """Test that return_all exposes raw per-member predictions with correct shape."""
     X_train, _, X_test, y_train, _, _ = toy_data
     n_members = 4
     n_tasks = 1
@@ -170,20 +170,16 @@ def test_return_members(toy_data):
         use_bagging=False,
     )
 
-    # return_members only
-    mean, members = committee.predict(X_test, return_members=True)
-    assert members.shape == (X_test.shape[0], n_tasks, n_members)
-    assert mean.shape == (X_test.shape[0], n_tasks)
-    assert_allclose(mean, np.mean(members, axis=-1))
+    # return_all returns only the raw array
+    preds = committee.predict(X_test, return_all=True)
+    assert preds.shape == (X_test.shape[0], n_tasks, n_members)
+    # mean and std are derivable from the returned array
+    assert_allclose(np.mean(preds, axis=-1).shape, (X_test.shape[0], n_tasks))
+    assert_allclose(np.std(preds, axis=-1).shape, (X_test.shape[0], n_tasks))
 
-    # return_members + return_std
-    mean2, std, members2 = committee.predict(
-        X_test, return_std=True, return_members=True
-    )
-    assert members2.shape == (X_test.shape[0], n_tasks, n_members)
-    assert std.shape == (X_test.shape[0], n_tasks)
-    assert_allclose(mean2, np.mean(members2, axis=-1))
-    assert_allclose(std, np.std(members2, axis=-1))
+    # return_all and return_std are mutually exclusive
+    with pytest.raises(ValueError, match="mutually exclusive"):
+        committee.predict(X_test, return_std=True, return_all=True)
 
     # Neither flag — plain mean returned (not a tuple)
     result = committee.predict(X_test)

From b3cb5f3ec97f6714cb915020537ef2892d0613ff Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 20 May 2026 10:42:23 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 openadmet/models/active_learning/committee.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/openadmet/models/active_learning/committee.py b/openadmet/models/active_learning/committee.py
index 54228c74..d07d7dca 100644
--- a/openadmet/models/active_learning/committee.py
+++ b/openadmet/models/active_learning/committee.py
@@ -432,9 +432,7 @@ def predict(self, X, return_std=False, return_all=False, **kwargs):
                 "Standard deviation not calibrated: consider calling `calibrate_uncertainty`."
             )
 
-        return self._predict(
-            X, return_std=return_std, return_all=return_all, **kwargs
-        )
+        return self._predict(X, return_std=return_std, return_all=return_all, **kwargs)
 
     def _save_calibration_model(self, path: PathLike = "calibration_model.pkl"):
         # Save calibration model