From 96f06cbd8785f677b65f0d53ddf283dbe29bb9da Mon Sep 17 00:00:00 2001 From: hmacdope Date: Mon, 20 Apr 2026 12:41:24 +1000 Subject: [PATCH 1/4] Add return_members option to CommitteeRegressor.predict() Adds return_members=False to _predict() and predict(). When True, the raw per-member predictions are returned as the last element of the tuple with shape (n_samples, n_tasks, n_members). Composes cleanly with return_std: callers can request any combination of mean, std, and member predictions. Closes #464 Co-Authored-By: Claude Sonnet 4.6 --- openadmet/models/active_learning/committee.py | 45 ++++++++++++------- .../active_learning/test_active_learning.py | 33 ++++++++++++++ 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/openadmet/models/active_learning/committee.py b/openadmet/models/active_learning/committee.py index 4f8bf7fd..96ecc6a9 100644 --- a/openadmet/models/active_learning/committee.py +++ b/openadmet/models/active_learning/committee.py @@ -353,7 +353,7 @@ def query(self, X, query_strategy: str = None, **kwargs): return _ACQUISITION_FUNCTIONS[query_strategy](mean, std, **kwargs) - def _predict(self, X, return_std=False, **kwargs): + def _predict(self, X, return_std=False, return_members=False, **kwargs): """ Make predictions using the committee model. @@ -363,35 +363,43 @@ def _predict(self, X, return_std=False, **kwargs): The input samples to predict. return_std : bool, optional Whether to return the standard deviation of the predictions. + return_members : bool, optional + Whether to return the raw per-member predictions of shape + (n_samples, n_tasks, n_members). When True, returned as the + last element of the tuple. **kwargs : dict Additional keyword arguments to pass to the committee's predict method. Returns ------- - array-like - Predicted values or probabilities, depending on the committee's implementation. + array-like or tuple + mean, or (mean, std), or (mean, members), or (mean, std, members) + depending on the values of return_std and return_members. """ - # Make predictions + # Make predictions: (n_samples, n_tasks, n_members) preds = np.stack([model.predict(X, **kwargs) for model in self.models], axis=-1) # Compute mean mean = np.mean(preds, axis=-1) - # Skip std if not requested - if return_std is False: + if not return_std and not return_members: return mean - # Compute standard deviation - std = np.std(preds, axis=-1) + result = (mean,) - # Calibrate std if calibration model is available - if self.calibrated: - std = self._get_calibration_function()(std) + if return_std: + std = np.std(preds, axis=-1) + if self.calibrated: + std = self._get_calibration_function()(std) + result += (std,) + + if return_members: + result += (preds,) - return mean, std + return result - def predict(self, X, return_std=False, **kwargs): + def predict(self, X, return_std=False, return_members=False, **kwargs): """ Make predictions using the committee model. @@ -401,13 +409,18 @@ def predict(self, X, return_std=False, **kwargs): The input samples to predict. return_std : bool, optional Whether to return the standard deviation of the predictions. + return_members : bool, optional + Whether to return the raw per-member predictions of shape + (n_samples, n_tasks, n_members). When True, returned as the + last element of the tuple. **kwargs : dict Additional keyword arguments to pass to the committee's predict method. Returns ------- - array-like - Predicted values or probabilities, depending on the committee's implementation. + array-like or tuple + mean, or (mean, std), or (mean, members), or (mean, std, members) + depending on the values of return_std and return_members. """ if return_std is True and not self.calibrated: @@ -415,7 +428,7 @@ def predict(self, X, return_std=False, **kwargs): "Standard deviation not calibrated: consider calling `calibrate_uncertainty`." ) - return self._predict(X, return_std=return_std, **kwargs) + return self._predict(X, return_std=return_std, return_members=return_members, **kwargs) def _save_calibration_model(self, path: PathLike = "calibration_model.pkl"): # Save calibration model diff --git a/openadmet/models/tests/unit/active_learning/test_active_learning.py b/openadmet/models/tests/unit/active_learning/test_active_learning.py index 362d20a6..90c759eb 100644 --- a/openadmet/models/tests/unit/active_learning/test_active_learning.py +++ b/openadmet/models/tests/unit/active_learning/test_active_learning.py @@ -155,6 +155,39 @@ def deserialize(self, param_path, serial_path): pass +def test_return_members(toy_data): + """Test that return_members exposes per-member predictions with correct shape.""" + X_train, _, X_test, y_train, _, _ = toy_data + n_members = 4 + n_tasks = 1 + + committee = CommitteeRegressor.train( + X_train, + y_train, + mod_class=MockCommitteeModel, + mod_params={}, + n_models=n_members, + use_bagging=False, + ) + + # return_members only + mean, members = committee.predict(X_test, return_members=True) + assert members.shape == (X_test.shape[0], n_tasks, n_members) + assert mean.shape == (X_test.shape[0], n_tasks) + assert_allclose(mean, np.mean(members, axis=-1)) + + # return_members + return_std + mean2, std, members2 = committee.predict(X_test, return_std=True, return_members=True) + assert members2.shape == (X_test.shape[0], n_tasks, n_members) + assert std.shape == (X_test.shape[0], n_tasks) + assert_allclose(mean2, np.mean(members2, axis=-1)) + assert_allclose(std, np.std(members2, axis=-1)) + + # Neither flag — plain mean returned (not a tuple) + result = committee.predict(X_test) + assert isinstance(result, np.ndarray) + + def test_committee_bagging_logic(toy_data): """Test that use_bagging flag correctly controls bootstrap aggregation.""" X_train, _, _, y_train, _, _ = toy_data From e0128d107a2576fd0d09d0e81911a6963a5565b0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 20 Apr 2026 02:41:44 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openadmet/models/active_learning/committee.py | 4 +++- .../models/tests/unit/active_learning/test_active_learning.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openadmet/models/active_learning/committee.py b/openadmet/models/active_learning/committee.py index 96ecc6a9..5fbc615f 100644 --- a/openadmet/models/active_learning/committee.py +++ b/openadmet/models/active_learning/committee.py @@ -428,7 +428,9 @@ def predict(self, X, return_std=False, return_members=False, **kwargs): "Standard deviation not calibrated: consider calling `calibrate_uncertainty`." ) - return self._predict(X, return_std=return_std, return_members=return_members, **kwargs) + return self._predict( + X, return_std=return_std, return_members=return_members, **kwargs + ) def _save_calibration_model(self, path: PathLike = "calibration_model.pkl"): # Save calibration model diff --git a/openadmet/models/tests/unit/active_learning/test_active_learning.py b/openadmet/models/tests/unit/active_learning/test_active_learning.py index 90c759eb..60c415b1 100644 --- a/openadmet/models/tests/unit/active_learning/test_active_learning.py +++ b/openadmet/models/tests/unit/active_learning/test_active_learning.py @@ -177,7 +177,9 @@ def test_return_members(toy_data): assert_allclose(mean, np.mean(members, axis=-1)) # return_members + return_std - mean2, std, members2 = committee.predict(X_test, return_std=True, return_members=True) + mean2, std, members2 = committee.predict( + X_test, return_std=True, return_members=True + ) assert members2.shape == (X_test.shape[0], n_tasks, n_members) assert std.shape == (X_test.shape[0], n_tasks) assert_allclose(mean2, np.mean(members2, axis=-1)) From a3769162ce793555f883012b50e32d7e71a7e34b Mon Sep 17 00:00:00 2001 From: Hugo MacDermott-Opeskin Date: Wed, 20 May 2026 20:36:20 +1000 Subject: [PATCH 3/4] Rename return_members to return_all, make mutually exclusive with return_std MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - return_all=True now returns only the raw (n_samples, n_tasks, n_members) array; callers derive mean/std themselves - return_std and return_all are mutually exclusive; raises ValueError if both set - Update tests: test_return_members → test_return_all, combined flag test becomes a ValueError assertion Co-Authored-By: Claude Sonnet 4.6 --- openadmet/models/active_learning/committee.py | 55 ++++++++++--------- .../active_learning/test_active_learning.py | 26 ++++----- 2 files changed, 40 insertions(+), 41 deletions(-) diff --git a/openadmet/models/active_learning/committee.py b/openadmet/models/active_learning/committee.py index 5fbc615f..6e2e249a 100644 --- a/openadmet/models/active_learning/committee.py +++ b/openadmet/models/active_learning/committee.py @@ -353,7 +353,7 @@ def query(self, X, query_strategy: str = None, **kwargs): return _ACQUISITION_FUNCTIONS[query_strategy](mean, std, **kwargs) - def _predict(self, X, return_std=False, return_members=False, **kwargs): + def _predict(self, X, return_std=False, return_all=False, **kwargs): """ Make predictions using the committee model. @@ -363,43 +363,45 @@ def _predict(self, X, return_std=False, return_members=False, **kwargs): The input samples to predict. return_std : bool, optional Whether to return the standard deviation of the predictions. - return_members : bool, optional + Mutually exclusive with ``return_all``. + return_all : bool, optional Whether to return the raw per-member predictions of shape - (n_samples, n_tasks, n_members). When True, returned as the - last element of the tuple. + (n_samples, n_tasks, n_members) instead of the mean (and std). + Mutually exclusive with ``return_std``. **kwargs : dict Additional keyword arguments to pass to the committee's predict method. Returns ------- array-like or tuple - mean, or (mean, std), or (mean, members), or (mean, std, members) - depending on the values of return_std and return_members. + mean, or (mean, std), or ndarray of shape (n_samples, n_tasks, n_members) + depending on the values of return_std and return_all. """ + if return_std and return_all: + raise ValueError( + "return_std and return_all are mutually exclusive. " + "When return_all=True, compute mean and std from the returned array as needed." + ) + # Make predictions: (n_samples, n_tasks, n_members) preds = np.stack([model.predict(X, **kwargs) for model in self.models], axis=-1) + if return_all: + return preds + # Compute mean mean = np.mean(preds, axis=-1) - if not return_std and not return_members: + if not return_std: return mean - result = (mean,) - - if return_std: - std = np.std(preds, axis=-1) - if self.calibrated: - std = self._get_calibration_function()(std) - result += (std,) - - if return_members: - result += (preds,) - - return result + std = np.std(preds, axis=-1) + if self.calibrated: + std = self._get_calibration_function()(std) + return mean, std - def predict(self, X, return_std=False, return_members=False, **kwargs): + def predict(self, X, return_std=False, return_all=False, **kwargs): """ Make predictions using the committee model. @@ -409,18 +411,19 @@ def predict(self, X, return_std=False, return_members=False, **kwargs): The input samples to predict. return_std : bool, optional Whether to return the standard deviation of the predictions. - return_members : bool, optional + Mutually exclusive with ``return_all``. + return_all : bool, optional Whether to return the raw per-member predictions of shape - (n_samples, n_tasks, n_members). When True, returned as the - last element of the tuple. + (n_samples, n_tasks, n_members) instead of the mean (and std). + Mutually exclusive with ``return_std``. **kwargs : dict Additional keyword arguments to pass to the committee's predict method. Returns ------- array-like or tuple - mean, or (mean, std), or (mean, members), or (mean, std, members) - depending on the values of return_std and return_members. + mean, or (mean, std), or ndarray of shape (n_samples, n_tasks, n_members) + depending on the values of return_std and return_all. """ if return_std is True and not self.calibrated: @@ -429,7 +432,7 @@ def predict(self, X, return_std=False, return_members=False, **kwargs): ) return self._predict( - X, return_std=return_std, return_members=return_members, **kwargs + X, return_std=return_std, return_all=return_all, **kwargs ) def _save_calibration_model(self, path: PathLike = "calibration_model.pkl"): diff --git a/openadmet/models/tests/unit/active_learning/test_active_learning.py b/openadmet/models/tests/unit/active_learning/test_active_learning.py index 60c415b1..75cc3ffd 100644 --- a/openadmet/models/tests/unit/active_learning/test_active_learning.py +++ b/openadmet/models/tests/unit/active_learning/test_active_learning.py @@ -155,8 +155,8 @@ def deserialize(self, param_path, serial_path): pass -def test_return_members(toy_data): - """Test that return_members exposes per-member predictions with correct shape.""" +def test_return_all(toy_data): + """Test that return_all exposes raw per-member predictions with correct shape.""" X_train, _, X_test, y_train, _, _ = toy_data n_members = 4 n_tasks = 1 @@ -170,20 +170,16 @@ def test_return_members(toy_data): use_bagging=False, ) - # return_members only - mean, members = committee.predict(X_test, return_members=True) - assert members.shape == (X_test.shape[0], n_tasks, n_members) - assert mean.shape == (X_test.shape[0], n_tasks) - assert_allclose(mean, np.mean(members, axis=-1)) + # return_all returns only the raw array + preds = committee.predict(X_test, return_all=True) + assert preds.shape == (X_test.shape[0], n_tasks, n_members) + # mean and std are derivable from the returned array + assert_allclose(np.mean(preds, axis=-1).shape, (X_test.shape[0], n_tasks)) + assert_allclose(np.std(preds, axis=-1).shape, (X_test.shape[0], n_tasks)) - # return_members + return_std - mean2, std, members2 = committee.predict( - X_test, return_std=True, return_members=True - ) - assert members2.shape == (X_test.shape[0], n_tasks, n_members) - assert std.shape == (X_test.shape[0], n_tasks) - assert_allclose(mean2, np.mean(members2, axis=-1)) - assert_allclose(std, np.std(members2, axis=-1)) + # return_all and return_std are mutually exclusive + with pytest.raises(ValueError, match="mutually exclusive"): + committee.predict(X_test, return_std=True, return_all=True) # Neither flag — plain mean returned (not a tuple) result = committee.predict(X_test) From b3cb5f3ec97f6714cb915020537ef2892d0613ff Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 May 2026 10:42:23 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openadmet/models/active_learning/committee.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/openadmet/models/active_learning/committee.py b/openadmet/models/active_learning/committee.py index 54228c74..d07d7dca 100644 --- a/openadmet/models/active_learning/committee.py +++ b/openadmet/models/active_learning/committee.py @@ -432,9 +432,7 @@ def predict(self, X, return_std=False, return_all=False, **kwargs): "Standard deviation not calibrated: consider calling `calibrate_uncertainty`." ) - return self._predict( - X, return_std=return_std, return_all=return_all, **kwargs - ) + return self._predict(X, return_std=return_std, return_all=return_all, **kwargs) def _save_calibration_model(self, path: PathLike = "calibration_model.pkl"): # Save calibration model