From fb95c4c06bf524513bdcdbe145f11f0b852d8db6 Mon Sep 17 00:00:00 2001
From: George Pickett
Date: Wed, 20 May 2026 10:45:15 -0700
Subject: [PATCH] Fix HPO gate pass reporting

---
 src/eval/inference/hpo_search_baselines.py |  8 +++++---
 tests/test_hpo_search_baselines.py         | 12 ++++++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/eval/inference/hpo_search_baselines.py b/src/eval/inference/hpo_search_baselines.py
index 8f9bbb7..de90a39 100644
--- a/src/eval/inference/hpo_search_baselines.py
+++ b/src/eval/inference/hpo_search_baselines.py
@@ -359,12 +359,14 @@ def primary_metric(metrics: dict[str, Any], scenario: str) -> tuple[float, str |
 
 
 def gate_passed(metrics: dict[str, Any]) -> bool:
+    if metrics.get("error"):
+        return False
     if metrics.get("score") == 0:
         return False
-    qc = metrics.get("quality_check") or {}
-    if qc.get("pass") is False:
+    qc = metrics.get("quality_check")
+    if not isinstance(qc, dict):
         return False
-    return True
+    return qc.get("pass") is True
 
 
 def _run_text(cmd: list[str], timeout_s: float = 20.0) -> str:
diff --git a/tests/test_hpo_search_baselines.py b/tests/test_hpo_search_baselines.py
index 9cd01e3..c275bd3 100644
--- a/tests/test_hpo_search_baselines.py
+++ b/tests/test_hpo_search_baselines.py
@@ -142,3 +142,15 @@ def test_gate_passed_ignores_legacy_success_gate_field():
 def test_gate_passed_still_fails_quality_and_zero_score():
     assert not hpo.gate_passed({"score": 0, "quality_check": {"pass": True}})
     assert not hpo.gate_passed({"quality_check": {"pass": False}})
+
+
+def test_gate_passed_requires_explicit_successful_quality_check():
+    assert hpo.gate_passed({"quality_check": {"pass": True}})
+    assert not hpo.gate_passed({})
+    assert not hpo.gate_passed({"quality_check": {}})
+    assert not hpo.gate_passed(
+        {
+            "error": "evaluate.py exited with code 1",
+            "quality_check": {"pass": True},
+        }
+    )