Skip to content

Commit eb35ccf

Browse files
igerber and claude committed
Relax Wave B golden bit-identity to assert_allclose(rtol=1e-14)
CI Pure Python Fallback (Linux py3.14) drifted 1 ULP from the macOS Accelerate capture machine on `test_event_study_false_matches_wave_b_golden` -- expected -0.08620379515400438, got -0.08620379515400439.

The 6 `==` checks against `_WAVE_B_GOLDEN_*` are cross-machine pins, which is exactly the class of BLAS reduction-order sensitivity that `feedback_assert_allclose_numerical_parity` warns about. Switched all 6 golden assertions to `np.testing.assert_allclose(rtol=1e-14, atol=1e-14)` -- tight enough to catch real aggregate-path drift, loose enough to absorb cross-runner ULP differences.

The same-machine determinism check `test_event_study_false_bit_identical_to_wave_b_fixture` keeps `==` (both fits run on the same runner).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 75ff0e2 commit eb35ccf

1 file changed

Lines changed: 44 additions & 18 deletions

File tree

tests/test_spillover.py

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3508,10 +3508,14 @@ class TestSpilloverDiDEventStudyBackwardCompat:
35083508

35093509
def test_event_study_false_matches_wave_b_golden(self):
35103510
"""Pre-Wave-C golden parity (not just determinism): pin att/se on a
3511-
deterministic DGP and assert bit-identical reproduction. Strengthened
3511+
deterministic DGP at 1e-14 tolerance and assert reproduction within
3512+
ULP-scale BLAS reduction-order drift across runners. Strengthened
35123513
per PR #456 R3 review — the previous determinism check (fit twice on
35133514
the current code path) did not actually anchor against a pre-Wave-C
3514-
baseline."""
3515+
baseline. Tolerance softened from `==` to `assert_allclose(rtol=1e-14,
3516+
atol=1e-14)` after CI Pure Python Fallback (Linux py3.14) flagged a
3517+
1-ULP drift from the macOS Accelerate capture machine — the
3518+
identification claim is unchanged; the platform-pinning was."""
35153519
df = generate_butts_nonstaggered_dgp(seed=42)
35163520
est = SpilloverDiD(
35173521
rings=[0.0, 50.0, 200.0],
@@ -3524,27 +3528,49 @@ def test_event_study_false_matches_wave_b_golden(self):
35243528
with _w.catch_warnings():
35253529
_w.simplefilter("ignore", UserWarning)
35263530
res = est.fit(df, outcome="y", unit="unit", time="time", treatment="D")
3527-
# Scalar att/se must match the pre-Wave-C golden at machine precision.
3528-
assert res.att == self._WAVE_B_GOLDEN_ATT, (
3529-
f"event_study=False att drift: got {res.att!r}, "
3530-
f"expected {self._WAVE_B_GOLDEN_ATT!r}"
3531-
)
3532-
assert res.se == self._WAVE_B_GOLDEN_SE, (
3533-
f"event_study=False se drift: got {res.se!r}, " f"expected {self._WAVE_B_GOLDEN_SE!r}"
3531+
# Goldens were captured on a single machine (BLAS reduction order is
3532+
# platform-dependent); pin at 1e-14 tolerance per
3533+
# `feedback_assert_allclose_numerical_parity`. Tight enough to catch
3534+
# real aggregate-path drift, loose enough to absorb ULP-scale
3535+
# cross-runner reduction-order differences (Pure Python Fallback on
3536+
# Linux py3.14 drifts ~1 ULP from macOS Accelerate captures).
3537+
np.testing.assert_allclose(
3538+
res.att,
3539+
self._WAVE_B_GOLDEN_ATT,
3540+
rtol=1e-14,
3541+
atol=1e-14,
3542+
err_msg=f"event_study=False att drift: got {res.att!r}, expected {self._WAVE_B_GOLDEN_ATT!r}",
3543+
)
3544+
np.testing.assert_allclose(
3545+
res.se,
3546+
self._WAVE_B_GOLDEN_SE,
3547+
rtol=1e-14,
3548+
atol=1e-14,
3549+
err_msg=f"event_study=False se drift: got {res.se!r}, expected {self._WAVE_B_GOLDEN_SE!r}",
35343550
)
35353551
# Per-ring entries must also match.
35363552
inner = res.spillover_effects.loc["[0, 50)"]
3537-
assert inner["coef"] == self._WAVE_B_GOLDEN_RING_INNER_COEF, (
3538-
f"inner ring coef drift: got {inner['coef']!r}, "
3539-
f"expected {self._WAVE_B_GOLDEN_RING_INNER_COEF!r}"
3540-
)
3541-
assert inner["se"] == self._WAVE_B_GOLDEN_RING_INNER_SE, (
3542-
f"inner ring se drift: got {inner['se']!r}, "
3543-
f"expected {self._WAVE_B_GOLDEN_RING_INNER_SE!r}"
3553+
np.testing.assert_allclose(
3554+
inner["coef"],
3555+
self._WAVE_B_GOLDEN_RING_INNER_COEF,
3556+
rtol=1e-14,
3557+
atol=1e-14,
3558+
err_msg=f"inner ring coef drift: got {inner['coef']!r}, expected {self._WAVE_B_GOLDEN_RING_INNER_COEF!r}",
3559+
)
3560+
np.testing.assert_allclose(
3561+
inner["se"],
3562+
self._WAVE_B_GOLDEN_RING_INNER_SE,
3563+
rtol=1e-14,
3564+
atol=1e-14,
3565+
err_msg=f"inner ring se drift: got {inner['se']!r}, expected {self._WAVE_B_GOLDEN_RING_INNER_SE!r}",
35443566
)
35453567
outer = res.spillover_effects.loc["[50, 200]"]
3546-
assert outer["coef"] == self._WAVE_B_GOLDEN_RING_OUTER_COEF
3547-
assert outer["se"] == self._WAVE_B_GOLDEN_RING_OUTER_SE
3568+
np.testing.assert_allclose(
3569+
outer["coef"], self._WAVE_B_GOLDEN_RING_OUTER_COEF, rtol=1e-14, atol=1e-14
3570+
)
3571+
np.testing.assert_allclose(
3572+
outer["se"], self._WAVE_B_GOLDEN_RING_OUTER_SE, rtol=1e-14, atol=1e-14
3573+
)
35483574

35493575
def test_event_study_false_bit_identical_to_wave_b_fixture(self):
35503576
df = generate_butts_nonstaggered_dgp(seed=42)

0 commit comments

Comments
 (0)