Skip to content

Commit d59039e

Browse files
authored
Merge pull request #505 from igerber/fix/ci-pure-python-runtime
Right-size pure-Python CI test runtime (SyntheticControl regression + SDID safe trims)
2 parents d0a8752 + f16c417 commit d59039e

2 files changed

Lines changed: 105 additions & 23 deletions

File tree

tests/test_methodology_sdid.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3076,13 +3076,17 @@ def test_baseline_parity_small_scale(self, variance_method):
30763076
assert len(r.placebo_effects) == n0
30773077

30783078
@pytest.mark.parametrize("variance_method", ["placebo", "bootstrap", "jackknife"])
3079-
def test_scale_equivariance(self, variance_method):
3079+
def test_scale_equivariance(self, variance_method, ci_params):
30803080
"""τ/a, SE/|a|, p-value, and n_successful must be invariant under
30813081
(Y → a*Y + b) across ~15 orders of magnitude."""
3082+
# Pure invariance check (baseline captured at runtime, not vs _BASELINE), so the
3083+
# absolute n_bootstrap is irrelevant: r0 and the scaled refits all use the same
3084+
# (ci_params-scaled in pure-Python, 200 under Rust) count, preserving equivariance.
3085+
nb = ci_params.bootstrap(200)
30823086
data = _make_panel(seed=42)
30833087
with warnings.catch_warnings():
30843088
warnings.simplefilter("ignore", UserWarning)
3085-
r0 = self._fit(data, variance_method)
3089+
r0 = self._fit(data, variance_method, n_bootstrap=nb)
30863090
att0, se0, p0 = r0.att, r0.se, r0.p_value
30873091
n0 = len(r0.placebo_effects)
30883092
noise0 = r0.noise_level
@@ -3092,7 +3096,7 @@ def test_scale_equivariance(self, variance_method):
30923096
scaled = self._rescale(data, a, b)
30933097
with warnings.catch_warnings():
30943098
warnings.simplefilter("ignore", UserWarning)
3095-
r = self._fit(scaled, variance_method)
3099+
r = self._fit(scaled, variance_method, n_bootstrap=nb)
30963100
# Variance-method success count must be identical; divergence
30973101
# would shift the empirical p-value floor 1/(n+1).
30983102
assert len(r.placebo_effects) == n0, (
@@ -3172,13 +3176,15 @@ class TestPValueSemantics:
31723176
null draws either and also use the analytical p-value.
31733177
"""
31743178

3175-
def test_bootstrap_p_value_matches_analytical(self):
3179+
def test_bootstrap_p_value_matches_analytical(self, ci_params):
31763180
"""Bootstrap p-value must equal safe_inference(att, se)[1]."""
3181+
# Self-consistency check (reported p vs the analytical formula on the reported se) —
3182+
# independent of the bootstrap draw count, so ci_params scaling is safe.
31773183
df = _make_panel(seed=42)
31783184
with warnings.catch_warnings():
31793185
warnings.simplefilter("ignore", UserWarning)
31803186
r = SyntheticDiD(
3181-
variance_method="bootstrap", n_bootstrap=200, seed=1
3187+
variance_method="bootstrap", n_bootstrap=ci_params.bootstrap(200), seed=1
31823188
).fit(
31833189
df, outcome="outcome", treatment="treated",
31843190
unit="unit", time="period",
@@ -3189,13 +3195,15 @@ def test_bootstrap_p_value_matches_analytical(self):
31893195
f"bootstrap p_value={r.p_value} != analytical {expected_p}"
31903196
)
31913197

3192-
def test_placebo_p_value_uses_empirical_formula(self):
3198+
def test_placebo_p_value_uses_empirical_formula(self, ci_params):
31933199
"""Placebo p-value must equal max(mean(|draws| >= |att|), 1/(r+1))."""
3200+
# Self-consistency check (reported p vs the empirical formula on the reported
3201+
# placebo_effects) — independent of the draw count, so ci_params scaling is safe.
31943202
df = _make_panel(seed=42)
31953203
with warnings.catch_warnings():
31963204
warnings.simplefilter("ignore", UserWarning)
31973205
r = SyntheticDiD(
3198-
variance_method="placebo", n_bootstrap=200, seed=1
3206+
variance_method="placebo", n_bootstrap=ci_params.bootstrap(200), seed=1
31993207
).fit(
32003208
df, outcome="outcome", treatment="treated",
32013209
unit="unit", time="period",

tests/test_methodology_synthetic_control.py

Lines changed: 90 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,31 @@
4747
]
4848

4949

50+
# ---------------------------------------------------------------------------
51+
# Cheap optimizer settings for behavior tests (pure-Python CI speed)
52+
# ---------------------------------------------------------------------------
53+
# Behavior tests only need a VALID, cleanly-converged fit, not data-driven V quality.
54+
# The production nested defaults (n_starts=4, inner_max_iter=10000, inner_min_decrease=1e-5)
55+
# cost 30-150s per *pure-Python* fit because the inner Frank-Wolfe solve grinds its slow
56+
# sublinear tail to hit the tight tolerance on every objective evaluation. Loosening the
57+
# inner tolerance + a single start + a small outer cap gives a clean ~0.1s fit without
58+
# changing what these tests assert. Pure-Python coverage of the production-default nested
59+
# path (n_starts=4 with the _v_starts heuristic candidates + the tight inner_min_decrease=1e-5)
60+
# is kept by the dedicated non-slow ``test_nested_production_defaults_smoke`` (a 2-donor panel
61+
# whose inner FW simplex is ~1-D, so defaults stay <0.1s). The @slow Tier-2 Basque test
62+
# additionally covers the defaults in the Rust matrix, and the Rust<->numpy Frank-Wolfe kernel
63+
# equivalence is locked by tests/test_rust_backend.py::test_sc_weight_fw_matches_numpy.
64+
#
65+
# NB: inner_max_iter is deliberately LEFT AT DEFAULT here — the speedup comes from the
66+
# looser tolerance letting FW terminate on *convergence* (not on an iteration cap), so the
67+
# solve stays clean (no non-convergence warning). Do NOT fold inner_max_iter into _FAST or
68+
# the inner-non-convergence warning starts firing spuriously.
69+
_FAST = dict(n_starts=1, optimizer_options={"maxiter": 50}, inner_min_decrease=1e-3)
70+
# Churn tests deliberately force inner non-convergence (inner_max_iter=1); KEEP that and only
71+
# cap the outer optimizer so it does not iterate to maxiter on the flat penalty landscape.
72+
_FAST_CHURN = dict(n_starts=1, optimizer_options={"maxiter": 5})
73+
74+
5075
# ---------------------------------------------------------------------------
5176
# Synthetic panel builders (fast; no R needed)
5277
# ---------------------------------------------------------------------------
@@ -197,8 +222,12 @@ def test_post_periods_canonicalized_and_gap_order_independent():
197222
df, years, T0 = _make_panel()
198223
ordered = years[T0:]
199224
scrambled = list(reversed(ordered)) + [ordered[-1]] # unsorted + duplicate
200-
r1 = synthetic_control(df, "y", "treated", "unit", "year", post_periods=ordered, seed=0)
201-
r2 = synthetic_control(df, "y", "treated", "unit", "year", post_periods=scrambled, seed=0)
225+
r1 = synthetic_control(
226+
df, "y", "treated", "unit", "year", post_periods=ordered, seed=0, **_FAST
227+
)
228+
r2 = synthetic_control(
229+
df, "y", "treated", "unit", "year", post_periods=scrambled, seed=0, **_FAST
230+
)
202231
assert r1.post_periods == r2.post_periods == ordered
203232
assert abs(r1.att - r2.att) < 1e-12
204233
gdf = r2.get_gap_df()
@@ -214,7 +243,9 @@ def test_post_periods_canonicalized_and_gap_order_independent():
214243

215244
def test_donor_pool_restricts_donors():
216245
df, years, T0 = _make_panel(n_donors=4)
217-
res = synthetic_control(df, "y", "treated", "unit", "year", donor_pool=["d0", "d1"], seed=0)
246+
res = synthetic_control(
247+
df, "y", "treated", "unit", "year", donor_pool=["d0", "d1"], seed=0, **_FAST
248+
)
218249
assert res.n_donors == 2
219250
assert set(res.get_weights_df()["unit"]) <= {"d0", "d1"}
220251

@@ -309,8 +340,19 @@ def test_outer_v_nonconvergence_warning():
309340
# Outer V-search non-convergence must not be silent (optimizer capped at 1 iter).
310341
df, _, _ = _make_panel()
311342
with pytest.warns(UserWarning, match="Outer V-search"):
343+
# maxiter=1 forces the OUTER non-convergence; n_starts=1 + a loose inner tolerance
344+
# keep the (still-real) inner solves cheap. Loosening inner_min_decrease does not
345+
# affect whether the outer optimizer hits its 1-iteration cap.
312346
synthetic_control(
313-
df, "y", "treated", "unit", "year", seed=0, optimizer_options={"maxiter": 1}
347+
df,
348+
"y",
349+
"treated",
350+
"unit",
351+
"year",
352+
seed=0,
353+
n_starts=1,
354+
optimizer_options={"maxiter": 1},
355+
inner_min_decrease=1e-3,
314356
)
315357

316358

@@ -319,7 +361,9 @@ def test_inner_v_search_nonconvergence_warning():
319361
# inner_max_iter=1 makes them truncate, and the estimator emits an aggregated warning.
320362
df, _, _ = _make_panel()
321363
with pytest.warns(UserWarning, match="during nested V selection"):
322-
synthetic_control(df, "y", "treated", "unit", "year", seed=0, inner_max_iter=1)
364+
synthetic_control(
365+
df, "y", "treated", "unit", "year", seed=0, inner_max_iter=1, **_FAST_CHURN
366+
)
323367

324368

325369
def test_single_inner_nonconvergence_excluded_from_v_ranking(monkeypatch):
@@ -348,7 +392,7 @@ def patched(X1s, X0s, v, max_iter, min_decrease):
348392

349393
monkeypatch.setattr(sc, "_inner_solve_W", patched)
350394
with pytest.warns(UserWarning, match="during nested V selection"):
351-
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0)
395+
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)
352396

353397
assert state["failed"] # the patch actually fired on an objective evaluation
354398
assert np.isfinite(res.att)
@@ -361,11 +405,37 @@ def test_n_starts_one_runs():
361405
# n_starts=1 uses only the uniform start (short-circuits the heuristic candidates)
362406
# and still produces a valid nested fit.
363407
df, _, _ = _make_panel()
364-
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, n_starts=1)
408+
res = synthetic_control(
409+
df,
410+
"y",
411+
"treated",
412+
"unit",
413+
"year",
414+
seed=0,
415+
n_starts=1,
416+
optimizer_options={"maxiter": 50},
417+
inner_min_decrease=1e-3,
418+
)
365419
assert np.isfinite(res.att)
366420
assert abs(sum(res.donor_weights.values()) - 1.0) < 1e-6
367421

368422

423+
def test_nested_production_defaults_smoke():
424+
# Coverage anchor: exercise the FULL production-default nested path end-to-end in
425+
# pure-Python — n_starts=4 (so the _v_starts heuristic candidates: inverse-variance,
426+
# univariate-fit and Dirichlet starts are generated, which the n_starts=1 _FAST tests
427+
# skip) and the tight inner_min_decrease=1e-5. A 2-donor panel keeps the inner
428+
# Frank-Wolfe simplex effectively 1-D, so the default settings still run in <0.1s and
429+
# this stays non-slow. The @slow Tier-2 Basque test covers the defaults only in the Rust
430+
# matrix; this is the pure-Python complement.
431+
df, _, _ = _make_panel(n_donors=2)
432+
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0) # production defaults
433+
assert np.isfinite(res.att)
434+
assert abs(sum(res.donor_weights.values()) - 1.0) < 1e-6
435+
assert res.n_donors == 2
436+
assert res.mspe_v is not None # nested V was selected by minimizing pre-period MSPE
437+
438+
369439
def test_non_finite_outcome_rejected():
370440
df, years, T0 = _make_panel()
371441
df = df.copy()
@@ -378,7 +448,7 @@ def test_distinct_special_period_sets_not_duplicate():
378448
# Same var/op, same endpoints + length, different intermediate period -> distinct
379449
# predictors, must NOT be rejected as duplicates.
380450
df, years, T0 = _make_panel(T=8, T0=6)
381-
res = SyntheticControl(seed=0).fit(
451+
res = SyntheticControl(seed=0, **_FAST).fit(
382452
df,
383453
"y",
384454
"treated",
@@ -423,6 +493,7 @@ def test_duplicate_predictor_window_periods_deduped():
423493
predictors=["y"],
424494
predictor_window=[years[0], years[0], years[1]],
425495
seed=0,
496+
**_FAST,
426497
)
427498
r_uniq = synthetic_control(
428499
df,
@@ -433,6 +504,7 @@ def test_duplicate_predictor_window_periods_deduped():
433504
predictors=["y"],
434505
predictor_window=[years[0], years[1]],
435506
seed=0,
507+
**_FAST,
436508
)
437509
assert abs(r_dup.att - r_uniq.att) < 1e-9
438510

@@ -465,7 +537,7 @@ def test_poor_fit_warning():
465537
rows.append({"unit": "treated", "year": yr, "y": 50 + 2.0 * t, "treated": int(t >= T0)})
466538
df = pd.DataFrame(rows)
467539
with pytest.warns(UserWarning, match="Pre-treatment fit is poor"):
468-
synthetic_control(df, "y", "treated", "unit", "year", seed=0)
540+
synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)
469541

470542

471543
def test_poor_fit_warning_flat_treated_pre_path():
@@ -484,7 +556,7 @@ def test_poor_fit_warning_flat_treated_pre_path():
484556
)
485557
df = pd.DataFrame(rows)
486558
with pytest.warns(UserWarning, match="Pre-treatment fit is poor"):
487-
synthetic_control(df, "y", "treated", "unit", "year", seed=0)
559+
synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)
488560

489561

490562
# ---------------------------------------------------------------------------
@@ -520,7 +592,7 @@ def test_duplicate_regular_predictor_rejected():
520592
def test_inner_nonconvergence_warning():
521593
df, _, _ = _make_panel(n_donors=4)
522594
with pytest.warns(UserWarning, match="did not converge"):
523-
SyntheticControl(seed=0, v_method="nested", inner_max_iter=1).fit(
595+
SyntheticControl(seed=0, v_method="nested", inner_max_iter=1, **_FAST_CHURN).fit(
524596
df, "y", "treated", "unit", "year"
525597
)
526598

@@ -532,7 +604,7 @@ def test_inner_nonconvergence_warning():
532604

533605
def test_standardize_none_runs():
534606
df, _, _ = _make_panel()
535-
res = synthetic_control(df, "y", "treated", "unit", "year", standardize="none", seed=0)
607+
res = synthetic_control(df, "y", "treated", "unit", "year", standardize="none", seed=0, **_FAST)
536608
assert res.standardize == "none"
537609
assert np.isfinite(res.att)
538610

@@ -652,7 +724,7 @@ def test_set_params_rolls_back_on_invalid():
652724

653725
def test_nan_inference_contract():
654726
df, _, _ = _make_panel()
655-
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0)
727+
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)
656728
assert_nan_inference(
657729
{"se": res.se, "t_stat": res.t_stat, "p_value": res.p_value, "conf_int": res.conf_int}
658730
)
@@ -661,7 +733,7 @@ def test_nan_inference_contract():
661733

662734
def test_result_accessors_render():
663735
df, _, _ = _make_panel()
664-
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0)
736+
res = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)
665737
assert isinstance(res, SyntheticControlResults)
666738
assert isinstance(res.summary(), str) and "Synthetic Control" in res.summary()
667739
assert "att" in res.to_dict()
@@ -676,8 +748,10 @@ def test_result_accessors_render():
676748

677749
def test_inferred_post_matches_explicit():
678750
df, years, T0 = _make_panel()
679-
r_inf = synthetic_control(df, "y", "treated", "unit", "year", seed=0)
680-
r_exp = synthetic_control(df, "y", "treated", "unit", "year", post_periods=years[T0:], seed=0)
751+
r_inf = synthetic_control(df, "y", "treated", "unit", "year", seed=0, **_FAST)
752+
r_exp = synthetic_control(
753+
df, "y", "treated", "unit", "year", post_periods=years[T0:], seed=0, **_FAST
754+
)
681755
assert r_inf.post_periods == r_exp.post_periods == years[T0:]
682756
assert abs(r_inf.att - r_exp.att) < 1e-12
683757

0 commit comments

Comments
 (0)