Skip to content

Commit 3d18f7e

Browse files
igerber and claude committed
spillover-tva: address CI codex R4 P2+P3 — verbatim-sync + geometry pins
CI codex R4 caught two real gaps in the drift coverage: P2 (Maintainability): the test file's docstring claims the DGP-builder is duplicated "verbatim" from the notebook §2 cell, but only the parameter CONSTANTS are pinned (`test_dgp_true_parameters_match_quoted`). Non-constant edits (coordinate ranges, lambda_t, row construction) could drift silently if the headline numbers stay within tolerance. Adds `test_notebook_dgp_ast_matches_test_fixture` — parses the notebook JSON, extracts the §2 `build_t23_panel` FunctionDef, and compares its AST (with docstring stripped, function name normalized) against `_build_t23_panel`'s. Uses `ast.dump` for whitespace- and comment-agnostic semantic equality. Any DGP-logic divergence between the two copies now fails loudly; cosmetic-only edits (whitespace, comments) don't trigger spurious failures. P3 (Documentation/Tests): §2 quotes seed-specific geometry numbers (max ~12 km, cluster diameter ~22 km, near 12-82 km, far 224-331 km) and §6 quotes pair-support percentages (far×far ~95% within 100 km, near×near 100%). Drift tests only pinned the band counts and lat bounds, so those prose details could drift silently. Adds `test_seed_specific_geometry_pins_match_quoted` — recomputes each quoted value from the seed-23 panel using planar degree-to-km arithmetic (111 km per degree) and asserts they match the notebook narrative integers (rounded). If a future RNG/geometry change shifts any number outside the rounded value, the test fails and the maintainer must update either the prose or the layout parameters. 20 drift tests pass (16 → 20: +AST sync, +geometry pin, + `test_rings_grid_d_bar_100_to_200_identical_delta_1` from R3, + the R3 §6 warning-policy guard). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e1e0ba5 commit 3d18f7e

1 file changed

Lines changed: 117 additions & 0 deletions

File tree

tests/test_t23_spillover_tva_drift.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,123 @@ def test_summary_renders_without_warning(spillover_fit):
363363
assert len(out) > 0
364364

365365

366+
def test_notebook_dgp_ast_matches_test_fixture():
    """P2 sync guard: enforce the "verbatim" duplication claim.

    Parses the notebook's §2 ``build_t23_panel`` definition and asserts
    that, modulo function name (notebook: ``build_t23_panel``; test:
    ``_build_t23_panel``) and docstring, its AST matches the test
    fixture's. Catches silent drift in non-constant DGP logic (coordinate
    ranges, lambda_t, row construction) that the numerical-value pins
    don't see — codex R4 P2 flagged this gap.

    Uses ``ast.dump`` (without node attributes) for a whitespace-/
    comment-agnostic comparison: semantically identical code matches,
    cosmetic edits don't trigger spurious failures.

    On failure the message shows a window of both dumps centred on the
    first differing character, so the drifted statement is visible rather
    than the (normally identical) leading signature.
    """
    import ast
    import inspect
    import json
    import textwrap
    from pathlib import Path

    nb_path = Path(__file__).resolve().parents[1] / "docs" / "tutorials" / "23_spillover_tva.ipynb"
    # Read as UTF-8 explicitly: the platform's locale encoding may not be
    # able to decode non-ASCII notebook text (e.g. the "§" section marks).
    with nb_path.open(encoding="utf-8") as f:
        nb = json.load(f)

    def _cell_source(cell) -> str:
        """Return a cell's source as a single string.

        The notebook JSON may store ``source`` either as one string or as
        a list of line strings; iterating a plain string would yield
        single characters and never match the locator below.
        """
        src = cell["source"]
        return src if isinstance(src, str) else "".join(src)

    matches = [
        c
        for c in nb["cells"]
        if c["cell_type"] == "code" and "def build_t23_panel" in _cell_source(c)
    ]
    assert len(matches) == 1, (
        f"Expected exactly one notebook code cell defining `build_t23_panel`; "
        f"found {len(matches)}. If you renamed or split the §2 DGP cell, "
        f"update this test's cell-locator."
    )
    nb_cell_src = _cell_source(matches[0])

    def _extract_normalized_fn(src: str, fn_name: str) -> str:
        """Parse `src`, find FunctionDef `fn_name`, strip its docstring,
        rename it to the canonical `build_t23_panel`, and return the
        normalized AST dump."""
        # Dedent first so source taken from an indented scope still parses.
        tree = ast.parse(textwrap.dedent(src))
        fn = next(
            (n for n in ast.walk(tree) if isinstance(n, ast.FunctionDef) and n.name == fn_name),
            None,
        )
        assert fn is not None, f"Could not find FunctionDef `{fn_name}` in source"
        # A leading bare string-constant expression is the docstring.
        if (
            fn.body
            and isinstance(fn.body[0], ast.Expr)
            and isinstance(fn.body[0].value, ast.Constant)
            and isinstance(fn.body[0].value.value, str)
        ):
            fn.body = fn.body[1:]
        fn.name = "build_t23_panel"
        # include_attributes=False drops line/column info, which is what
        # makes the comparison insensitive to layout and comments.
        return ast.dump(fn, annotate_fields=True, include_attributes=False)

    nb_norm = _extract_normalized_fn(nb_cell_src, "build_t23_panel")
    test_norm = _extract_normalized_fn(inspect.getsource(_build_t23_panel), "_build_t23_panel")

    # Index of the first differing character (or the shorter length when
    # one dump is a strict prefix of the other); only used for reporting.
    first_diff = next(
        (i for i, (a, b) in enumerate(zip(nb_norm, test_norm)) if a != b),
        min(len(nb_norm), len(test_norm)),
    )
    window = slice(max(0, first_diff - 200), first_diff + 200)

    assert nb_norm == test_norm, (
        f"Notebook §2 DGP cell drifted from test fixture `_build_t23_panel`.\n"
        f"First AST-dump divergence at character {first_diff}.\n"
        f"--- notebook AST ---\n...{nb_norm[window]}...\n"
        f"--- test AST ---\n...{test_norm[window]}...\n"
        f"Update one or both so the function bodies match modulo name + docstring."
    )
428+
429+
430+
def test_seed_specific_geometry_pins_match_quoted(panel):
    """P3 sync guard for the seed-specific geometry quoted in the notebook.

    The §2 panel-layout table and the §6 within-cutoff enumeration quote
    numbers that depend on the seed: max distance from origin, cluster
    diameter, band extents, and the far×far / near×near share of pairs
    within 100 km. Each quoted value is recomputed here from ``panel`` and
    compared, after rounding, with the integer in the prose, so the
    narrative can't go stale even while the headline estimates remain
    within tolerance — codex R4 P3 flagged this gap.

    Distances treat ``lat``/``lon`` as planar coordinates and scale the
    Euclidean norm by ``deg_to_km``.
    """
    deg_to_km = 111.0

    # One row per unit in each band: treated units, then untreated units
    # split at lat == 1.0 into "near" (<=) and "far" (>).
    is_untreated = panel["ever_treated"] == 0
    treated = panel[panel["ever_treated"] == 1].drop_duplicates("unit")
    near = panel[is_untreated & (panel["lat"] <= 1.0)].drop_duplicates("unit")
    far = panel[is_untreated & (panel["lat"] > 1.0)].drop_duplicates("unit")

    def _origin_dist_deg(frame):
        # Per-unit planar distance from (0, 0), still in coordinate units.
        return np.sqrt(frame["lat"] ** 2 + frame["lon"] ** 2)

    def _farthest_km(frame):
        return float(_origin_dist_deg(frame).max() * deg_to_km)

    def _nearest_km(frame):
        return float(_origin_dist_deg(frame).min() * deg_to_km)

    def _pairwise_deg(frame):
        # Full n x n matrix of planar distances between units.
        lat = frame["lat"].values
        lon = frame["lon"].values
        dlat = lat[:, None] - lat[None, :]
        dlon = lon[:, None] - lon[None, :]
        return np.sqrt(dlat ** 2 + dlon ** 2)

    def _diameter_km(frame):
        return float(_pairwise_deg(frame).max() * deg_to_km)

    def _pct_pairs_within_100km(frame):
        km = _pairwise_deg(frame) * deg_to_km
        # Strict upper triangle: every unordered pair once, no self-pairs.
        pair_km = km[np.triu_indices(len(frame), k=1)]
        return float((pair_km <= 100.0).sum() / len(pair_km) * 100.0)

    # §2 quoted: "clustered around (0,0); max ~12 km from origin, cluster diameter ~22 km at seed 23"
    treated_reach = _farthest_km(treated)
    assert round(treated_reach) == 12, treated_reach
    treated_diameter = _diameter_km(treated)
    assert round(treated_diameter) == 22, treated_diameter

    # §2 quoted: "~12-82 km north"
    near_lo = _nearest_km(near)
    assert round(near_lo) == 12, near_lo
    near_hi = _farthest_km(near)
    assert round(near_hi) == 82, near_hi

    # §2 quoted: "~224-331 km north"
    far_lo = _nearest_km(far)
    assert round(far_lo) == 224, far_lo
    far_hi = _farthest_km(far)
    assert round(far_hi) == 331, far_hi

    # §6 quoted: "lat extent is ~131 km" for far band
    far_diameter = _diameter_km(far)
    assert round(far_diameter) == 131, far_diameter

    # §6 quoted: "100% of within-band pairs are within 100 km" for near band
    near_pct = _pct_pairs_within_100km(near)
    assert round(near_pct) == 100, near_pct

    # §6 quoted: "~95% of within-band pair distances are within 100 km" for far band
    far_pct = _pct_pairs_within_100km(far)
    assert round(far_pct) == 95, far_pct
481+
482+
366483
def _assert_post_filter_warning_surface_is_clean(captured) -> None:
367484
"""Shared T19-style platform-agnostic warning-policy assertion.
368485

0 commit comments

Comments
 (0)