|
| 1 | +# Generate cross-language end-to-end parity fixture for HAD Phase 4 |
| 2 | +# (PR #389 R-parity vs `Credible-Answers/did_had`). |
| 3 | +# |
| 4 | +# Purpose: validate Python `HeterogeneousAdoptionDiD.fit()` (overall, |
| 5 | +# event-study, placebo, yatchew, trends_lin) against R `DIDHAD::did_had()` |
| 6 | +# bit-exactly on shared input. The R package is the methodology source |
| 7 | +# of truth (the de Chaisemartin team wrote it); matching it within |
| 8 | +# `atol=1e-8` on point/SE/CI and `atol=1e-10` on closed-form Yatchew |
| 9 | +# T-stats is a strictly stronger correctness signal than reproducing the |
| 10 | +# paper's published Pierce-Schott numbers (which depend on a |
| 11 | +# LBD-restricted analysis panel). |
| 12 | +# |
| 13 | +# Usage: |
| 14 | +# Rscript benchmarks/R/generate_did_had_golden.R |
| 15 | +# |
| 16 | +# Output: |
| 17 | +# benchmarks/data/did_had_golden.json |
| 18 | +# |
| 19 | +# Phase 4 of HeterogeneousAdoptionDiD (de Chaisemartin et al. 2025). |
| 20 | +# Python test loader: tests/test_did_had_parity.py. |
| 21 | +# |
| 22 | +# Pin: DIDHAD == 2.0.0 (CRAN current as of 2026-04). YatchewTest >= 1.1.0. |
| 23 | + |
| 24 | +library(jsonlite) |
| 25 | +library(DIDHAD) |
| 26 | +library(YatchewTest) |
| 27 | + |
| 28 | +stopifnot(packageVersion("DIDHAD") >= "2.0.0") |
| 29 | +stopifnot(packageVersion("YatchewTest") >= "1.1.0") |
| 30 | + |
| 31 | +# ------------------------------------------------------------------------- |
| 32 | +# Panel builder: 5-period panel with F=4 (treatment onset at t=4). |
| 33 | +# Pre-periods: 1, 2, 3 (D=0). Post-periods: 4, 5 (D=fixed positive dose). |
| 34 | +# Y[g, t] = unit_fe[g] + trend[g] * (t - 1) + (dose[g] + dose[g]^2) * (t >= F) + noise |
| 35 | +# ------------------------------------------------------------------------- |
| 36 | + |
| 37 | +build_panel <- function(G, F_treat, T_periods, dose_draws, seed, |
| 38 | + unit_trend_sd = 0.05, noise_sd = 0.5) { |
| 39 | + set.seed(seed) |
| 40 | + n <- G * T_periods |
| 41 | + unit_fe <- rnorm(G, mean = 0, sd = 1.0) |
| 42 | + unit_trend <- rnorm(G, mean = 0.1, sd = unit_trend_sd) |
| 43 | + noise <- rnorm(n, mean = 0, sd = noise_sd) |
| 44 | + |
| 45 | + rows <- vector("list", n) |
| 46 | + k <- 1 |
| 47 | + for (g in seq_len(G)) { |
| 48 | + for (t in seq_len(T_periods)) { |
| 49 | + treated <- as.numeric(t >= F_treat) |
| 50 | + y <- unit_fe[g] + unit_trend[g] * (t - 1) + |
| 51 | + (dose_draws[g] + dose_draws[g]^2) * treated + |
| 52 | + noise[k] |
| 53 | + d_obs <- if (treated == 1) dose_draws[g] else 0.0 |
| 54 | + # Use short column names (g, t, d, y) matching DIDHAD's tutorial |
| 55 | + # convention. The package has a data-masking issue when column |
| 56 | + # names alias the formal parameter names (e.g., column "time" with |
| 57 | + # `time = "time"` resolves to the column values inside dplyr's |
| 58 | + # `.data[[get("time")]]` lookup), so avoid that overlap upstream. |
| 59 | + rows[[k]] <- data.frame( |
| 60 | + g = g, |
| 61 | + t = t, |
| 62 | + y = y, |
| 63 | + d = d_obs, |
| 64 | + stringsAsFactors = FALSE |
| 65 | + ) |
| 66 | + k <- k + 1 |
| 67 | + } |
| 68 | + } |
| 69 | + do.call(rbind, rows) |
| 70 | +} |
| 71 | + |
| 72 | +# DGP 1: D ~ Uniform(0, 1). |
| 73 | +dgp_uniform <- function(G = 200, F_treat = 4, T_periods = 5, seed = 20260426) { |
| 74 | + set.seed(seed * 2L + 1L) |
| 75 | + d <- runif(G, min = 0.0, max = 1.0) |
| 76 | + list( |
| 77 | + name = "uniform_G200_F4_T5", |
| 78 | + panel = build_panel(G, F_treat, T_periods, d, seed = seed), |
| 79 | + G = G, F_treat = F_treat, T_periods = T_periods, |
| 80 | + dose_distribution = "Uniform(0, 1)", |
| 81 | + seed = seed |
| 82 | + ) |
| 83 | +} |
| 84 | + |
| 85 | +# DGP 2: D ~ Beta(2, 2). Symmetric, bell-shaped on [0, 1]. |
| 86 | +dgp_beta22 <- function(G = 200, F_treat = 4, T_periods = 5, seed = 20260426) { |
| 87 | + set.seed(seed * 2L + 2L) |
| 88 | + d <- rbeta(G, shape1 = 2, shape2 = 2) |
| 89 | + list( |
| 90 | + name = "beta22_G200_F4_T5", |
| 91 | + panel = build_panel(G, F_treat, T_periods, d, seed = seed), |
| 92 | + G = G, F_treat = F_treat, T_periods = T_periods, |
| 93 | + dose_distribution = "Beta(2, 2)", |
| 94 | + seed = seed |
| 95 | + ) |
| 96 | +} |
| 97 | + |
| 98 | +# DGP 3: D ~ Beta(0.5, 1). Heavy left tail (mass near 0); approximates |
| 99 | +# the empirical Pierce-Schott NTR-gap distribution where many industries |
| 100 | +# have small tariff gaps (boundary density vanishes property). |
| 101 | +dgp_boundary <- function(G = 200, F_treat = 4, T_periods = 5, seed = 20260426) { |
| 102 | + set.seed(seed * 2L + 3L) |
| 103 | + d <- rbeta(G, shape1 = 0.5, shape2 = 1.0) |
| 104 | + list( |
| 105 | + name = "boundary_G200_F4_T5", |
| 106 | + panel = build_panel(G, F_treat, T_periods, d, seed = seed), |
| 107 | + G = G, F_treat = F_treat, T_periods = T_periods, |
| 108 | + dose_distribution = "Beta(0.5, 1)", |
| 109 | + seed = seed |
| 110 | + ) |
| 111 | +} |
| 112 | + |
| 113 | +# ------------------------------------------------------------------------- |
| 114 | +# Run did_had with given options and extract the standardized result |
| 115 | +# matrix. The R package returns a `did_had` S3 object whose `results` |
| 116 | +# slot has `resmat` (effects + placebos) and optionally `yatchew_test`. |
| 117 | +# ------------------------------------------------------------------------- |
| 118 | + |
| 119 | +run_did_had <- function(panel, effects = 1, placebo = 0, |
| 120 | + trends_lin = FALSE, yatchew = FALSE) { |
| 121 | + # graph_off=TRUE suppresses the auto-print of the event-study plot. |
| 122 | + fit <- did_had( |
| 123 | + df = panel, |
| 124 | + outcome = "y", |
| 125 | + group = "g", |
| 126 | + time = "t", |
| 127 | + treatment = "d", |
| 128 | + effects = effects, |
| 129 | + placebo = placebo, |
| 130 | + trends_lin = trends_lin, |
| 131 | + yatchew = yatchew, |
| 132 | + graph_off = TRUE |
| 133 | + ) |
| 134 | + res <- fit$results |
| 135 | + resmat <- res$resmat |
| 136 | + out <- list( |
| 137 | + n_effects_actual = res$res.effects, |
| 138 | + n_placebo_actual = res$res.placebo, |
| 139 | + rownames = rownames(resmat), |
| 140 | + estimate = unname(resmat[, "Estimate"]), |
| 141 | + se = unname(resmat[, "SE"]), |
| 142 | + ci_lo = unname(resmat[, "LB.CI"]), |
| 143 | + ci_hi = unname(resmat[, "UB.CI"]), |
| 144 | + n_per_horizon = unname(as.integer(resmat[, "N"])), |
| 145 | + bw_per_horizon = unname(resmat[, "BW"]), |
| 146 | + n_within_bw = unname(as.integer(resmat[, "N.BW"])), |
| 147 | + qug_t = unname(resmat[, "T"]), |
| 148 | + qug_p = unname(resmat[, "p.val"]), |
| 149 | + event_id = unname(as.integer(resmat[, "ID"])) |
| 150 | + ) |
| 151 | + if (yatchew) { |
| 152 | + yt <- res$yatchew_test |
| 153 | + out$yatchew_t <- unname(yt[, "T_hr"]) |
| 154 | + out$yatchew_p <- unname(yt[, "p-value"]) |
| 155 | + out$yatchew_n <- unname(as.integer(yt[, "N"])) |
| 156 | + # Capture sigma2 components for diagnostic comparison; the column |
| 157 | + # names contain unicode (sigma², σ²). Use positional indexing. |
| 158 | + out$yatchew_sigma2_lin <- unname(yt[, 1]) |
| 159 | + out$yatchew_sigma2_diff <- unname(yt[, 2]) |
| 160 | + } |
| 161 | + out |
| 162 | +} |
| 163 | + |
| 164 | +# ------------------------------------------------------------------------- |
| 165 | +# Build the DGP × method-combo fixture grid. |
| 166 | +# ------------------------------------------------------------------------- |
| 167 | + |
| 168 | +dgp_builders <- list( |
| 169 | + uniform = dgp_uniform, |
| 170 | + beta22 = dgp_beta22, |
| 171 | + boundary = dgp_boundary |
| 172 | +) |
| 173 | + |
| 174 | +# Per-DGP method matrix. Each combo runs did_had with the named flags |
| 175 | +# and stores the resulting standardized resmat dict alongside the input |
| 176 | +# panel arrays. Python parity test loops over combos and asserts. |
| 177 | +# |
| 178 | +# Why effects=2/placebo=2: F=4 with T=5 leaves 2 post-period horizons |
| 179 | +# (t=4, 5) and 2 pre-period placebos (t=2, 1) without trends_lin. R |
| 180 | +# auto-truncates if requested > feasible. Under trends_lin, the |
| 181 | +# F-2 -> F-1 evolution is consumed by the slope estimator and R reduces |
| 182 | +# max placebo by 1 (so only placebo at t=1 survives). |
| 183 | +combos <- list( |
| 184 | + list(name = "overall_e1", effects = 1, placebo = 0, |
| 185 | + trends_lin = FALSE, yatchew = FALSE), |
| 186 | + list(name = "event_e2_p2", effects = 2, placebo = 2, |
| 187 | + trends_lin = FALSE, yatchew = FALSE), |
| 188 | + list(name = "event_e2_p2_yatchew", effects = 2, placebo = 2, |
| 189 | + trends_lin = FALSE, yatchew = TRUE), |
| 190 | + list(name = "event_e2_p2_trendslin", effects = 2, placebo = 2, |
| 191 | + trends_lin = TRUE, yatchew = FALSE), |
| 192 | + list(name = "event_e2_p2_yatchew_trendslin", effects = 2, placebo = 2, |
| 193 | + trends_lin = TRUE, yatchew = TRUE) |
| 194 | +) |
| 195 | + |
| 196 | +fixtures <- list() |
| 197 | +for (dgp_name in names(dgp_builders)) { |
| 198 | + dgp <- dgp_builders[[dgp_name]]() |
| 199 | + panel <- dgp$panel |
| 200 | + combo_results <- list() |
| 201 | + for (combo in combos) { |
| 202 | + res <- run_did_had( |
| 203 | + panel = panel, |
| 204 | + effects = combo$effects, |
| 205 | + placebo = combo$placebo, |
| 206 | + trends_lin = combo$trends_lin, |
| 207 | + yatchew = combo$yatchew |
| 208 | + ) |
| 209 | + combo_results[[combo$name]] <- list( |
| 210 | + effects = combo$effects, |
| 211 | + placebo = combo$placebo, |
| 212 | + trends_lin = combo$trends_lin, |
| 213 | + yatchew = combo$yatchew, |
| 214 | + result = res |
| 215 | + ) |
| 216 | + } |
| 217 | + fixtures[[dgp$name]] <- list( |
| 218 | + name = dgp$name, |
| 219 | + G = dgp$G, |
| 220 | + F = dgp$F_treat, |
| 221 | + T = dgp$T_periods, |
| 222 | + dose_distribution = dgp$dose_distribution, |
| 223 | + seed = dgp$seed, |
| 224 | + panel = list( |
| 225 | + g = panel$g, |
| 226 | + t = panel$t, |
| 227 | + y = panel$y, |
| 228 | + d = panel$d |
| 229 | + ), |
| 230 | + combos = combo_results |
| 231 | + ) |
| 232 | +} |
| 233 | + |
| 234 | +# ------------------------------------------------------------------------- |
| 235 | +# Serialize |
| 236 | +# ------------------------------------------------------------------------- |
| 237 | + |
| 238 | +out <- list( |
| 239 | + metadata = list( |
| 240 | + description = paste( |
| 241 | + "DIDHAD::did_had end-to-end parity fixture for HAD Phase 4", |
| 242 | + "(PR #389 R-parity).", |
| 243 | + sep = " " |
| 244 | + ), |
| 245 | + didhad_version = as.character(packageVersion("DIDHAD")), |
| 246 | + yatchewtest_version = as.character(packageVersion("YatchewTest")), |
| 247 | + nprobust_version = as.character(packageVersion("nprobust")), |
| 248 | + r_version = as.character(getRversion()), |
| 249 | + n_dgps = length(fixtures), |
| 250 | + n_combos_per_dgp = length(combos), |
| 251 | + point_atol = 1e-8, |
| 252 | + se_atol = 1e-8, |
| 253 | + ci_atol = 1e-8, |
| 254 | + yatchew_atol = 1e-10, |
| 255 | + qug_atol = 1e-12, |
| 256 | + notes = paste( |
| 257 | + "Three synthetic DGPs (Uniform, Beta(2,2), Beta(0.5,1) approximation", |
| 258 | + "of the empirical Pierce-Schott NTR-gap distribution). Each DGP runs", |
| 259 | + "5 method combos covering overall, event-study, placebo, yatchew,", |
| 260 | + "and trends_lin variants. Tolerances per the Phase 4 plan.", |
| 261 | + sep = " " |
| 262 | + ) |
| 263 | + ), |
| 264 | + fixtures = fixtures |
| 265 | +) |
| 266 | + |
| 267 | +out_dir <- "benchmarks/data" |
| 268 | +if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE) |
| 269 | +out_path <- file.path(out_dir, "did_had_golden.json") |
| 270 | +write_json(out, path = out_path, digits = 17, auto_unbox = TRUE, null = "null") |
| 271 | +message(sprintf( |
| 272 | + "Wrote %d DGP fixtures (each with %d combos) to %s", |
| 273 | + length(fixtures), length(combos), out_path |
| 274 | +)) |
0 commit comments