From 3d8d568d8c35f7b29c566c19ecbeada1305025ed Mon Sep 17 00:00:00 2001 From: mohossam01 Date: Fri, 15 May 2026 04:23:08 -0400 Subject: [PATCH] feat: add student-t and laplace noise families --- CHANGELOG.md | 18 + docs/site/config-reference.md | 39 ++- docs/site/manifest-reference.md | 41 ++- plotsim-schema.json | 23 ++ plotsim/builder/input.py | 12 + plotsim/builder/interpreter.py | 2 + plotsim/config.py | 32 ++ plotsim/inspect.py | 15 +- plotsim/manifest.py | 81 +++-- plotsim/metrics.py | 32 +- tests/test_heavy_tailed_noise.py | 422 ++++++++++++++++++++++++ tests/test_heteroscedastic_noise.py | 15 +- tests/test_manifest.py | 11 +- tests/test_multi_source.py | 13 +- tests/test_time_varying_correlations.py | 8 +- 15 files changed, 695 insertions(+), 69 deletions(-) create mode 100644 tests/test_heavy_tailed_noise.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ef8e3f..6b978bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,24 @@ Versioning: [SemVer](https://semver.org/spec/v2.0.0.html). 1.6 → 1.7 with a new optional `noise_config` field populated only when the flag is enabled. +- **Heavy-tailed noise families (Student-t, Laplace).** New + `noise_family` field on `NoiseConfig` accepts `"gaussian"` (default, + byte-identical to prior behavior), `"student_t"` (with required + `degrees_of_freedom`), or `"laplace"`. Heavy-tailed families produce + outlier-prone residuals without explicit outlier injection — useful + for modeling sensor noise, financial returns, or any domain with + fat-tailed observation error. Family dispatch composes orthogonally + with `scale_with_trajectory`: the resolved scale is the same for + every family, only the sampling distribution differs. Config-time + validation rejects `student_t` without `degrees_of_freedom`, rejects + `degrees_of_freedom` on other families, and rejects `df < 1`. + Builder mirror on `NoiseInput`; preset shorthand always resolves to + gaussian. 
Manifest schema bumps 1.7 → 1.8 — `NoiseConfigInfo` gains + `noise_family` and `degrees_of_freedom`, and its emission criterion + broadens to "heteroscedastic OR non-default family" so the manifest + records the realized noise family whenever it diverges from the + historical lane. + - **`pool.` source on per_entity_per_period facts.** Widens the per-entity value-pool surface to the most common fact grain (one row per entity per period). Two new dispatch handlers — diff --git a/docs/site/config-reference.md b/docs/site/config-reference.md index f5b4726..d277345 100644 --- a/docs/site/config-reference.md +++ b/docs/site/config-reference.md @@ -25,7 +25,7 @@ bridges: [ ... ] quality: [ ... ] holdout: { target, periods, min_training_periods } entity_features: true | false | { metrics, include_labels } -noise: | { gaussian_sigma, outlier_rate, mcar_rate, scale_with_trajectory } +noise: | { gaussian_sigma, outlier_rate, mcar_rate, scale_with_trajectory, noise_family, degrees_of_freedom } output: csv | parquet | jsonl | sql | { format, directory, cell_budget, denormalized, partition_by, sql_dialect } locale: seed: @@ -644,14 +644,18 @@ noise: outlier_rate: 0.02 mcar_rate: 0.01 scale_with_trajectory: false + noise_family: gaussian + degrees_of_freedom: null # required when noise_family is "student_t" ``` | Field | Type | Default | Range | Effect | |---|---|---|---|---| -| `gaussian_sigma` | `float` | `0.0` | `0.0`–`5.0` | Multiplicative log-normal jitter on each draw — `value *= exp(N(0, σ²))`. Bigger σ = wider spread | +| `gaussian_sigma` | `float` | `0.0` | `0.0`–`5.0` | Magnitude-proportional additive jitter on each draw — `value += N(0, (σ × \|value\|)²)` under the default gaussian family (a value of exactly zero falls back to magnitude 1). Bigger σ = wider spread. 
Used by every `noise_family` as the scale parameter | | `outlier_rate` | `float` | `0.0` | `0.0`–`1.0` | Probability per cell of replacing the value with a 3-σ tail draw | | `mcar_rate` | `float` | `0.0` | `0.0`–`1.0` | Probability per cell of dropping the value to NaN (missing-completely-at-random) | -| `scale_with_trajectory` | `bool` | `false` | — | When `true`, the gaussian standard deviation at each cell becomes `gaussian_sigma × trajectory_position` instead of `gaussian_sigma × \|value\|`. Position-zero cells receive zero gaussian noise; position-one cells receive the full σ. Outlier and MCAR branches are unchanged. Use when the dataset's noise model should be heteroscedastic — e.g. high-engagement entities exhibit larger observation variance — rather than proportional to the value magnitude | +| `scale_with_trajectory` | `bool` | `false` | — | When `true`, the gaussian standard deviation at each cell becomes `gaussian_sigma × trajectory_position` instead of `gaussian_sigma × \|value\|`. Position-zero cells receive zero gaussian noise; position-one cells receive the full σ. Outlier and MCAR branches are unchanged. Use when the dataset's noise model should be heteroscedastic — e.g. high-engagement entities exhibit larger observation variance — rather than proportional to the value magnitude. Composes orthogonally with `noise_family` | +| `noise_family` | `str` | `"gaussian"` | `"gaussian"` / `"student_t"` / `"laplace"` | Distribution of the additive jitter. `"gaussian"` (default) preserves the historical behavior byte-for-byte. `"student_t"` draws from a Student-t with `degrees_of_freedom` and produces heavier tails (outlier-prone residuals without explicit `outlier_rate`). `"laplace"` draws from a Laplace distribution — sharper peak, heavier tails than Gaussian. 
Composes with `scale_with_trajectory`: the resolved scale is the same for every family | +| `degrees_of_freedom` | `float` or `null` | `null` | ≥ `1.0` | Required when `noise_family: student_t`; forbidden otherwise (a non-null value with any other family raises at load time). Lower values yield heavier tails; `df = 1` is the Cauchy limit (no finite mean). Typical values: `df = 3`–`5` for visibly heavy tails, `df = 10`–`30` for mild Gaussian-like residuals | Four named presets accept the lower-case canonical name OR a friendly alias — pick whichever reads naturally: @@ -665,8 +669,33 @@ alias — pick whichever reads naturally: The same constants are exported from `plotsim` for engine-direct mutation: `PERFECTLY_CLEAN`, `SLIGHTLY_MESSY`, `REALISTIC`, `DIRTY`. -Presets always set `scale_with_trajectory: false`; opt into the -heteroscedastic lane by passing the explicit dict form. +Presets always set `scale_with_trajectory: false` and +`noise_family: gaussian`; opt into the heteroscedastic lane or a +heavy-tailed family by passing the explicit dict form. + +**Picking a heavy-tailed family.** `student_t` with low `df` (3–5) +models occasional large deviations driven by a heavy-tailed underlying +process — sensor failures, financial return spikes, support-ticket +volume after an outage. `laplace` is similar but with a sharper peak +around the center and exponential (rather than power-law) tails — a +good fit when most residuals are small but a non-negligible minority +are several scales out. Both compose with `outlier_rate` if you also +want explicit "blow up the value by 3–10×" injection on top of the +heavy-tailed jitter. 
+ +```yaml +# Heavy-tailed noise from a Student-t +noise: + gaussian_sigma: 0.10 + noise_family: student_t + degrees_of_freedom: 4 + +# Laplace residuals, heteroscedastic amplitude +noise: + gaussian_sigma: 0.05 + scale_with_trajectory: true + noise_family: laplace +``` `noise` is independent of the `quality` block — `noise` perturbs metric values *during* generation (correlations and trajectory still hold); diff --git a/docs/site/manifest-reference.md b/docs/site/manifest-reference.md index bfa6f02..4ab149f 100644 --- a/docs/site/manifest-reference.md +++ b/docs/site/manifest-reference.md @@ -70,7 +70,7 @@ produces a byte-identical `manifest.json`. Encoding: UTF-8, | Field | Type | Description | |---|---|---| -| `schema_version` | `str` | Wire-shape version. Currently `"1.7"` (bumped over time as new additive sections — `causal_graph`, `correlations`, `outlier_injections`, multi-source mappings, `parent_child_relations`, `noise_config` — landed) | +| `schema_version` | `str` | Wire-shape version. Currently `"1.8"` (bumped over time as new additive sections — `causal_graph`, `correlations`, `outlier_injections`, multi-source mappings, `parent_child_relations`, `noise_config` — landed; 1.7 → 1.8 extended `noise_config` with `noise_family` / `degrees_of_freedom`) | | `seed` | `int` | The seed used for generation — `config.seed` | | `config_sha256` | `str` | Full SHA-256 hex of the JSON-serialized config. Detects config drift between generation and consumption | | `archetype_assignments` | array | One entry per entity; see below | @@ -87,7 +87,7 @@ produces a byte-identical `manifest.json`. Encoding: UTF-8, | `causal_graph` | array | One `CausalEdge` per metric with a non-None `causal_lag`. Empty list when no metric uses `causal_lag` | | `correlations` | array | One entry per user-declared `config.correlations` pair, with the realized (post-Higham, post-compensation) coefficient. 
Empty list when no correlations are configured | | `outlier_injections` | array or `null` | Per-cell outlier-fire log. `null` when skipped (no `outlier_rate`, vectorized mode, or cell budget exceeded). `[]` when the detector ran and observed no firings | -| `noise_config` | object or `null` | Noise-model record. `null` when the run uses the default magnitude-scaled gaussian lane; populated only when `noise.scale_with_trajectory` is `true` | +| `noise_config` | object or `null` | Noise-model record. `null` when the run uses the default magnitude-scaled gaussian lane; populated when EITHER `noise.scale_with_trajectory` is `true` OR `noise.noise_family` is non-default (`"student_t"` / `"laplace"`) | --- @@ -627,10 +627,12 @@ seed signals a generation regression. ## `noise_config` -Noise-model record — emitted only when the run opted into -heteroscedastic gaussian noise via `noise.scale_with_trajectory: true`. -`null` for the default magnitude-scaled lane (and absent from manifests -produced before `schema_version: "1.7"`). +Noise-model record — emitted whenever the run diverges from the +historical magnitude-scaled gaussian lane. Two triggers, either +sufficient: `noise.scale_with_trajectory: true` (heteroscedastic +amplitude) OR `noise.noise_family` is non-default (heavy-tailed +family — `"student_t"` or `"laplace"`). `null` for the default lane +(and absent from manifests produced before `schema_version: "1.7"`). ```json { @@ -638,22 +640,29 @@ produced before `schema_version: "1.7"`). "gaussian_sigma": 0.20, "outlier_rate": 0.0, "mcar_rate": 0.0, - "scale_with_trajectory": true + "scale_with_trajectory": true, + "noise_family": "student_t", + "degrees_of_freedom": 4.0 } } ``` | Field | Type | Description | |---|---|---| -| `gaussian_sigma` | `float` | The σ multiplier from `config.noise.gaussian_sigma`. 
Under the heteroscedastic lane the realized scale at a cell is `gaussian_sigma × trajectory_position` | -| `outlier_rate` | `float` | Mirrors `config.noise.outlier_rate`. Unaffected by the heteroscedastic flag — recorded here for completeness so the manifest fully describes the noise model | -| `mcar_rate` | `float` | Mirrors `config.noise.mcar_rate`. Unaffected by the heteroscedastic flag | -| `scale_with_trajectory` | `bool` | Always `true` when this record is present (the field exists for forward compatibility in case the manifest later starts recording the default-off lane as well) | - -**Use case** — distinguish a run that opted into position-scaled -gaussian noise from one that didn't, without re-reading the YAML -config. Anomaly-detection scoring that assumes uniform noise variance -can read this field to switch to a position-aware likelihood model. +| `gaussian_sigma` | `float` | The σ multiplier from `config.noise.gaussian_sigma`. Under the heteroscedastic lane the realized scale at a cell is `gaussian_sigma × trajectory_position`; otherwise `gaussian_sigma × \|value\|`. Used by every family as the scale parameter | +| `outlier_rate` | `float` | Mirrors `config.noise.outlier_rate`. Unaffected by the family or heteroscedastic flag — recorded here for completeness so the manifest fully describes the noise model | +| `mcar_rate` | `float` | Mirrors `config.noise.mcar_rate`. Unaffected by the family or heteroscedastic flag | +| `scale_with_trajectory` | `bool` | `true` when the heteroscedastic lane was engaged. `false` when the record was emitted purely because `noise_family` diverged from the default | +| `noise_family` | `str` | The additive-jitter distribution — one of `"gaussian"`, `"student_t"`, `"laplace"`. 
Mirrors `config.noise.noise_family` | +| `degrees_of_freedom` | `float` or `null` | Populated only when `noise_family == "student_t"`; `null` otherwise | + +**Use case** — distinguish a run that opted into position-scaled or +heavy-tailed noise from one that didn't, without re-reading +the YAML config. Anomaly-detection scoring that assumes uniform +gaussian noise variance can read this record to switch to a +position-aware or family-aware likelihood model — e.g., switching to +a t-distribution likelihood when `noise_family == "student_t"` keeps +the scorer well-calibrated under the heavier-tailed residuals. --- diff --git a/plotsim-schema.json b/plotsim-schema.json index 508ba4f..e647175 100644 --- a/plotsim-schema.json +++ b/plotsim-schema.json @@ -921,6 +921,29 @@ "default": false, "title": "Scale With Trajectory", "type": "boolean" + }, + "noise_family": { + "default": "gaussian", + "enum": [ + "gaussian", + "student_t", + "laplace" + ], + "title": "Noise Family", + "type": "string" + }, + "degrees_of_freedom": { + "anyOf": [ + { + "minimum": 1.0, + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Degrees Of Freedom" } }, "title": "NoiseConfig", diff --git a/plotsim/builder/input.py b/plotsim/builder/input.py index a318786..579b880 100644 --- a/plotsim/builder/input.py +++ b/plotsim/builder/input.py @@ -1145,6 +1145,18 @@ class NoiseInput(BaseModel): # (``"clean"`` / ``"slightly_messy"`` / ...) always leaves this False; # users opt in by passing the explicit dict form. scale_with_trajectory: bool = False + # 0.6-M23: mirrors ``NoiseConfig.noise_family``. Selects the additive + # jitter distribution — ``"gaussian"`` (default, byte-identical to + # pre-M23 behavior), ``"student_t"`` (heavy-tailed; requires + # ``degrees_of_freedom``), or ``"laplace"`` (heavy-tailed, sharper + # peak). Preset shorthand always resolves to ``"gaussian"``. 
+ noise_family: Literal["gaussian", "student_t", "laplace"] = "gaussian" + # 0.6-M23: mirrors ``NoiseConfig.degrees_of_freedom``. Required when + # ``noise_family="student_t"``; forbidden otherwise. The engine-side + # validator on ``NoiseConfig`` raises with a clear message if the + # combination is incoherent — the builder simply passes the field + # through. + degrees_of_freedom: Optional[float] = Field(default=None, ge=1.0) class OutputInput(BaseModel): diff --git a/plotsim/builder/interpreter.py b/plotsim/builder/interpreter.py index daec9fb..6e23caa 100644 --- a/plotsim/builder/interpreter.py +++ b/plotsim/builder/interpreter.py @@ -194,6 +194,8 @@ def interpret(user_input: UserInput) -> PlotsimConfig: outlier_rate=user_input.noise.outlier_rate, mcar_rate=user_input.noise.mcar_rate, scale_with_trajectory=user_input.noise.scale_with_trajectory, + noise_family=user_input.noise.noise_family, + degrees_of_freedom=user_input.noise.degrees_of_freedom, ) else: noise_cfg = NoiseConfig() diff --git a/plotsim/config.py b/plotsim/config.py index 241b03f..2d21be6 100644 --- a/plotsim/config.py +++ b/plotsim/config.py @@ -2470,6 +2470,38 @@ class NoiseConfig(_Frozen): # MCAR rates are unaffected. Default False preserves the multiplicative- # on-magnitude behavior bit-for-bit. scale_with_trajectory: bool = False + # 0.6-M23: distribution family for the additive jitter branch. ``"gaussian"`` + # (default) preserves the historical ``rng.normal`` draw byte-for-byte. + # ``"student_t"`` draws from a Student-t distribution scaled by ``scale`` + # (heavier tails — outlier-prone residuals without explicit outlier + # injection). ``"laplace"`` draws from a Laplace distribution scaled by + # ``scale`` (sharper peak + heavier tails than Gaussian). Composes + # orthogonally with ``scale_with_trajectory``: in both lanes the realized + # scale is the same value the gaussian branch would have used, just + # parameterizing a different family. 
+ noise_family: Literal["gaussian", "student_t", "laplace"] = "gaussian" + # 0.6-M23: degrees-of-freedom parameter for ``noise_family="student_t"``. + # Lower values produce heavier tails; ``df=1`` is the Cauchy limit (no + # finite mean). Validator below requires this be set (and >= 1.0) when + # the family is ``student_t``, and absent for every other family. + degrees_of_freedom: Optional[float] = Field(default=None, ge=1.0) + + @model_validator(mode="after") + def _validate_noise_family_params(self) -> "NoiseConfig": + if self.noise_family == "student_t": + if self.degrees_of_freedom is None: + raise ValueError( + "noise_family='student_t' requires degrees_of_freedom to be set " + "(float >= 1.0; lower values mean heavier tails)" + ) + else: + if self.degrees_of_freedom is not None: + raise ValueError( + f"degrees_of_freedom is only valid when noise_family='student_t'; " + f"got noise_family={self.noise_family!r} with " + f"degrees_of_freedom={self.degrees_of_freedom}" + ) + return self class ManifestConfig(_Frozen): diff --git a/plotsim/inspect.py b/plotsim/inspect.py index 7340719..8ae101c 100644 --- a/plotsim/inspect.py +++ b/plotsim/inspect.py @@ -619,6 +619,12 @@ def _detect_noise_branches( the side RNG in lockstep — same number of bytes consumed, same value drawn. Callers must pass the same ``trajectory_position`` the engine saw at this cell. + + 0.6-M23: when ``noise.noise_family`` is non-default, the replay must + invoke the same family on the side generator so the post-jitter RNG + state matches the engine's. Otherwise the subsequent ``random()`` calls + for outlier and MCAR checks would read from a different byte position, + yielding garbage outlier-injection records in the manifest. 
""" side = np.random.default_rng() side.bit_generator.state = rng_state_snapshot @@ -629,7 +635,14 @@ def _detect_noise_branches( else: mag = abs(v) if v != 0.0 else 1.0 scale = noise.gaussian_sigma * mag - v = v + float(side.normal(loc=0.0, scale=scale)) + family = getattr(noise, "noise_family", "gaussian") + if family == "gaussian": + v = v + float(side.normal(loc=0.0, scale=scale)) + elif family == "student_t": + df = float(noise.degrees_of_freedom) + v = v + float(side.standard_t(df)) * scale + else: # "laplace" + v = v + float(side.laplace(loc=0.0, scale=scale)) outlier_fired = False if noise.outlier_rate > 0.0: if side.random() < noise.outlier_rate: diff --git a/plotsim/manifest.py b/plotsim/manifest.py index b861075..bb62ba1 100644 --- a/plotsim/manifest.py +++ b/plotsim/manifest.py @@ -111,7 +111,13 @@ # readers parse 1.7 manifests cleanly. Populated only when the # heteroscedastic-noise feature is enabled — keeps default-off runs # byte-equivalent to pre-M22 modulo the schema version string. -MANIFEST_SCHEMA_VERSION = "1.7" +# 0.6-M23: bumped 1.7 → 1.8 for ``noise_family`` and ``degrees_of_freedom`` +# on ``NoiseConfigInfo``. Emission criterion widens to also cover +# non-gaussian families (``noise_family != "gaussian"``) so the manifest +# records the realized noise family whenever it diverges from the +# historical lane, not only when heteroscedastic amplitude is on. +# Default-family default-amplitude runs still emit ``noise_config=None``. +MANIFEST_SCHEMA_VERSION = "1.8" class _ManifestBase(BaseModel): @@ -557,30 +563,43 @@ class ParentChildRelation(_ManifestBase): class NoiseConfigInfo(_ManifestBase): - """0.6-M22: ground-truth record of the noise model. + """0.6-M22 / 0.6-M23: ground-truth record of the noise model. - Emitted on the manifest only when - ``config.noise.scale_with_trajectory=True`` — i.e. when the engine ran - the heteroscedastic-noise lane. ``None`` otherwise. 
Carries the four - declared ``NoiseConfig`` knobs so a downstream consumer knows exactly - how the gaussian standard deviation was parameterized at each cell - without re-reading the YAML config. + Emitted on the manifest when EITHER + ``config.noise.scale_with_trajectory=True`` (M22 heteroscedastic lane) + OR ``config.noise.noise_family != "gaussian"`` (M23 heavy-tailed lane). + ``None`` otherwise — default-family default-amplitude runs stay + byte-equivalent to the historical pre-M22 manifest modulo the schema + version string. Carries the declared ``NoiseConfig`` knobs so a + downstream consumer can fully describe the noise model without + re-reading the YAML config. * ``gaussian_sigma`` — the σ multiplier; the realized scale at a cell is ``gaussian_sigma * trajectory_position`` under the - heteroscedastic lane. - * ``outlier_rate`` / ``mcar_rate`` — unchanged by the M22 flag; - recorded here for completeness so the manifest fully describes the - noise model. - * ``scale_with_trajectory`` — always ``True`` when this record is - emitted (the field exists for forward compatibility in case the - manifest later starts recording the default-off lane as well). + heteroscedastic lane and ``gaussian_sigma * abs(value)`` (with the + zero-value fallback) otherwise. Used by every family as the scale + parameter — Gaussian σ, Laplace scale, Student-t scale multiplier. + * ``outlier_rate`` / ``mcar_rate`` — recorded for completeness so the + manifest fully describes the noise model. + * ``scale_with_trajectory`` — ``True`` when the heteroscedastic lane + was engaged, ``False`` otherwise (e.g. when the record was emitted + purely because ``noise_family`` diverged from the default). + * ``noise_family`` — the additive-jitter distribution; one of + ``"gaussian"``, ``"student_t"``, ``"laplace"``. + * ``degrees_of_freedom`` — populated only when + ``noise_family == "student_t"``; ``None`` otherwise. 
""" gaussian_sigma: float outlier_rate: float mcar_rate: float scale_with_trajectory: bool + # 0.6-M23: distribution family for the additive jitter branch. + # Default ``"gaussian"`` keeps the field readable on pre-M23 manifests + # on disk (they reparse cleanly with this as the inferred value). + noise_family: str = "gaussian" + # 0.6-M23: only populated when ``noise_family == "student_t"``. + degrees_of_freedom: Optional[float] = None class HoldoutInfo(_ManifestBase): @@ -1298,18 +1317,28 @@ def build_manifest( outlier_injections = detect_outlier_injections(config) - # 0.6-M22: emit the noise-model record only when the heteroscedastic - # lane is engaged. Default-off configs leave ``noise_config=None`` so - # the manifest stays byte-equivalent to pre-M22 modulo the schema - # version bump. + # 0.6-M22 / 0.6-M23: emit the noise-model record when EITHER the + # heteroscedastic lane is engaged (M22) OR a non-default noise family + # is configured (M23). Default-family default-amplitude configs leave + # ``noise_config=None`` so the manifest stays byte-equivalent to + # pre-M22 modulo the schema version bump. 
noise_config_info: Optional[NoiseConfigInfo] = None - if config.noise is not None and getattr(config.noise, "scale_with_trajectory", False): - noise_config_info = NoiseConfigInfo( - gaussian_sigma=float(config.noise.gaussian_sigma), - outlier_rate=float(config.noise.outlier_rate), - mcar_rate=float(config.noise.mcar_rate), - scale_with_trajectory=True, - ) + if config.noise is not None: + heteroscedastic = getattr(config.noise, "scale_with_trajectory", False) + family = getattr(config.noise, "noise_family", "gaussian") + if heteroscedastic or family != "gaussian": + noise_config_info = NoiseConfigInfo( + gaussian_sigma=float(config.noise.gaussian_sigma), + outlier_rate=float(config.noise.outlier_rate), + mcar_rate=float(config.noise.mcar_rate), + scale_with_trajectory=bool(heteroscedastic), + noise_family=str(family), + degrees_of_freedom=( + float(config.noise.degrees_of_freedom) + if config.noise.degrees_of_freedom is not None + else None + ), + ) return ManifestSchema( schema_version=MANIFEST_SCHEMA_VERSION, diff --git a/plotsim/metrics.py b/plotsim/metrics.py index 00b26f4..5e2b222 100644 --- a/plotsim/metrics.py +++ b/plotsim/metrics.py @@ -1133,6 +1133,14 @@ def apply_noise( ``noise.gaussian_sigma * trajectory_position`` (replacing the ``abs(v)`` factor). Position-zero cells receive zero gaussian noise. Outlier and MCAR branches are unchanged regardless of the flag. + + 0.6-M23: ``noise.noise_family`` selects the additive-jitter distribution: + ``"gaussian"`` (default; ``rng.normal``), ``"student_t"`` (``rng.standard_t`` + times the resolved scale; requires ``degrees_of_freedom``), or ``"laplace"`` + (``rng.laplace``). Family dispatch is orthogonal to ``scale_with_trajectory`` + — the resolved ``scale`` is identical regardless of family. The gaussian + lane preserves the historical ``rng.normal(loc=0.0, scale=scale)`` call + byte-for-byte; default-family configs see no RNG-consumption change. 
""" v = value if noise.gaussian_sigma > 0.0: @@ -1143,7 +1151,13 @@ def apply_noise( # so a metric that legitimately sits at 0 still receives noise. mag = abs(v) if v != 0.0 else 1.0 scale = noise.gaussian_sigma * mag - v = v + float(rng.normal(loc=0.0, scale=scale)) + if noise.noise_family == "gaussian": + v = v + float(rng.normal(loc=0.0, scale=scale)) + elif noise.noise_family == "student_t": + df = float(noise.degrees_of_freedom) # type: ignore[arg-type] + v = v + float(rng.standard_t(df)) * scale + else: # "laplace" + v = v + float(rng.laplace(loc=0.0, scale=scale)) if noise.outlier_rate > 0.0 and rng.random() < noise.outlier_rate: sign = 1.0 if v >= 0.0 else -1.0 @@ -1659,6 +1673,14 @@ def _apply_noise_batch( the gaussian standard deviation becomes ``noise.gaussian_sigma * trajectory_position``, replacing the ``abs(value)`` factor for every cell in the batch. + + 0.6-M23: ``noise.noise_family`` selects the additive-jitter family + identically to the scalar path. Family dispatch is orthogonal to + ``scale_with_trajectory``: the resolved ``scale`` (per-cell array under + multiplicative jitter, scalar under heteroscedastic) is the same value + for every family. The gaussian lane preserves the historical + ``rng.normal(..., size=n)`` call, so default-family runs are + byte-identical to pre-M23. """ n = values.shape[0] v = values.astype(np.float64, copy=True) @@ -1671,7 +1693,13 @@ def _apply_noise_batch( # Multiplicative jitter. Where v==0, fall back to absolute sigma. 
mag = np.where(v != 0.0, np.abs(v), 1.0) scale = noise.gaussian_sigma * mag - v = v + rng.normal(loc=0.0, scale=scale, size=n) + if noise.noise_family == "gaussian": + v = v + rng.normal(loc=0.0, scale=scale, size=n) + elif noise.noise_family == "student_t": + df = float(noise.degrees_of_freedom) # type: ignore[arg-type] + v = v + rng.standard_t(df, size=n) * scale + else: # "laplace" + v = v + rng.laplace(loc=0.0, scale=scale, size=n) if noise.outlier_rate > 0.0: coin = rng.random(size=n) diff --git a/tests/test_heavy_tailed_noise.py b/tests/test_heavy_tailed_noise.py new file mode 100644 index 0000000..b3a84b2 --- /dev/null +++ b/tests/test_heavy_tailed_noise.py @@ -0,0 +1,422 @@ +"""0.6-M23 — Heavy-tailed noise families (Student-t, Laplace). + +Acceptance criteria: + + * ``noise_family="student_t"`` with low ``degrees_of_freedom`` produces + residuals with markedly heavier tails than Gaussian — empirical + kurtosis significantly above 3, and a KS test against the t(df) reference + does not reject. + * ``noise_family="laplace"`` produces residuals consistent with the + Laplace distribution — KS test against the Laplace reference does not + reject. + * ``noise_family="gaussian"`` (default) produces byte-identical RNG draws + and identical noise output as the pre-M23 code path. + * Config-time validation: ``noise_family="student_t"`` requires + ``degrees_of_freedom``; ``degrees_of_freedom`` is rejected when the + family is not Student-t; ``df < 1`` is rejected. + * Manifest records ``noise_family`` and ``degrees_of_freedom`` whenever + the family is non-default (independent of the M22 heteroscedastic flag). + * Builder ``NoiseInput`` mirrors the engine fields and the interpreter + forwards them onto ``NoiseConfig`` unchanged. + * The two new families compose with M22 ``scale_with_trajectory`` — both + branches honor the position-scaled lane when enabled. 
+""" + +from __future__ import annotations + +import warnings + +import numpy as np +import pytest +from pydantic import ValidationError +from scipy import stats as sp_stats + +from plotsim import create, generate_tables +from plotsim.builder.input import NoiseInput +from plotsim.config import NoiseConfig +from plotsim.manifest import build_manifest +from plotsim.metrics import _apply_noise_batch, apply_noise + + +# --- Config-time validation ------------------------------------------------- + + +def test_student_t_requires_degrees_of_freedom(): + """A config that names Student-t without supplying ``degrees_of_freedom`` + must raise at construction time with a clear message — silent fallback + to a default df would change the realized tail thickness invisibly.""" + with pytest.raises(ValidationError) as exc: + NoiseConfig(gaussian_sigma=0.10, noise_family="student_t") + assert "degrees_of_freedom" in str(exc.value) + + +def test_degrees_of_freedom_rejected_when_family_not_student_t(): + """``degrees_of_freedom`` is a Student-t-only knob. Setting it on the + gaussian or laplace family must raise — keeps the field from + silently doing nothing and leaving a confusing audit trail.""" + with pytest.raises(ValidationError) as exc: + NoiseConfig(gaussian_sigma=0.10, degrees_of_freedom=5.0) + assert "degrees_of_freedom" in str(exc.value) + + with pytest.raises(ValidationError) as exc2: + NoiseConfig(gaussian_sigma=0.10, noise_family="laplace", degrees_of_freedom=5.0) + assert "degrees_of_freedom" in str(exc2.value) + + +def test_degrees_of_freedom_below_one_rejected(): + """df < 1 is degenerate (Student-t has no defined mean below df=1). + Pydantic ``ge=1.0`` rejects the value with a locatable error.""" + with pytest.raises(ValidationError): + NoiseConfig(gaussian_sigma=0.10, noise_family="student_t", degrees_of_freedom=0.5) + + +def test_laplace_does_not_accept_extra_params(): + """Laplace has no extra parameter beyond the shared scale. 
A config + that sets only family=laplace + gaussian_sigma must construct cleanly.""" + cfg = NoiseConfig(gaussian_sigma=0.10, noise_family="laplace") + assert cfg.noise_family == "laplace" + assert cfg.degrees_of_freedom is None + + +# --- apply_noise byte-identity for the gaussian default --------------------- + + +def test_apply_noise_default_family_byte_identical_to_pre_m23(): + """The default ``noise_family="gaussian"`` must consume RNG bytes + identically to the historical lane. Verified by comparing against a + bare ``rng.normal`` call on the same seed, mirroring the M22 + byte-identity test pattern.""" + noise = NoiseConfig(gaussian_sigma=0.10) + rng_engine = np.random.default_rng(7) + out = apply_noise(20.0, noise, rng_engine) + + rng_ref = np.random.default_rng(7) + expected = 20.0 + float(rng_ref.normal(loc=0.0, scale=0.10 * 20.0)) + assert out == expected + + +def test_apply_noise_batch_default_family_byte_identical_to_pre_m23(): + """Vectorized path: default family must match the historical batch + lane on identical input + same RNG seed.""" + noise = NoiseConfig(gaussian_sigma=0.10) + values = np.full(64, 30.0, dtype=np.float64) + out = _apply_noise_batch(values, noise, np.random.default_rng(99)) + + rng_ref = np.random.default_rng(99) + mag = np.where(values != 0.0, np.abs(values), 1.0) + expected = values + rng_ref.normal(loc=0.0, scale=0.10 * mag, size=64) + np.testing.assert_array_equal(out, expected) + + +# --- Student-t draw shape --------------------------------------------------- + + +def test_apply_noise_student_t_uses_standard_t_draw(): + """The scalar path with ``noise_family="student_t"`` must use + ``rng.standard_t(df)`` and multiply by the resolved scale — verified + against a hand-computed reference using the same RNG seed.""" + noise = NoiseConfig(gaussian_sigma=0.10, noise_family="student_t", degrees_of_freedom=3.0) + rng_engine = np.random.default_rng(13) + out = apply_noise(50.0, noise, rng_engine) + + rng_ref = 
np.random.default_rng(13) + expected = 50.0 + float(rng_ref.standard_t(3.0)) * (0.10 * 50.0) + assert out == expected + + +def test_apply_noise_batch_student_t_kurtosis_significantly_above_three(): + """Empirical kurtosis of t(3) residuals must blow past Gaussian's 3.0. + The population kurtosis of t(3) is infinite, so at 5_000 samples the + realized sample kurtosis lands well above any reasonable Gaussian + band. The assertion threshold (> 6.0) is conservative — the + theoretical kurtosis is unbounded for df=3.""" + noise = NoiseConfig(gaussian_sigma=1.0, noise_family="student_t", degrees_of_freedom=3.0) + values = np.zeros(5_000, dtype=np.float64) + # Use value=0 so the "fallback mag=1" lane is exercised → scale ≈ 1.0 + # for every cell. Residuals = the raw t(3) draws. + out = _apply_noise_batch(values, noise, np.random.default_rng(2026)) + sample_kurt = float(sp_stats.kurtosis(out, fisher=False)) # Pearson def: Gaussian = 3 + assert sample_kurt > 6.0, f"kurtosis {sample_kurt} not heavy-tailed enough" + + +def test_apply_noise_batch_student_t_ks_does_not_reject(): + """KS test of t(df) residuals against the scipy ``t(df)`` reference + must not reject at the 0.01 level (p > 0.01). Use a moderate df (5) + for a less pathological tail.""" + df = 5.0 + noise = NoiseConfig(gaussian_sigma=1.0, noise_family="student_t", degrees_of_freedom=df) + values = np.zeros(5_000, dtype=np.float64) + out = _apply_noise_batch(values, noise, np.random.default_rng(31)) + # Residual is the draw itself (since value=0 → scale=1.0 → noise = t(df)).
+ ks_stat, p_value = sp_stats.kstest(out, "t", args=(df,)) + assert p_value > 0.01, f"t({df}) KS rejected: p={p_value}, stat={ks_stat}" + + +# --- Laplace draw shape ----------------------------------------------------- + + +def test_apply_noise_laplace_uses_rng_laplace(): + """The scalar path with ``noise_family="laplace"`` must use + ``rng.laplace(loc=0.0, scale=scale)``, verified against a same-seed + reference.""" + noise = NoiseConfig(gaussian_sigma=0.10, noise_family="laplace") + rng_engine = np.random.default_rng(17) + out = apply_noise(50.0, noise, rng_engine) + + rng_ref = np.random.default_rng(17) + expected = 50.0 + float(rng_ref.laplace(loc=0.0, scale=0.10 * 50.0)) + assert out == expected + + +def test_apply_noise_batch_laplace_ks_does_not_reject(): + """KS test of Laplace residuals against the scipy ``laplace(scale=1)`` + reference must not reject at p > 0.01.""" + noise = NoiseConfig(gaussian_sigma=1.0, noise_family="laplace") + values = np.zeros(5_000, dtype=np.float64) + out = _apply_noise_batch(values, noise, np.random.default_rng(53)) + ks_stat, p_value = sp_stats.kstest(out, "laplace", args=(0.0, 1.0)) + assert p_value > 0.01, f"Laplace KS rejected: p={p_value}, stat={ks_stat}" + + +# --- Composition with M22 heteroscedastic flag ------------------------------ + + +def test_student_t_composes_with_scale_with_trajectory(): + """Heavy-tailed family + heteroscedastic amplitude must compose + orthogonally: the resolved scale is ``sigma * position`` (not + ``sigma * abs(value)``) and the family is Student-t.""" + noise = NoiseConfig( + gaussian_sigma=0.10, + noise_family="student_t", + degrees_of_freedom=4.0, + scale_with_trajectory=True, + ) + rng_engine = np.random.default_rng(23) + out = apply_noise(50.0, noise, rng_engine, trajectory_position=0.7) + + rng_ref = np.random.default_rng(23) + expected = 50.0 + float(rng_ref.standard_t(4.0)) * (0.10 * 0.7) + assert out == expected + + +def test_laplace_composes_with_scale_with_trajectory(): + """Same 
orthogonality check for the Laplace family.""" + noise = NoiseConfig( + gaussian_sigma=0.10, + noise_family="laplace", + scale_with_trajectory=True, + ) + rng_engine = np.random.default_rng(29) + out = apply_noise(50.0, noise, rng_engine, trajectory_position=0.4) + + rng_ref = np.random.default_rng(29) + expected = 50.0 + float(rng_ref.laplace(loc=0.0, scale=0.10 * 0.4)) + assert out == expected + + +def test_student_t_position_zero_yields_zero_noise_under_heteroscedastic(): + """Under the heteroscedastic lane, position=0 collapses the scale to + zero — a Student-t draw multiplied by zero is exactly zero, so the + value passes through unchanged.""" + noise = NoiseConfig( + gaussian_sigma=0.50, + noise_family="student_t", + degrees_of_freedom=3.0, + scale_with_trajectory=True, + ) + out = apply_noise( + 42.0, + noise, + np.random.default_rng(0), + trajectory_position=0.0, + ) + assert out == 42.0 + + +# --- Builder propagation ---------------------------------------------------- + + +def test_builder_noise_input_defaults_match_engine_defaults(): + n = NoiseInput(gaussian_sigma=0.05) + assert n.noise_family == "gaussian" + assert n.degrees_of_freedom is None + + +def test_builder_interpreter_propagates_student_t_to_engine_config(): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cfg = create( + about="student-t propagation", + unit="company", + window=("2024-01", "2024-12"), + metrics=[ + {"name": "engagement", "type": "score", "polarity": "positive"}, + ], + segments=[ + {"name": "alpha", "count": 5, "archetype": "growth"}, + ], + noise={ + "gaussian_sigma": 0.05, + "noise_family": "student_t", + "degrees_of_freedom": 4.0, + }, + ) + assert cfg.noise is not None + assert cfg.noise.noise_family == "student_t" + assert cfg.noise.degrees_of_freedom == pytest.approx(4.0) + + +def test_builder_interpreter_propagates_laplace_to_engine_config(): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cfg = create( + about="laplace 
propagation", + unit="company", + window=("2024-01", "2024-12"), + metrics=[ + {"name": "engagement", "type": "score", "polarity": "positive"}, + ], + segments=[ + {"name": "alpha", "count": 5, "archetype": "growth"}, + ], + noise={"gaussian_sigma": 0.05, "noise_family": "laplace"}, + ) + assert cfg.noise is not None + assert cfg.noise.noise_family == "laplace" + assert cfg.noise.degrees_of_freedom is None + + +# --- Manifest --------------------------------------------------------------- + + +def _build_small_config(**noise_overrides): + noise_payload = {"gaussian_sigma": 0.05, **noise_overrides} + return create( + about="manifest heavy-tail check", + unit="company", + window=("2024-01", "2024-12"), + metrics=[ + {"name": "engagement", "type": "score", "polarity": "positive"}, + {"name": "mrr", "type": "amount", "polarity": "positive", "range": [100, 50000]}, + ], + segments=[ + {"name": "growth", "count": 5, "archetype": "growth"}, + {"name": "decline", "count": 5, "archetype": "decline"}, + ], + noise=noise_payload, + ) + + +def _generate_and_manifest(cfg, seed: int = 0): + rng = np.random.default_rng(seed) + tables = generate_tables(cfg, rng) + from plotsim.trajectory import compute_all_trajectories + + n_periods = len(tables["dim_date"]) + trajectories = compute_all_trajectories(cfg, n_periods) + manifest = build_manifest(cfg, trajectories, tables) + return tables, manifest + + +def test_manifest_records_student_t_family_and_df(): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cfg = _build_small_config(noise_family="student_t", degrees_of_freedom=4.0) + _tables, manifest = _generate_and_manifest(cfg) + + assert manifest.noise_config is not None + assert manifest.noise_config.noise_family == "student_t" + assert manifest.noise_config.degrees_of_freedom == pytest.approx(4.0) + assert manifest.noise_config.scale_with_trajectory is False + + +def test_manifest_records_laplace_family(): + with warnings.catch_warnings(): + 
warnings.simplefilter("ignore") + cfg = _build_small_config(noise_family="laplace") + _tables, manifest = _generate_and_manifest(cfg) + + assert manifest.noise_config is not None + assert manifest.noise_config.noise_family == "laplace" + assert manifest.noise_config.degrees_of_freedom is None + + +def test_manifest_omits_noise_config_when_gaussian_default_amplitude(): + """Default gaussian family + default amplitude lane → no noise_config + record. Preserves the byte-equivalence M22 established for the + historical lane.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cfg = _build_small_config() # default family, default amplitude + _tables, manifest = _generate_and_manifest(cfg) + + assert manifest.noise_config is None + + +def test_manifest_records_when_family_default_but_heteroscedastic_on(): + """Pre-existing M22 contract still holds: heteroscedastic lane emits + the record even when the family is the default gaussian.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cfg = _build_small_config(scale_with_trajectory=True) + _tables, manifest = _generate_and_manifest(cfg) + + assert manifest.noise_config is not None + assert manifest.noise_config.noise_family == "gaussian" + assert manifest.noise_config.scale_with_trajectory is True + + +# --- End-to-end engine run with the new families --------------------------- + + +def test_engine_run_student_t_produces_distinct_output_from_gaussian(): + """A run with ``noise_family="student_t"`` at small df must yield a + materially different fact table than the same config under gaussian + noise at the same seed — confirms the dispatch reaches the engine + output, not just the noise helpers in isolation.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cfg_gauss = _build_small_config() + cfg_t = _build_small_config(noise_family="student_t", degrees_of_freedom=2.5) + + tables_g = generate_tables(cfg_gauss, np.random.default_rng(42)) + tables_t = 
generate_tables(cfg_t, np.random.default_rng(42)) + + # Look at any fact table the build produced and compare a numeric column. + fact_name = next(name for name, df in tables_g.items() if name.startswith("fct_")) + df_g = tables_g[fact_name] + df_t = tables_t[fact_name] + numeric_cols = [c for c in df_g.columns if df_g[c].dtype.kind == "f"] + assert numeric_cols, "expected at least one float metric column" + # At least one column must differ — the noise lane was actually + # exercised end-to-end. + diverged = False + for col in numeric_cols: + if not np.allclose(df_g[col].to_numpy(), df_t[col].to_numpy(), equal_nan=True): + diverged = True + break + assert diverged, "Student-t run produced identical output to Gaussian run" + + +def test_engine_run_default_family_byte_identical_to_pre_m23(): + """End-to-end check: a config that doesn't mention the new fields + produces byte-identical fact tables to one that explicitly sets + ``noise_family="gaussian"`` — proves the M23 code paths are no-ops on + the default lane.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cfg_implicit = _build_small_config() + cfg_explicit = _build_small_config(noise_family="gaussian") + + tables_a = generate_tables(cfg_implicit, np.random.default_rng(42)) + tables_b = generate_tables(cfg_explicit, np.random.default_rng(42)) + + assert set(tables_a.keys()) == set(tables_b.keys()) + for name, df_a in tables_a.items(): + df_b = tables_b[name] + for col in df_a.columns: + np.testing.assert_array_equal( + df_a[col].to_numpy(), + df_b[col].to_numpy(), + err_msg=f"column {col!r} in table {name!r} diverged on default-family lane", + ) diff --git a/tests/test_heteroscedastic_noise.py b/tests/test_heteroscedastic_noise.py index 6af2697..0632391 100644 --- a/tests/test_heteroscedastic_noise.py +++ b/tests/test_heteroscedastic_noise.py @@ -264,17 +264,18 @@ def test_manifest_omits_noise_config_when_off(): assert manifest.noise_config is None -def 
test_manifest_schema_version_pins_1_7(): - """0.6-M22 bumped the manifest schema version 1.6 → 1.7. The - test_schema_version_bumped_to_1_7 test in tests/test_manifest.py is - the authoritative pin; this assertion is a load-bearing reminder that - M22 owns the bump (so a future mission that adds a manifest field - knows to bump again).""" +def test_manifest_schema_version_pins_1_8(): + """0.6-M22 bumped the manifest schema version 1.6 → 1.7; 0.6-M23 bumped + 1.7 → 1.8. The test_schema_version_bumped_to_1_8 test in + tests/test_manifest.py is the authoritative pin; this assertion is a + load-bearing reminder that the heteroscedastic-emitting path participates + in the schema-version contract too (so a future mission that adds a + manifest field knows to bump again).""" with warnings.catch_warnings(): warnings.simplefilter("ignore") cfg = _build_small_config(scale_with_trajectory=True) _tables, manifest = _generate_and_manifest(cfg) - assert manifest.schema_version == "1.7" + assert manifest.schema_version == "1.8" # --- End-to-end byte-identity for default-off engine path ------------------- diff --git a/tests/test_manifest.py b/tests/test_manifest.py index d1c0e17..0adc40a 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -365,7 +365,7 @@ def test_all_bundled_templates_produce_valid_manifest(template, tmp_path): # --- 0.6-M5: causal_graph --------------------------------------------------- -def test_schema_version_bumped_to_1_7(): +def test_schema_version_bumped_to_1_8(): """0.6-M5 added causal_graph / correlations / outlier_injections (1.0 → 1.1). 0.6-M8a added per-entity ``active_window`` on EntityArchetypeAssignment (1.1 → 1.2). 0.6-M8c added per-entity ``treatment`` and the top-level @@ -377,13 +377,16 @@ def test_schema_version_bumped_to_1_7(): (1.4 → 1.5). 0.6-M18 added the ``parent_child_relations`` list for the parent/child fact grain (1.5 → 1.6). 
0.6-M22 added the optional ``noise_config`` field, populated only on heteroscedastic-noise runs - (1.6 → 1.7). + (1.6 → 1.7). 0.6-M23 extended ``NoiseConfigInfo`` with ``noise_family`` + and ``degrees_of_freedom`` for heavy-tailed noise families and + broadened the emission criterion to cover non-default families + (1.7 → 1.8). The version pin lives in this test rather than just the manifest module - so a downstream consumer pinning ``schema_version >= "1.7"`` has a + so a downstream consumer pinning ``schema_version >= "1.8"`` has a direct on-disk contract test it can reference. """ - assert MANIFEST_SCHEMA_VERSION == "1.7" + assert MANIFEST_SCHEMA_VERSION == "1.8" def test_causal_graph_emits_one_edge_per_metric_with_lag(saas_run): diff --git a/tests/test_multi_source.py b/tests/test_multi_source.py index 1d53c53..8d34c28 100644 --- a/tests/test_multi_source.py +++ b/tests/test_multi_source.py @@ -306,12 +306,15 @@ def test_manifest_source_entity_mappings_complete(): assert field in canonical_columns -def test_manifest_schema_bumped_to_1_7(): +def test_manifest_schema_bumped_to_1_8(): # 1.5 introduced the source_entity_mappings list (0.6-M13); 1.6 added # the parent_child_relations list (0.6-M18); 1.7 adds the optional - # ``noise_config`` field (0.6-M22). This module's contract tracks the - # pin at the schema level, not the field semantics. - assert MANIFEST_SCHEMA_VERSION == "1.7" + # ``noise_config`` field (0.6-M22); 1.8 extends ``NoiseConfigInfo`` + # with ``noise_family`` / ``degrees_of_freedom`` and broadens its + # emission criterion to cover non-gaussian families (0.6-M23). This + # module's contract tracks the pin at the schema level, not the field + # semantics. 
+ assert MANIFEST_SCHEMA_VERSION == "1.8" # ── AC6: single-source configs unchanged (no multi_source block) ────────── @@ -401,7 +404,7 @@ def test_bundled_template_loads_and_validates(tmp_path: Path): assert (out_dir / "dim_company_crm.csv").is_file() assert (out_dir / "dim_company_billing.csv").is_file() manifest_payload = json.loads((out_dir / "manifest.json").read_text(encoding="utf-8")) - assert manifest_payload["schema_version"] == "1.7" + assert manifest_payload["schema_version"] == "1.8" # 20 entities × 2 sources = 40 mapping records. assert len(manifest_payload["source_entity_mappings"]) == 40 diff --git a/tests/test_time_varying_correlations.py b/tests/test_time_varying_correlations.py index 0e8ebfc..42b9a37 100644 --- a/tests/test_time_varying_correlations.py +++ b/tests/test_time_varying_correlations.py @@ -679,11 +679,13 @@ def test_project_phase_correlation_or_issue_invalid_index(self): class TestManifestIntegration: """Manifest carries per-phase entries and the new top-level summary.""" - def test_schema_version_is_1_7(self): + def test_schema_version_is_1_8(self): # 0.6-M13 bumped 1.4 → 1.5 for ``source_entity_mappings``; 0.6-M18 # bumped 1.5 → 1.6 for ``parent_child_relations``; 0.6-M22 bumped - # 1.6 → 1.7 for the optional ``noise_config`` field. - assert MANIFEST_SCHEMA_VERSION == "1.7" + # 1.6 → 1.7 for the optional ``noise_config`` field; 0.6-M23 + # bumped 1.7 → 1.8 for ``noise_family`` / ``degrees_of_freedom`` + # on ``NoiseConfigInfo`` and broadened its emission criterion. + assert MANIFEST_SCHEMA_VERSION == "1.8" def test_no_phases_yields_empty_correlation_phases_list(self): cfg = _two_metric_config(