From ddd6391d52ceb505c795bb48a9daf674183d7425 Mon Sep 17 00:00:00 2001
From: mohossam01 <mail@mohossam.com>
Date: Fri, 15 May 2026 18:26:47 -0400
Subject: [PATCH] feat: record variance partition and gp kernel fit in the
 manifest

---
 docs/site/api-reference.md         |   6 +-
 docs/site/manifest-reference.md    | 224 ++++++++-
 docs/site/user-guide/archetypes.md |  29 ++
 plotsim/gp.py                      | 216 +++++++++
 plotsim/manifest.py                | 535 ++++++++++++++++++++
 tests/test_manifest_variance_gp.py | 752 +++++++++++++++++++++++++++++
 6 files changed, 1758 insertions(+), 4 deletions(-)
 create mode 100644 plotsim/gp.py
 create mode 100644 tests/test_manifest_variance_gp.py

diff --git a/docs/site/api-reference.md b/docs/site/api-reference.md
index b5734f0..11bd397 100644
--- a/docs/site/api-reference.md
+++ b/docs/site/api-reference.md
@@ -511,7 +511,9 @@ The manifest captures the *signal layer* a noisy fact table can't
 recover: archetype assignments, trajectory positions, event-firing
 periods, SCD band crossings, bridge associations, the engine's
 seasonal-strength inputs, per-pair regression summaries for declared
-correlations, and reproducibility metadata.
+correlations, nested-ANOVA variance partitions per metric, RBF
+Gaussian-process kernel fits over each archetype's trajectory shape,
+and reproducibility metadata.
 
 **Parameters**
 
@@ -523,7 +525,7 @@ correlations, and reproducibility metadata.
 | `sample_rate` | Override for `config.manifest.trajectory_sample_rate`. `None` reads the config value. |
 | `scd_state` | Pass `state.scd` to record SCD Type 2 band crossings. `None` leaves `manifest.scd_events` empty. |
 | `bridge_state` | Pass `state.bridges` to record M:N associations. `None` leaves `manifest.bridge_associations` empty. |
-| `entity_metrics` | Pass `state.entity_metrics` to populate `manifest.regression_pairs_global` and `manifest.regression_pairs_by_archetype` with pair-wise OLS summaries for every declared correlation pair. `None` leaves both sections at their empty defaults. |
+| `entity_metrics` | Pass `state.entity_metrics` to populate `manifest.regression_pairs_global` / `manifest.regression_pairs_by_archetype` with pair-wise OLS summaries for every declared correlation pair AND `manifest.variance_partitions` / `manifest.variance_partitions_by_segment` with nested-ANOVA decompositions per metric. `None` leaves all four sections at their empty defaults. (`manifest.gp_kernel_fits` is populated independently from `config.archetypes` whenever the config declares at least one metric — the section does not need `entity_metrics` threaded.) |
 
 The function is pure — same inputs produce a byte-identical manifest.
 No RNG, no clock, no filesystem.
diff --git a/docs/site/manifest-reference.md b/docs/site/manifest-reference.md
index eabfb6e..2b771ef 100644
--- a/docs/site/manifest-reference.md
+++ b/docs/site/manifest-reference.md
@@ -68,13 +68,16 @@ produces a byte-identical `manifest.json`. Encoding: UTF-8,
   "noise_config": {...} | null,
   "seasonal_decomposition": {...},
   "regression_pairs_global": [...],
-  "regression_pairs_by_archetype": {...}
+  "regression_pairs_by_archetype": {...},
+  "variance_partitions": [...],
+  "variance_partitions_by_segment": [...],
+  "gp_kernel_fits": [...]
 }
 ```
 
 | Field | Type | Description |
 |---|---|---|
-| `schema_version` | `str` | Wire-shape version. Currently `"1.10"` (bumped over time as new additive sections — `causal_graph`, `correlations`, `outlier_injections`, multi-source mappings, `parent_child_relations`, `noise_config` — landed; 1.7 → 1.8 extended `noise_config` with `noise_family` / `degrees_of_freedom`; 1.8 → 1.9 added the optional `target_metric` field on the per-entity `treatment` and per-cohort `treatment_cohorts` records; 1.9 → 1.10 added the `seasonal_decomposition` snapshot plus per-pair OLS summaries in `regression_pairs_global` / `regression_pairs_by_archetype`) |
+| `schema_version` | `str` | Wire-shape version. Currently `"1.10"` (bumped over time as new additive sections — `causal_graph`, `correlations`, `outlier_injections`, multi-source mappings, `parent_child_relations`, `noise_config` — landed; 1.7 → 1.8 extended `noise_config` with `noise_family` / `degrees_of_freedom`; 1.8 → 1.9 added the optional `target_metric` field on the per-entity `treatment` and per-cohort `treatment_cohorts` records; 1.9 → 1.10 added the `seasonal_decomposition` snapshot plus per-pair OLS summaries in `regression_pairs_global` / `regression_pairs_by_archetype`; the `variance_partitions` / `variance_partitions_by_segment` / `gp_kernel_fits` sections landed additively on 1.10 with no version bump) |
 | `seed` | `int` | The seed used for generation — `config.seed` |
 | `config_sha256` | `str` | Full SHA-256 hex of the JSON-serialized config. Detects config drift between generation and consumption |
 | `archetype_assignments` | array | One entry per entity; see below |
@@ -95,6 +98,9 @@ produces a byte-identical `manifest.json`. Encoding: UTF-8,
 | `seasonal_decomposition` | object | Snapshot of the seasonal-strength inputs the engine consumed. Always emitted; configs without `seasonal_effects` get the empty-sentinel shape (empty list / empty dicts) |
 | `regression_pairs_global` | array | Pair-wise OLS summary (slope, intercept, r², residual variance) for every declared correlation pair, pooled across every entity. Empty list when no correlations are configured |
 | `regression_pairs_by_archetype` | object | Same OLS summary as `regression_pairs_global` but grouped by `Entity.archetype`. Keys are archetype names; values mirror the global list shape. Empty dict when no correlations are configured |
+| `variance_partitions` | array | Nested-ANOVA variance decomposition per metric, with `Entity.archetype` as the between-group axis. One record per metric. Empty list when the config declares no metrics |
+| `variance_partitions_by_segment` | array | Same nested-ANOVA decomposition with curve segment as the between-group axis, computed per archetype. One record per `(metric, archetype)` pair. Segments are never pooled across archetypes. Empty list when the config declares no metrics |
+| `gp_kernel_fits` | array | RBF Gaussian-process kernel fits over each archetype's trajectory shape, plus per-entity records for entities that carry trajectory `overrides`. Empty list when the config declares no metrics |
 
 ---
 
@@ -848,6 +854,220 @@ pooled fit will mispredict for the archetype whose β diverges most.
 
 ---
 
+## `variance_partitions`
+
+Nested-ANOVA variance decomposition per metric, with `Entity.archetype`
+as the between-group axis.
+
+```json
+{
+  "variance_partitions": [
+    {
+      "metric": "engagement",
+      "scope": "archetype",
+      "scope_name": "all",
+      "ss_between": 12.4,
+      "ss_within_entity": 6.8,
+      "ss_residual": 41.0,
+      "fraction_between": 0.206,
+      "fraction_within_entity": 0.113,
+      "fraction_residual": 0.681,
+      "degrees_of_freedom_between": 1,
+      "degrees_of_freedom_within": 18,
+      "degrees_of_freedom_residual": 220,
+      "n_observations": 240,
+      "cold_start_entities_excluded": 0
+    }
+  ]
+}
+```
+
+| Field | Type | Description |
+|---|---|---|
+| `metric` | `str` | The metric this record decomposes (matches `config.metrics[i].name`) |
+| `scope` | `str` | Always `"archetype"` for this section |
+| `scope_name` | `str` | Always the literal sentinel `"all"` for this section — the partition spans every archetype the config declares |
+| `ss_between` | `float` | Sum-of-squares attributable to the grouping axis (variance between archetype means around the grand mean) |
+| `ss_within_entity` | `float` | Sum-of-squares between entity means within the same archetype |
+| `ss_residual` | `float` | Within-entity sum-of-squares over time (residual to each entity's own mean) |
+| `fraction_between` / `fraction_within_entity` / `fraction_residual` | `float` | Each `ss_*` divided by `ss_total = ss_between + ss_within_entity + ss_residual`. The three fractions sum to `1.0` (or to `0.0` for a fully constant metric) |
+| `degrees_of_freedom_between` | `int` | `n_groups - 1` where `n_groups` is the number of archetypes that contributed observations |
+| `degrees_of_freedom_within` | `int` | `n_cells - n_groups` where `n_cells` is the number of distinct `(archetype, entity)` pairs |
+| `degrees_of_freedom_residual` | `int` | `n_observations - n_cells` |
+| `n_observations` | `int` | Count of finite `(entity, period)` cells used. Cells with NaN (cold-start lead-ins, MCAR-rewritten values) are excluded |
+| `cold_start_entities_excluded` | `int` | Count of entities that contributed at least one NaN cell to this partition. Surfaces "this section dropped data" without forcing a re-derivation of the NaN tally |
+
+The three sums-of-squares satisfy `ss_between + ss_within_entity +
+ss_residual == ss_total` exactly (modulo floating-point rounding at
+`rtol≈1e-10`). One record per metric, sorted by `metric` for stable JSON
+output.
+
+**Use case** — diagnose how much of a metric's spread is explained by
+the latent archetype label vs. by entity-level idiosyncrasy vs. by
+within-entity time-series noise. A high `fraction_between` says the
+archetype is the dominant driver of metric values; a high
+`fraction_residual` with a low `fraction_between` says metric values
+are essentially noise on top of an entity-specific mean.
+
+---
+
+## `variance_partitions_by_segment`
+
+The same nested-ANOVA decomposition with curve segment as the between-
+group axis, computed per archetype.
+
+```json
+{
+  "variance_partitions_by_segment": [
+    {
+      "metric": "engagement",
+      "scope": "segment",
+      "scope_name": "growth",
+      "ss_between": 8.1,
+      "ss_within_entity": 4.6,
+      "ss_residual": 22.3,
+      "fraction_between": 0.231,
+      "fraction_within_entity": 0.131,
+      "fraction_residual": 0.638,
+      "degrees_of_freedom_between": 2,
+      "degrees_of_freedom_within": 27,
+      "degrees_of_freedom_residual": 90,
+      "n_observations": 120,
+      "cold_start_entities_excluded": 0
+    }
+  ]
+}
+```
+
+The schema is identical to `variance_partitions`. The differences:
+
+| Field | Difference |
+|---|---|
+| `scope` | Always `"segment"` |
+| `scope_name` | The archetype name. Each archetype's segments are decomposed in isolation; the section never pools observations across archetypes |
+| `degrees_of_freedom_between` | `n_curve_segments - 1` for the named archetype |
+| `n_observations` | Restricted to entities of `scope_name`'s archetype only |
+
+One record per `(metric, archetype)` pair whose entities contributed at
+least one finite observation. Sorted by `(metric, scope_name)` for
+stable JSON output.
+
+Each entity's segment membership is derived from its own boundary
+computation — entities with `start_period > 0` (cold-start) or with
+trajectory `overrides` contribute observations to the segment they
+actually occupied at each period, not to the archetype baseline
+segment. Period membership is reported as `segment_0`, `segment_1`, …
+internally and rolls up into the per-segment SS terms; downstream
+consumers see only the partition totals.
+
+**Use case** — locate where in an archetype's curve a metric's
+variance spreads most. A high `ss_between` for an archetype with three
+distinct curve segments says the metric tracks the curve; a low
+`ss_between` says the metric is decoupled from the archetype's
+narrative phase structure.
+
+---
+
+## `gp_kernel_fits`
+
+RBF Gaussian-process kernel fits over each archetype's trajectory
+shape. Surfaces a smoothness characterization the trajectory tape
+itself doesn't directly expose.
+
+```json
+{
+  "gp_kernel_fits": [
+    {
+      "scope_type": "archetype",
+      "scope_name": "growth",
+      "kernel_type": "rbf",
+      "hyperparameters": {
+        "length_scale": 4.7,
+        "signal_variance": 0.31,
+        "noise_variance": 0.0008
+      },
+      "log_marginal_likelihood": -3.2,
+      "n_train": 12,
+      "converged": true
+    },
+    {
+      "scope_type": "archetype",
+      "scope_name": "flat",
+      "kernel_type": "rbf",
+      "hyperparameters": {},
+      "log_marginal_likelihood": null,
+      "n_train": 12,
+      "converged": false
+    },
+    {
+      "scope_type": "entity",
+      "scope_name": "growers_001",
+      "kernel_type": "rbf",
+      "hyperparameters": {
+        "length_scale": 6.1,
+        "signal_variance": 0.28,
+        "noise_variance": 0.0012
+      },
+      "log_marginal_likelihood": -2.9,
+      "n_train": 10,
+      "converged": true
+    }
+  ]
+}
+```
+
+| Field | Type | Description |
+|---|---|---|
+| `scope_type` | `str` | Either `"archetype"` (one fit per declared archetype, against the archetype's clean trajectory) or `"entity"` (one fit per override-bearing entity, against that entity's specific trajectory) |
+| `scope_name` | `str` | The archetype name or the entity name, depending on `scope_type` |
+| `kernel_type` | `str` | Always `"rbf"` for now. Reserved as a discriminator for future kernel families |
+| `hyperparameters` | object | Three keys when `converged=true`: `length_scale`, `signal_variance`, `noise_variance`. All in the natural (unstandardized) scale — `length_scale` is in units of period indices. Empty `{}` when `converged=false` |
+| `log_marginal_likelihood` | `float` or `null` | The maximized value (positive sign — the fitter minimizes the negative log likelihood internally and negates before reporting). `null` when `converged=false` |
+| `n_train` | `int` | Count of finite `(period, position)` training pairs used. NaN cells (cold-start prefix periods) are excluded |
+| `converged` | `bool` | `true` when the optimizer reported success AND produced finite hyperparameters. `false` otherwise — see below for the failure modes |
+
+### Records emitted
+
+* One `scope_type="archetype"` record per archetype the config
+  declares. The fit consumes the archetype's *clean* trajectory (no
+  overrides, no cold-start shift) so the kernel characterizes the
+  archetype's intrinsic shape, not any individual entity's realized
+  data.
+* One `scope_type="entity"` record per entity carrying a non-`None`
+  `overrides` field. Default-trajectory entities do *not* produce
+  per-entity records — only override-bearing entities do.
+
+### When `converged=false`
+
+The optimizer's failure paths are surfaced as a non-fatal record (the
+manifest build never raises on a failed fit):
+
+* **Flat trajectory** — variance below the floor (≈ `1e-12`). The RBF
+  likelihood surface is degenerate when the signal has no variance to
+  fit.
+* **Sparse data** — fewer than three finite training points (the
+  kernel has three hyperparameters; under-determined fits are
+  short-circuited).
+* **Optimizer failure** — `scipy.optimize.minimize` reports
+  non-success or returns a non-finite NLL.
+* **Numerical blow-up** — Cholesky factorization fails on the
+  covariance matrix despite the noise-variance floor.
+
+`converged=false` records carry an empty `hyperparameters` dict and a
+`null` log marginal likelihood. Consumers should gate downstream
+usage on the flag rather than inspecting `hyperparameters` directly.
+
+**Use case** — characterize trajectory smoothness without re-fitting a
+GP downstream. A short `length_scale` (≪ `n_periods`) says the
+archetype oscillates or has fast transitions; a long `length_scale`
+(≈ `n_periods`) says it's gradual or monotone. Compare per-entity
+records against their parent archetype to detect override-driven
+shape divergence — a per-entity `length_scale` that disagrees with
+the archetype baseline is direct evidence that the override pushed
+the entity's curve onto a different smoothness regime.
+
+---
+
 ## Reading the manifest in Python
 
 ```python
diff --git a/docs/site/user-guide/archetypes.md b/docs/site/user-guide/archetypes.md
index 5737f1b..6eef991 100644
--- a/docs/site/user-guide/archetypes.md
+++ b/docs/site/user-guide/archetypes.md
@@ -231,6 +231,35 @@ controls peak height above `base`. Each cycle ramps from `base` to
 
 ---
 
+## What the manifest records about archetypes
+
+Every run emits three archetype-shaped sections in `manifest.json`
+([reference](../manifest-reference.md)) so a downstream consumer can
+characterize the engine's archetype mix without re-running it:
+
+* **`variance_partitions`** — one nested-ANOVA record per metric, with
+  `Entity.archetype` as the between-group axis. The
+  `fraction_between` value answers "how much of this metric's spread
+  comes from the latent archetype label vs. entity-level idiosyncrasy
+  vs. within-entity time-series noise?"
+* **`variance_partitions_by_segment`** — the same decomposition with
+  curve segment as the axis, computed per archetype (segments are
+  never pooled across archetypes). Surfaces which curve phases
+  contribute most to a metric's spread within an archetype.
+* **`gp_kernel_fits`** — RBF Gaussian-process fit over each
+  archetype's clean trajectory. The recovered `length_scale`
+  characterizes the archetype's smoothness — short for oscillating /
+  fast-transition shapes, long for monotone or gradual ones. Entities
+  carrying trajectory `overrides` (per-entity inflection shifts) get
+  their own per-entity record alongside the archetype baseline so
+  override-driven shape divergence is directly visible.
+
+Configs without metrics emit empty containers for all three sections.
+Flat trajectories produce a `converged: false` GP record with null
+hyperparameters — the build never aborts on a degenerate fit.
+
+---
+
 ## What to read next
 
 - [Metrics and connections](./metrics-and-connections.md) — the metric
diff --git a/plotsim/gp.py b/plotsim/gp.py
new file mode 100644
index 0000000..9a53790
--- /dev/null
+++ b/plotsim/gp.py
@@ -0,0 +1,216 @@
+"""plotsim.gp — RBF Gaussian-process kernel fit for trajectory characterization.
+
+Mission 026: emits a per-archetype (or per-entity for override-bearing
+entities) RBF kernel fit into the manifest so downstream consumers can
+characterize trajectory smoothness (length scale, signal variance) without
+re-fitting their own GP against the realized metric values.
+
+This module is analytical-only and stateless:
+
+  * No filesystem I/O.
+  * No engine state — callers translate trajectory arrays into ``(x, y)``
+    training data and pass them in.
+  * No RNG — same input → same fit result.
+
+The kernel is RBF (squared exponential) plus a homoscedastic noise term:
+
+    k(t, t') = signal_variance * exp(-0.5 * (t - t')² / length_scale²)
+             + noise_variance * I
+
+Hyperparameters are optimized in log-space (``L-BFGS-B``) for numerical
+stability — positivity is guaranteed by the parametrization. Degenerate
+inputs (constant trajectories, ``< 3`` finite training points,
+zero-range support, Cholesky failures, optimizer non-convergence) are
+caught and surfaced as ``RBFFitResult(converged=False, hyperparameters=None,
+log_marginal_likelihood=None)`` — never as exceptions. This is the
+contract the manifest builder relies on: a failed fit must not abort
+manifest emission.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Optional
+
+import numpy as np
+from scipy.optimize import minimize
+
+_LOG_TWO_PI = float(np.log(2.0 * np.pi))
+# Trajectories whose variance is below this floor are treated as flat —
+# the RBF likelihood surface is degenerate (signal_variance → 0 floor)
+# and fitting has no informational content.
+_FLAT_VARIANCE_FLOOR = 1e-12
+# Log-space bounds for each hyperparameter. Keeps the optimizer away from
+# numerically unstable regions where Cholesky would fail.
+_LOG_HYPER_MIN = -8.0
+_LOG_HYPER_MAX = 8.0
+# Jitter added to the kernel diagonal for numerical stability beyond the
+# noise term itself — keeps near-singular matrices factorizable.
+_JITTER = 1e-10
+
+
+@dataclass(frozen=True)
+class RBFFitResult:
+    """Outcome of one RBF kernel fit.
+
+    ``converged=True`` means the optimizer reported success AND produced
+    finite hyperparameters. ``converged=False`` is the catch-all for
+    every degenerate path; consumers should gate downstream usage on the
+    flag rather than inspecting ``hyperparameters`` directly.
+    """
+
+    converged: bool
+    hyperparameters: Optional[dict[str, float]]
+    log_marginal_likelihood: Optional[float]
+    n_train: int
+
+
+def _neg_log_marginal_likelihood(
+    log_theta: np.ndarray,
+    y_centered: np.ndarray,
+    d2: np.ndarray,
+) -> float:
+    """Negative log marginal likelihood for the RBF + noise kernel.
+
+    ``d2`` is the pairwise squared-distance matrix of the training inputs;
+    ``y_centered`` is the training targets with the empirical mean
+    subtracted (so the GP prior mean of zero is appropriate). Returns
+    ``1e10`` as a soft penalty for Cholesky failures so the optimizer
+    backs off the bad region instead of crashing.
+    """
+    log_length_scale, log_signal_var, log_noise_var = log_theta
+    length_scale = float(np.exp(log_length_scale))
+    signal_var = float(np.exp(log_signal_var))
+    noise_var = float(np.exp(log_noise_var))
+    n = y_centered.shape[0]
+    k = signal_var * np.exp(-0.5 * d2 / (length_scale * length_scale))
+    k = k + (noise_var + _JITTER) * np.eye(n)
+    try:
+        chol = np.linalg.cholesky(k)
+    except np.linalg.LinAlgError:
+        return 1e10
+    alpha = np.linalg.solve(chol.T, np.linalg.solve(chol, y_centered))
+    log_det = 2.0 * float(np.sum(np.log(np.diag(chol))))
+    nll = 0.5 * float(y_centered @ alpha) + 0.5 * log_det + 0.5 * float(n) * _LOG_TWO_PI
+    if not np.isfinite(nll):
+        return 1e10
+    return float(nll)
+
+
+def fit_rbf(x: np.ndarray, y: np.ndarray) -> RBFFitResult:
+    """Fit an RBF kernel + homoscedastic noise to ``(x, y)``.
+
+    Args:
+        x: 1-D training inputs (typically period indices as float64).
+        y: 1-D training targets (typically trajectory positions in [0, 1]).
+
+    Returns:
+        ``RBFFitResult``. Non-finite cells in either ``x`` or ``y`` are
+        masked out before fitting. Returns ``converged=False`` (and null
+        hyperparameters / likelihood) when:
+
+          * fewer than 3 finite training points remain after masking;
+          * the masked ``y`` has variance below ``_FLAT_VARIANCE_FLOOR``;
+          * the masked ``x`` has zero range (all inputs identical);
+          * the optimizer fails to converge OR returns a non-finite NLL.
+
+        On success ``hyperparameters`` carries the three keys
+        ``length_scale``, ``signal_variance``, ``noise_variance`` (in the
+        original unstandardized scale) and ``log_marginal_likelihood``
+        carries the maximized value (positive sign — the function
+        minimizes the *negative* log likelihood and the result is
+        negated before reporting).
+    """
+    x_arr = np.asarray(x, dtype=np.float64)
+    y_arr = np.asarray(y, dtype=np.float64)
+    if x_arr.shape != y_arr.shape or x_arr.ndim != 1:
+        return RBFFitResult(
+            converged=False,
+            hyperparameters=None,
+            log_marginal_likelihood=None,
+            n_train=0,
+        )
+    mask = np.isfinite(x_arr) & np.isfinite(y_arr)
+    xf = x_arr[mask]
+    yf = y_arr[mask]
+    n = int(xf.size)
+    if n < 3:
+        return RBFFitResult(
+            converged=False,
+            hyperparameters=None,
+            log_marginal_likelihood=None,
+            n_train=n,
+        )
+    y_var = float(np.var(yf))
+    if y_var < _FLAT_VARIANCE_FLOOR:
+        return RBFFitResult(
+            converged=False,
+            hyperparameters=None,
+            log_marginal_likelihood=None,
+            n_train=n,
+        )
+    x_range = float(xf.max() - xf.min())
+    if x_range <= 0.0:
+        return RBFFitResult(
+            converged=False,
+            hyperparameters=None,
+            log_marginal_likelihood=None,
+            n_train=n,
+        )
+
+    yf_centered = yf - float(np.mean(yf))
+    diff = xf[:, None] - xf[None, :]
+    d2 = diff * diff
+
+    init_length_scale = max(x_range / 4.0, 1.0)
+    init_signal_var = max(y_var, 1e-6)
+    init_noise_var = max(y_var * 1e-3, 1e-8)
+    log_theta0 = np.array(
+        [
+            float(np.log(init_length_scale)),
+            float(np.log(init_signal_var)),
+            float(np.log(init_noise_var)),
+        ],
+        dtype=np.float64,
+    )
+    bounds = [
+        (_LOG_HYPER_MIN, _LOG_HYPER_MAX),
+        (_LOG_HYPER_MIN, _LOG_HYPER_MAX),
+        (_LOG_HYPER_MIN, _LOG_HYPER_MAX),
+    ]
+    try:
+        result = minimize(
+            _neg_log_marginal_likelihood,
+            log_theta0,
+            args=(yf_centered, d2),
+            method="L-BFGS-B",
+            bounds=bounds,
+        )
+    except (ValueError, np.linalg.LinAlgError):
+        return RBFFitResult(
+            converged=False,
+            hyperparameters=None,
+            log_marginal_likelihood=None,
+            n_train=n,
+        )
+    if not bool(result.success) or not np.isfinite(result.fun):
+        return RBFFitResult(
+            converged=False,
+            hyperparameters=None,
+            log_marginal_likelihood=None,
+            n_train=n,
+        )
+    log_length_scale, log_signal_var, log_noise_var = result.x
+    return RBFFitResult(
+        converged=True,
+        hyperparameters={
+            "length_scale": float(np.exp(log_length_scale)),
+            "signal_variance": float(np.exp(log_signal_var)),
+            "noise_variance": float(np.exp(log_noise_var)),
+        },
+        log_marginal_likelihood=float(-result.fun),
+        n_train=n,
+    )
+
+
+__all__ = ["RBFFitResult", "fit_rbf"]
diff --git a/plotsim/manifest.py b/plotsim/manifest.py
index bcd695f..ab8e28d 100644
--- a/plotsim/manifest.py
+++ b/plotsim/manifest.py
@@ -722,6 +722,115 @@ class RegressionPair(_ManifestBase):
     n_observations: int
 
 
+class VariancePartition(_ManifestBase):
+    """Mission 026: nested-ANOVA variance partition for one metric.
+
+    A three-level decomposition of the realized metric series:
+
+      * ``ss_between`` — variance attributable to the grouping axis. For
+        ``scope="archetype"`` the axis is ``Entity.archetype`` (variance
+        between archetype means around the grand mean). For
+        ``scope="segment"`` the axis is the curve segment index within
+        ``scope_name``'s archetype (variance between segment means
+        around the archetype's grand mean).
+      * ``ss_within_entity`` — variance between entity means within the
+        same group. Captures entity-to-entity dispersion that the
+        grouping axis does not explain.
+      * ``ss_residual`` — within-entity variance over time, residual to
+        the entity's own mean within its group. The unexplained
+        time-series noise floor.
+
+    The three sum to ``ss_total = Σ (y - grand_mean)²`` exactly modulo
+    floating-point rounding (rtol ≈ 1e-10 holds in practice).
+    ``fraction_*`` fields are each ``ss_* / ss_total`` (or zero when
+    ``ss_total == 0``).
+
+    Degrees of freedom follow the standard nested-ANOVA convention:
+
+      * ``degrees_of_freedom_between = n_groups - 1``
+      * ``degrees_of_freedom_within = n_cells - n_groups`` (cells =
+        unique ``(group, entity)`` pairs that contributed observations)
+      * ``degrees_of_freedom_residual = n_observations - n_cells``
+
+    Cold-start entities (those with ``Entity.start_period > 0``) and
+    other NaN-cell sources are excluded cell-by-cell — only finite
+    observations contribute. ``cold_start_entities_excluded`` counts
+    entities that had at least one NaN cell in this partition's
+    domain; it surfaces "this section dropped data" to the consumer
+    without forcing them to re-derive the NaN count.
+
+    ``scope_name`` semantics:
+
+      * ``scope="archetype"`` — single record per metric covering every
+        archetype. ``scope_name`` is the literal sentinel ``"all"``.
+      * ``scope="segment"`` — one record per ``(metric, archetype)``.
+        ``scope_name`` is the archetype name, and ``ss_between`` /
+        DOF reflect the segments within that archetype only. Segments
+        are NEVER pooled across archetypes — the manifest guarantees
+        the archetype-locality of every segment-scope record.
+    """
+
+    metric: str
+    scope: str
+    scope_name: str
+    ss_between: float
+    ss_within_entity: float
+    ss_residual: float
+    fraction_between: float
+    fraction_within_entity: float
+    fraction_residual: float
+    degrees_of_freedom_between: int
+    degrees_of_freedom_within: int
+    degrees_of_freedom_residual: int
+    n_observations: int
+    cold_start_entities_excluded: int = 0
+
+
+class GPKernelFit(_ManifestBase):
+    """Mission 026: RBF Gaussian-process fit summary for one trajectory shape.
+
+    Emitted with ``scope_type="archetype"`` (one record per declared
+    archetype, fitted against the archetype's clean trajectory) plus
+    additional ``scope_type="entity"`` records for any entity carrying
+    a non-None ``overrides`` field (the entity's specific trajectory
+    diverges from the archetype baseline and warrants its own fit).
+
+      * ``scope_name`` — the archetype name (for archetype scope) or
+        the entity name (for entity scope).
+      * ``kernel_type`` — currently always ``"rbf"``. Reserved for a
+        future multi-kernel extension; consumers should still gate on
+        the value for forward compatibility.
+      * ``hyperparameters`` — three keys when ``converged=True``:
+        ``length_scale``, ``signal_variance``, ``noise_variance``. All
+        three are in the natural (unstandardized) scale —
+        ``length_scale`` is in units of period indices, so a value of
+        ``10.0`` means the kernel correlation length spans ten periods.
+        Empty dict when ``converged=False``.
+      * ``log_marginal_likelihood`` — the maximized value (positive
+        sign; the fitter minimizes the negative log likelihood
+        internally and negates before reporting). ``None`` when
+        ``converged=False``.
+      * ``n_train`` — count of finite ``(period, position)`` training
+        pairs used. NaN cells (cold-start prefix periods) are
+        excluded.
+      * ``converged`` — ``True`` when the optimizer reported success
+        AND produced finite hyperparameters. ``False`` for flat
+        trajectories (variance below ``1e-12``), trajectories with
+        fewer than three finite training points, optimizer failures,
+        and Cholesky-blow-up paths. Consumers should gate downstream
+        usage on this flag rather than inspecting hyperparameters
+        directly.
+    """
+
+    scope_type: str
+    scope_name: str
+    kernel_type: str
+    hyperparameters: dict[str, float] = Field(default_factory=dict)
+    log_marginal_likelihood: Optional[float] = None
+    n_train: int
+    converged: bool
+
+
 class HoldoutInfo(_ManifestBase):
     """M109: ground-truth record of the temporal holdout split.
 
@@ -883,6 +992,24 @@ class ManifestSchema(_ManifestBase):
     # OLS (rare; a single-entity archetype with cold-start NaN can
     # produce ``n_observations < 2``).
     regression_pairs_by_archetype: dict[str, list[RegressionPair]] = {}
+    # Mission 026: nested-ANOVA variance partitions with archetype as
+    # the between-group axis. One record per metric. Empty list when
+    # ``entity_metrics`` was not threaded or the config declares no
+    # metrics — matches the empty-section contract on no-metric runs.
+    variance_partitions: list[VariancePartition] = []
+    # Mission 026: nested-ANOVA variance partitions with curve segment
+    # as the between-group axis, computed per archetype (never pooled
+    # across archetypes). One record per ``(metric, archetype)`` pair
+    # whose entities contributed at least one finite observation.
+    # Empty list under the same conditions as ``variance_partitions``.
+    variance_partitions_by_segment: list[VariancePartition] = []
+    # Mission 026: per-archetype (and per-entity for entities with
+    # ``overrides``) RBF GP kernel fits over the trajectory shape.
+    # Empty list when ``config.metrics`` is empty (the section
+    # piggybacks on metric presence so no-metric configs stay
+    # byte-equivalent to the pre-026 lane modulo the schema version
+    # string and the new empty containers).
+    gp_kernel_fits: list[GPKernelFit] = []
 
 
 # --- Helpers -----------------------------------------------------------------
@@ -1348,6 +1475,388 @@ def _build_regression_pairs_by_archetype(
     return out
 
 
+# --- Variance partition / GP helpers (Mission 026) ---------------------------
+
+
+def _entity_segment_membership(
+    config: PlotsimConfig,
+    n_periods: int,
+) -> dict[str, list[int]]:
+    """Per-entity ``(period_index → segment_index)`` lookup table.
+
+    Returns one length-``n_periods`` list per entity. Entry ``p`` is the
+    curve segment index that period maps to under that entity's own
+    boundary computation (overrides + ``start_period`` applied), or
+    ``-1`` for periods before the entity's arrival (cold-start prefix)
+    and for entities whose archetype is unresolvable.
+
+    Reuses ``trajectory._segment_boundaries`` / ``_resolve_shift`` so
+    the segment definition is byte-equivalent to the trajectory the
+    engine actually generated.
+    """
+    # Local imports: ``plotsim.trajectory`` already imports from
+    # ``plotsim.config``, so importing it at module top wouldn't cycle —
+    # but keeping the imports local matches the pattern M25 used for
+    # ``_build_seasonal_factors``.
+    from plotsim.trajectory import _resolve_shift, _segment_boundaries
+
+    arch_by_name = {a.name: a for a in config.archetypes}
+    out: dict[str, list[int]] = {}
+    for entity in config.entities:
+        archetype = arch_by_name.get(entity.archetype)
+        membership = [-1] * n_periods
+        if archetype is None:
+            out[entity.name] = membership
+            continue
+        active_n = n_periods - entity.start_period
+        if active_n < 1:
+            out[entity.name] = membership
+            continue
+        overrides_dict = entity.overrides.model_dump() if entity.overrides is not None else None
+        shift = _resolve_shift(archetype, active_n, overrides_dict)
+        boundaries = _segment_boundaries(archetype, active_n, shift)
+        for seg_idx in range(len(archetype.curve_segments)):
+            local_start = boundaries[seg_idx]
+            local_end = boundaries[seg_idx + 1]
+            absolute_start = entity.start_period + local_start
+            absolute_end = entity.start_period + local_end
+            for p in range(absolute_start, absolute_end):
+                if 0 <= p < n_periods:
+                    membership[p] = seg_idx
+        out[entity.name] = membership
+    return out
+
+
+def _three_level_anova(
+    cells: list[tuple[str, str, np.ndarray]],
+) -> Optional[dict[str, float]]:
+    """Compute the three-level nested-ANOVA SS / DOF decomposition.
+
+    ``cells`` is a list of ``(group_label, entity_label, values)``
+    tuples. Each tuple represents one ``(group, entity)`` cell with at
+    least one finite observation. The decomposition is:
+
+        ss_total = ss_between + ss_within_entity + ss_residual
+
+    where
+
+      * ``ss_between`` is the variance between group means around the
+        grand mean (weighted by per-group N);
+      * ``ss_within_entity`` is the variance between entity means
+        around their group mean (weighted by per-cell N);
+      * ``ss_residual`` is the within-cell residual variance.
+
+    Returns ``None`` when ``cells`` is empty or carries zero
+    observations in total. The caller filters those out — the manifest
+    section omits records with no data rather than emitting zero-only
+    entries.
+    """
+    if not cells:
+        return None
+    all_values = np.concatenate([c[2] for c in cells])
+    n_total = int(all_values.size)
+    if n_total == 0:
+        return None
+    grand_mean = float(np.mean(all_values))
+    ss_total = float(np.sum((all_values - grand_mean) ** 2))
+
+    cell_n: list[int] = []
+    cell_mean: list[float] = []
+    cell_ss_residual: list[float] = []
+    group_labels: list[str] = []
+    for group_label, _entity_label, values in cells:
+        n = int(values.size)
+        if n == 0:
+            continue
+        mean = float(np.mean(values))
+        cell_n.append(n)
+        cell_mean.append(mean)
+        cell_ss_residual.append(float(np.sum((values - mean) ** 2)))
+        group_labels.append(group_label)
+
+    if not cell_n:
+        return None
+
+    group_totals: dict[str, tuple[float, int]] = {}
+    for label, n, mean in zip(group_labels, cell_n, cell_mean):
+        prev_sum, prev_n = group_totals.get(label, (0.0, 0))
+        group_totals[label] = (prev_sum + n * mean, prev_n + n)
+    group_mean_by_label: dict[str, float] = {
+        label: (s / n if n > 0 else 0.0) for label, (s, n) in group_totals.items()
+    }
+
+    ss_between = 0.0
+    for label, (_, n) in group_totals.items():
+        diff = group_mean_by_label[label] - grand_mean
+        ss_between += float(n) * diff * diff
+
+    ss_within_entity = 0.0
+    for label, n, mean in zip(group_labels, cell_n, cell_mean):
+        diff = mean - group_mean_by_label[label]
+        ss_within_entity += float(n) * diff * diff
+
+    ss_residual = float(sum(cell_ss_residual))
+
+    n_groups = len(group_totals)
+    n_cells = len(cell_n)
+    df_between = max(n_groups - 1, 0)
+    df_within = max(n_cells - n_groups, 0)
+    df_residual = max(n_total - n_cells, 0)
+
+    return {
+        "ss_total": ss_total,
+        "ss_between": ss_between,
+        "ss_within_entity": ss_within_entity,
+        "ss_residual": ss_residual,
+        "df_between": float(df_between),
+        "df_within": float(df_within),
+        "df_residual": float(df_residual),
+        "n_observations": float(n_total),
+    }
+
+
+def _make_variance_partition(
+    metric: str,
+    scope: str,
+    scope_name: str,
+    anova: dict[str, float],
+    cold_start_entities_excluded: int,
+) -> VariancePartition:
+    """Translate an ``_three_level_anova`` result dict into a manifest record."""
+    ss_total = anova["ss_total"]
+    if ss_total > 0.0:
+        f_between = anova["ss_between"] / ss_total
+        f_within = anova["ss_within_entity"] / ss_total
+        f_residual = anova["ss_residual"] / ss_total
+    else:
+        f_between = 0.0
+        f_within = 0.0
+        f_residual = 0.0
+    return VariancePartition(
+        metric=metric,
+        scope=scope,
+        scope_name=scope_name,
+        ss_between=float(anova["ss_between"]),
+        ss_within_entity=float(anova["ss_within_entity"]),
+        ss_residual=float(anova["ss_residual"]),
+        fraction_between=float(f_between),
+        fraction_within_entity=float(f_within),
+        fraction_residual=float(f_residual),
+        degrees_of_freedom_between=int(anova["df_between"]),
+        degrees_of_freedom_within=int(anova["df_within"]),
+        degrees_of_freedom_residual=int(anova["df_residual"]),
+        n_observations=int(anova["n_observations"]),
+        cold_start_entities_excluded=int(cold_start_entities_excluded),
+    )
+
+
+def _build_variance_partitions_archetype(
+    config: PlotsimConfig,
+    entity_metrics: dict[str, dict[str, np.ndarray]],
+) -> list[VariancePartition]:
+    """One ``VariancePartition`` per metric with archetype as the group axis.
+
+    Groups every finite ``(entity, period)`` observation by
+    ``Entity.archetype``. Drops metrics with no finite observations
+    entirely — the section omits records with no data rather than
+    emitting zero-only entries (mirrors the pre-existing manifest
+    convention for sparse derived sections).
+    """
+    if not config.metrics or not entity_metrics:
+        return []
+    out: list[VariancePartition] = []
+    for metric in config.metrics:
+        cells: list[tuple[str, str, np.ndarray]] = []
+        cold_start = 0
+        for entity in config.entities:
+            per_metric = entity_metrics.get(entity.name)
+            if per_metric is None:
+                continue
+            arr = per_metric.get(metric.name)
+            if arr is None:
+                continue
+            arr64 = np.asarray(arr, dtype=np.float64)
+            mask = np.isfinite(arr64)
+            n_kept = int(np.sum(mask))
+            if n_kept < int(arr64.size):
+                cold_start += 1
+            if n_kept == 0:
+                continue
+            cells.append((entity.archetype, entity.name, arr64[mask]))
+        anova = _three_level_anova(cells)
+        if anova is None:
+            continue
+        out.append(
+            _make_variance_partition(
+                metric.name,
+                scope="archetype",
+                scope_name="all",
+                anova=anova,
+                cold_start_entities_excluded=cold_start,
+            )
+        )
+    out.sort(key=lambda v: v.metric)
+    return out
+
+
+def _build_variance_partitions_by_segment(
+    config: PlotsimConfig,
+    entity_metrics: dict[str, dict[str, np.ndarray]],
+    n_periods: int,
+) -> list[VariancePartition]:
+    """One ``VariancePartition`` per ``(metric, archetype)`` with segment as the group axis.
+
+    Restricted to entities of one archetype at a time — the section
+    never pools observations across archetypes. The segment index is
+    derived from each entity's own boundary computation
+    (``_entity_segment_membership``), so cold-start entities and
+    override-bearing entities contribute observations to the segment
+    they actually occupied at each period, not to the archetype
+    baseline segment.
+    """
+    if not config.metrics or not entity_metrics:
+        return []
+    arch_by_name = {a.name: a for a in config.archetypes}
+    entities_by_archetype: dict[str, list[Any]] = {}
+    for entity in config.entities:
+        entities_by_archetype.setdefault(entity.archetype, []).append(entity)
+
+    membership = _entity_segment_membership(config, n_periods)
+
+    out: list[VariancePartition] = []
+    for metric in config.metrics:
+        for archetype_name in sorted(entities_by_archetype.keys()):
+            if archetype_name not in arch_by_name:
+                continue
+            entities = entities_by_archetype[archetype_name]
+            grouped: dict[tuple[int, str], list[float]] = {}
+            cold_start = 0
+            for entity in entities:
+                per_metric = entity_metrics.get(entity.name)
+                if per_metric is None:
+                    continue
+                arr = per_metric.get(metric.name)
+                if arr is None:
+                    continue
+                arr64 = np.asarray(arr, dtype=np.float64)
+                seg_map = membership.get(entity.name, [-1] * n_periods)
+                has_nan = False
+                n_periods_local = int(arr64.size)
+                for p in range(n_periods_local):
+                    val = arr64[p]
+                    if not np.isfinite(val):
+                        has_nan = True
+                        continue
+                    seg_idx = seg_map[p] if p < len(seg_map) else -1
+                    if seg_idx < 0:
+                        continue
+                    grouped.setdefault((seg_idx, entity.name), []).append(float(val))
+                if has_nan:
+                    cold_start += 1
+            if not grouped:
+                continue
+            cells = [
+                (
+                    f"segment_{seg_idx}",
+                    entity_name,
+                    np.asarray(values, dtype=np.float64),
+                )
+                for (seg_idx, entity_name), values in grouped.items()
+            ]
+            anova = _three_level_anova(cells)
+            if anova is None:
+                continue
+            out.append(
+                _make_variance_partition(
+                    metric.name,
+                    scope="segment",
+                    scope_name=archetype_name,
+                    anova=anova,
+                    cold_start_entities_excluded=cold_start,
+                )
+            )
+    out.sort(key=lambda v: (v.metric, v.scope_name))
+    return out
+
+
+def _build_gp_kernel_fits(
+    config: PlotsimConfig,
+    n_periods: int,
+) -> list[GPKernelFit]:
+    """Per-archetype (and per-override-entity) RBF GP fits.
+
+    ``scope_type="archetype"`` for every archetype: fit against the
+    archetype's clean trajectory (no overrides, no cold-start shift).
+    ``scope_type="entity"`` for every entity carrying a non-None
+    ``overrides`` field — those entities have trajectories that diverge
+    from their archetype baseline, so a per-entity record captures the
+    actual shape the engine generated.
+
+    The piggyback on ``config.metrics`` (an empty list skips emission
+    entirely) matches the byte-equivalence contract: configs with no
+    metrics emit no GP records.
+
+    Records are emitted even when the fit fails to converge — the
+    ``converged=False`` path carries null hyperparameters / likelihood
+    so consumers can distinguish "fit failed" from "fit not attempted."
+    """
+    if not config.metrics or n_periods < 1:
+        return []
+    # Local imports: ``plotsim.gp`` is a leaf analytical module that
+    # depends on scipy; importing at module top would force scipy load
+    # on every manifest build, including no-metric runs.
+    from plotsim.gp import fit_rbf
+    from plotsim.trajectory import compute_trajectory
+
+    out: list[GPKernelFit] = []
+    arch_by_name = {a.name: a for a in config.archetypes}
+    x = np.arange(n_periods, dtype=np.float64)
+
+    for archetype_name in sorted(arch_by_name.keys()):
+        archetype = arch_by_name[archetype_name]
+        traj = compute_trajectory(archetype, n_periods)
+        result = fit_rbf(x, traj)
+        out.append(
+            GPKernelFit(
+                scope_type="archetype",
+                scope_name=archetype_name,
+                kernel_type="rbf",
+                hyperparameters=result.hyperparameters or {},
+                log_marginal_likelihood=result.log_marginal_likelihood,
+                n_train=result.n_train,
+                converged=result.converged,
+            )
+        )
+
+    for entity in config.entities:
+        if entity.overrides is None:
+            continue
+        entity_archetype = arch_by_name.get(entity.archetype)
+        if entity_archetype is None:
+            continue
+        overrides_dict = entity.overrides.model_dump()
+        traj = compute_trajectory(
+            entity_archetype,
+            n_periods,
+            overrides_dict,
+            entity.start_period,
+        )
+        result = fit_rbf(x, traj)
+        out.append(
+            GPKernelFit(
+                scope_type="entity",
+                scope_name=entity.name,
+                kernel_type="rbf",
+                hyperparameters=result.hyperparameters or {},
+                log_marginal_likelihood=result.log_marginal_likelihood,
+                n_train=result.n_train,
+                converged=result.converged,
+            )
+        )
+
+    return out
+
+
 # --- Build / write -----------------------------------------------------------
 
 
@@ -1736,6 +2245,27 @@ def build_manifest(
         regression_pairs_global = []
         regression_pairs_by_archetype = {}
 
+    # Mission 026: nested-ANOVA variance partitions (archetype-level and
+    # segment-within-archetype) plus per-archetype / per-override-entity
+    # GP kernel fits. All three sections piggyback on the same call-site
+    # contract: variance partitions need ``entity_metrics``; GP fits
+    # piggyback on ``config.metrics`` being non-empty so a no-metric
+    # config emits empty sections (the byte-equivalence contract).
+    if entity_metrics is not None:
+        variance_partitions = _build_variance_partitions_archetype(
+            config,
+            entity_metrics,
+        )
+        variance_partitions_by_segment = _build_variance_partitions_by_segment(
+            config,
+            entity_metrics,
+            n_periods,
+        )
+    else:
+        variance_partitions = []
+        variance_partitions_by_segment = []
+    gp_kernel_fits = _build_gp_kernel_fits(config, n_periods)
+
     return ManifestSchema(
         schema_version=MANIFEST_SCHEMA_VERSION,
         seed=int(config.seed),
@@ -1761,6 +2291,9 @@ def build_manifest(
         seasonal_decomposition=seasonal_decomposition,
         regression_pairs_global=regression_pairs_global,
         regression_pairs_by_archetype=regression_pairs_by_archetype,
+        variance_partitions=variance_partitions,
+        variance_partitions_by_segment=variance_partitions_by_segment,
+        gp_kernel_fits=gp_kernel_fits,
     )
 
 
@@ -1794,6 +2327,7 @@ def write_manifest(manifest: ManifestSchema, output_dir: Path) -> Path:
     "CorrelationCompensation",
     "CorrelationEntry",
     "EntityArchetypeAssignment",
+    "GPKernelFit",
     "TreatmentAssignment",
     "TreatmentCohort",
     "EventFiring",
@@ -1808,6 +2342,7 @@ def write_manifest(manifest: ManifestSchema, output_dir: Path) -> Path:
     "SeasonalDecomposition",
     "SourceEntityMapping",
     "TrajectorySample",
+    "VariancePartition",
     "build_manifest",
     "config_sha256",
     "write_manifest",
diff --git a/tests/test_manifest_variance_gp.py b/tests/test_manifest_variance_gp.py
new file mode 100644
index 0000000..8664427
--- /dev/null
+++ b/tests/test_manifest_variance_gp.py
@@ -0,0 +1,752 @@
+"""Manifest enrichment: nested-ANOVA variance partition + RBF GP kernel fit.
+
+Mission 026 adds three additive sections to ``ManifestSchema`` (no schema
+version bump — still 1.10):
+
+* ``variance_partitions`` — nested-ANOVA decomposition per metric with
+  ``Entity.archetype`` as the between-group axis. ``ss_between +
+  ss_within_entity + ss_residual == ss_total`` exactly (modulo
+  floating-point rounding).
+* ``variance_partitions_by_segment`` — same decomposition with curve-
+  segment-within-archetype as the axis. Per-archetype only; segments are
+  never pooled across archetypes.
+* ``gp_kernel_fits`` — per-archetype RBF kernel fits over the trajectory
+  shape, plus per-entity records for entities carrying ``overrides``.
+
+This module locks in:
+
+* ANOVA identity (``ss_between + ss_within_entity + ss_residual ==
+  ss_total``) for the archetype scope at ``rtol=1e-10``.
+* Per-archetype within-segment computation never pools across archetypes
+  (a multi-archetype config emits one record per archetype, each
+  restricted to its own entities).
+* Cold-start NaN cells are excluded from the partition and the
+  ``cold_start_entities_excluded`` counter surfaces the dropped count.
+* Empty sections for no-metric configs.
+* RBF length scale recovered on a sinusoidal trajectory is shorter than
+  the total period span (captures the periodicity).
+* Sigmoid trajectory fit converges with a length scale consistent with
+  monotone smoothness.
+* Flat trajectory: ``converged=False`` with null hyperparameters; the
+  manifest build does not raise.
+* Entity-level trajectory overrides emit ``scope_type="entity"`` records
+  alongside the archetype baseline.
+* No new ``ManifestSchema`` field outside the three Mission 026
+  additions — guards against future additive drift.
+"""
+
+from __future__ import annotations
+
+import warnings
+
+import numpy as np
+
+from plotsim import generate_tables_with_state
+from plotsim.config import (
+    Archetype,
+    Column,
+    CurveSegment,
+    Domain,
+    Entity,
+    EntityOverrides,
+    Metric,
+    OutputConfig,
+    PlotsimConfig,
+    SurrogateKeyWarning,
+    Table,
+    TimeWindow,
+)
+from plotsim.gp import fit_rbf
+from plotsim.manifest import (
+    GPKernelFit,
+    MANIFEST_SCHEMA_VERSION,
+    VariancePartition,
+    build_manifest,
+)
+
+
+# --- Fixtures --------------------------------------------------------------
+
+
+def _config_with_archetypes(
+    archetypes: list[Archetype],
+    entities: list[Entity],
+    *,
+    metrics: list[Metric] | None = None,
+) -> PlotsimConfig:
+    """Engine config with caller-supplied archetypes / entities.
+
+    Defaults to a single beta-distributed metric ``m1`` so the realized
+    ``entity_metrics`` arrays have well-conditioned variance for the
+    ANOVA decomposition. Pass ``metrics=[]`` to drive a no-metric run
+    (variance partitions and GP fits both empty in that path).
+    """
+    if metrics is None:
+        metrics = [
+            Metric(
+                name="m1",
+                label="m1",
+                distribution="beta",
+                params={"alpha": 2.0, "beta": 5.0},
+                polarity="positive",
+            ),
+        ]
+    if metrics:
+        fct_columns = [
+            Column(name="date_key", dtype="id", source="fk:dim_date.date_key"),
+            Column(name="entity_id", dtype="id", source="fk:dim_entity.entity_id"),
+        ] + [Column(name=m.name, dtype="float", source=f"metric:{m.name}") for m in metrics]
+        fct = Table(
+            name="fct_m",
+            type="fact",
+            grain="per_entity_per_period",
+            primary_key=["date_key", "entity_id"],
+            foreign_keys=["dim_date.date_key", "dim_entity.entity_id"],
+            columns=fct_columns,
+        )
+        tables = [
+            Table(
+                name="dim_date",
+                type="dim",
+                grain="per_period",
+                primary_key="date_key",
+                columns=[
+                    Column(name="date_key", dtype="id", source="pk"),
+                    Column(name="date", dtype="date", source="generated:date_key"),
+                ],
+            ),
+            Table(
+                name="dim_entity",
+                type="dim",
+                grain="per_entity",
+                primary_key="entity_id",
+                columns=[Column(name="entity_id", dtype="id", source="pk")],
+            ),
+            fct,
+        ]
+    else:
+        tables = [
+            Table(
+                name="dim_date",
+                type="dim",
+                grain="per_period",
+                primary_key="date_key",
+                columns=[
+                    Column(name="date_key", dtype="id", source="pk"),
+                    Column(name="date", dtype="date", source="generated:date_key"),
+                ],
+            ),
+            Table(
+                name="dim_entity",
+                type="dim",
+                grain="per_entity",
+                primary_key="entity_id",
+                columns=[Column(name="entity_id", dtype="id", source="pk")],
+            ),
+        ]
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", SurrogateKeyWarning)
+        return PlotsimConfig(
+            domain=Domain(
+                name="t",
+                description="t",
+                entity_type="entity",
+                entity_label="Entities",
+            ),
+            time_window=TimeWindow(
+                start="2024-01",
+                end="2024-12",
+                granularity="monthly",
+            ),
+            seed=0,
+            metrics=metrics,
+            archetypes=archetypes,
+            entities=entities,
+            tables=tables,
+            output=OutputConfig(format="csv", directory="out/m26"),
+        )
+
+
+def _three_segment_archetype(name: str, levels: tuple[float, float, float]) -> Archetype:
+    """Three-segment plateau archetype with caller-specified levels.
+
+    Used as the variance-partition workhorse: distinct per-segment
+    levels guarantee positive between-segment variance, so the segment-
+    scope partition has non-trivial structure.
+    """
+    return Archetype(
+        name=name,
+        label=name,
+        description=f"three-segment plateau {levels}",
+        curve_segments=[
+            CurveSegment(
+                curve="plateau",
+                params={"level": levels[0]},
+                start_pct=0.0,
+                end_pct=0.34,
+            ),
+            CurveSegment(
+                curve="plateau",
+                params={"level": levels[1]},
+                start_pct=0.34,
+                end_pct=0.67,
+            ),
+            CurveSegment(
+                curve="plateau",
+                params={"level": levels[2]},
+                start_pct=0.67,
+                end_pct=1.0,
+            ),
+        ],
+    )
+
+
+def _flat_archetype(name: str = "flat", level: float = 0.5) -> Archetype:
+    return Archetype(
+        name=name,
+        label=name,
+        description="constant plateau",
+        curve_segments=[
+            CurveSegment(
+                curve="plateau",
+                params={"level": level},
+                start_pct=0.0,
+                end_pct=1.0,
+            ),
+        ],
+    )
+
+
+def _sigmoid_archetype(name: str = "sigmoid") -> Archetype:
+    """S-curve archetype: monotone rise from 0 to 1 over the window.
+
+    Sigmoid takes a ``rising: bool`` parameter; default ``True`` produces
+    the canonical 0→1 transition centered mid-window. The RBF GP fit on
+    this trajectory should converge with a length scale consistent with
+    monotone smoothness — neither very short (flat) nor very long
+    (periodic).
+    """
+    return Archetype(
+        name=name,
+        label=name,
+        description="sigmoid rise",
+        curve_segments=[
+            CurveSegment(
+                curve="sigmoid",
+                params={"rising": True},
+                start_pct=0.0,
+                end_pct=1.0,
+            ),
+        ],
+    )
+
+
+def _oscillating_archetype(name: str = "oscillating", period: int = 6) -> Archetype:
+    """Oscillating archetype: sinusoidal cycle.
+
+    ``period`` is the cycle length in periods. With ``n_periods=12`` and
+    ``period=6`` the trajectory exhibits two full cycles, so the RBF GP
+    fit's recovered length scale should be measurably shorter than the
+    full window length.
+    """
+    return Archetype(
+        name=name,
+        label=name,
+        description="sinusoidal cycle",
+        curve_segments=[
+            CurveSegment(
+                curve="oscillating",
+                params={"period": period},
+                start_pct=0.0,
+                end_pct=1.0,
+            ),
+        ],
+    )
+
+
+# --- Variance partition: ANOVA identity ------------------------------------
+
+
+def test_variance_partition_archetype_anova_identity():
+    """Acceptance #1: ``ss_between + ss_within_entity + ss_residual ==
+    ss_total`` (rtol=1e-10) for a config with two archetypes and one
+    metric. ``ss_total`` is computed from the same finite observations
+    the partition saw — guards against a decomposition that drops or
+    double-counts mass.
+    """
+    archetypes = [
+        _three_segment_archetype("a_low", (0.2, 0.4, 0.6)),
+        _three_segment_archetype("a_high", (0.5, 0.7, 0.9)),
+    ]
+    entities = [Entity(name=f"lo_{i}", archetype="a_low", size=1) for i in range(8)] + [
+        Entity(name=f"hi_{i}", archetype="a_high", size=1) for i in range(8)
+    ]
+    cfg = _config_with_archetypes(archetypes, entities)
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    assert len(manifest.variance_partitions) == 1
+    rec = manifest.variance_partitions[0]
+    assert rec.metric == "m1"
+    assert rec.scope == "archetype"
+    assert rec.scope_name == "all"
+    pool = np.concatenate([state.entity_metrics[e.name]["m1"] for e in cfg.entities])
+    pool = pool[np.isfinite(pool)]
+    ss_total = float(np.sum((pool - float(np.mean(pool))) ** 2))
+    ss_sum = rec.ss_between + rec.ss_within_entity + rec.ss_residual
+    assert np.isclose(ss_sum, ss_total, rtol=1e-10, atol=1e-10)
+    # Fractions sum to 1 within the same tolerance.
+    fraction_sum = rec.fraction_between + rec.fraction_within_entity + rec.fraction_residual
+    assert np.isclose(fraction_sum, 1.0, rtol=1e-10, atol=1e-10)
+    # n_observations equals the pool size after NaN masking.
+    assert rec.n_observations == int(pool.size)
+
+
+def test_variance_partition_by_segment_groups_within_archetype():
+    """Acceptance #2: ``variance_partitions_by_segment`` emits one record
+    per ``(metric, archetype)`` and ``scope_name`` is the archetype name.
+    Segments are grouped within their parent archetype — the section
+    never pools observations across archetypes.
+    """
+    archetypes = [
+        _three_segment_archetype("a_low", (0.2, 0.4, 0.6)),
+        _three_segment_archetype("a_high", (0.5, 0.7, 0.9)),
+    ]
+    entities = [Entity(name=f"lo_{i}", archetype="a_low", size=1) for i in range(8)] + [
+        Entity(name=f"hi_{i}", archetype="a_high", size=1) for i in range(8)
+    ]
+    cfg = _config_with_archetypes(archetypes, entities)
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    by_segment = manifest.variance_partitions_by_segment
+    # One record per (metric, archetype); one metric × two archetypes.
+    assert len(by_segment) == 2
+    scope_names = {r.scope_name for r in by_segment}
+    assert scope_names == {"a_low", "a_high"}
+    for rec in by_segment:
+        assert rec.metric == "m1"
+        assert rec.scope == "segment"
+        # The archetype-restricted observation count equals the per-
+        # archetype pool size (after NaN masking) — proves the segment
+        # scope never pools across archetypes.
+        names = [e.name for e in cfg.entities if e.archetype == rec.scope_name]
+        pool = np.concatenate([state.entity_metrics[n]["m1"] for n in names])
+        pool = pool[np.isfinite(pool)]
+        assert rec.n_observations == int(pool.size)
+        ss_total = float(np.sum((pool - float(np.mean(pool))) ** 2))
+        ss_sum = rec.ss_between + rec.ss_within_entity + rec.ss_residual
+        assert np.isclose(ss_sum, ss_total, rtol=1e-10, atol=1e-10)
+
+
+def test_variance_partition_segment_between_is_positive_for_distinct_levels():
+    """Within an archetype, three distinct plateau levels produce
+    positive between-segment variance. A bug that pooled segments into
+    a single group would yield ``ss_between == 0``.
+    """
+    archetypes = [_three_segment_archetype("layered", (0.1, 0.5, 0.9))]
+    entities = [Entity(name=f"e_{i}", archetype="layered", size=1) for i in range(10)]
+    cfg = _config_with_archetypes(archetypes, entities)
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    by_segment = manifest.variance_partitions_by_segment
+    assert len(by_segment) == 1
+    rec = by_segment[0]
+    assert rec.scope_name == "layered"
+    assert rec.ss_between > 0.0
+    # df_between for three segments = 3 - 1 = 2.
+    assert rec.degrees_of_freedom_between == 2
+
+
+# --- Cold-start handling ----------------------------------------------------
+
+
+def test_variance_partition_excludes_cold_start_periods():
+    """Acceptance #3: a config with cold-start entities (arrival offset
+    > 0) excludes NaN-padded periods. ``n_observations`` is less than
+    ``n_entities × n_periods`` and ``cold_start_entities_excluded``
+    counts the cold-start entities that contributed at least one NaN
+    cell.
+    """
+    archetypes = [_flat_archetype("flat", level=0.5)]
+    entities = [
+        Entity(name="e_warm", archetype="flat", size=1, start_period=0),
+        Entity(name="e_cold_1", archetype="flat", size=1, start_period=3),
+        Entity(name="e_cold_2", archetype="flat", size=1, start_period=6),
+    ]
+    cfg = _config_with_archetypes(archetypes, entities)
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    n_periods = len(tables["dim_date"])
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    rec = manifest.variance_partitions[0]
+    assert rec.n_observations < len(entities) * n_periods
+    # Exact: e_warm contributes n_periods, e_cold_1 contributes
+    # n_periods - 3, e_cold_2 contributes n_periods - 6.
+    expected = n_periods + (n_periods - 3) + (n_periods - 6)
+    assert rec.n_observations == expected
+    # Two of the three entities had at least one NaN cell.
+    assert rec.cold_start_entities_excluded == 2
+
+
+# --- Empty section guard ----------------------------------------------------
+
+
+def test_variance_partition_empty_when_no_metrics():
+    """Acceptance #7: a config with no metrics emits empty
+    ``variance_partitions`` and empty ``variance_partitions_by_segment``.
+    Same guarantee for ``gp_kernel_fits`` — the section piggybacks on
+    metric presence so no-metric configs are byte-equivalent to the
+    pre-M26 lane modulo the new empty containers.
+    """
+    archetypes = [_flat_archetype()]
+    entities = [Entity(name=f"e_{i}", archetype="flat", size=1) for i in range(3)]
+    cfg = _config_with_archetypes(archetypes, entities, metrics=[])
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    assert manifest.variance_partitions == []
+    assert manifest.variance_partitions_by_segment == []
+    assert manifest.gp_kernel_fits == []
+
+
+# --- GP fits: shape characterization ---------------------------------------
+
+
+def test_gp_fit_archetype_record_emitted_per_archetype():
+    """One ``scope_type="archetype"`` record per declared archetype in
+    sorted-name order. The fit consumes the archetype's clean
+    trajectory (no overrides, no cold-start shift) so the kernel
+    characterizes the archetype's shape rather than any individual
+    entity's realized data.
+    """
+    archetypes = [
+        _flat_archetype("flat", level=0.5),
+        _sigmoid_archetype("sigmoid"),
+    ]
+    entities = [Entity(name=f"e_{i}", archetype="flat", size=1) for i in range(3)] + [
+        Entity(name=f"s_{i}", archetype="sigmoid", size=1) for i in range(3)
+    ]
+    cfg = _config_with_archetypes(archetypes, entities)
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    archetype_fits = [f for f in manifest.gp_kernel_fits if f.scope_type == "archetype"]
+    assert {f.scope_name for f in archetype_fits} == {"flat", "sigmoid"}
+    for fit in archetype_fits:
+        assert fit.kernel_type == "rbf"
+
+
+def test_gp_fit_sigmoid_converges_and_length_scale_is_smooth():
+    """Acceptance #5 (variant): a sigmoid archetype produces
+    ``converged=True`` with a length scale on the order of the
+    transition's smoothness — not a short cycle, not the full window
+    span. The exact value is optimizer-dependent; pin only the
+    monotone-smoothness signal: length scale > 1 (not collapsed to the
+    period grid) AND length scale < total span (not infinite).
+    """
+    cfg = _config_with_archetypes(
+        archetypes=[_sigmoid_archetype("sigmoid")],
+        entities=[Entity(name="e_0", archetype="sigmoid", size=1)],
+    )
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    n_periods = len(tables["dim_date"])
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    fit = next(f for f in manifest.gp_kernel_fits if f.scope_name == "sigmoid")
+    assert fit.converged is True
+    assert fit.hyperparameters is not None
+    length_scale = fit.hyperparameters["length_scale"]
+    assert 1.0 < length_scale < float(n_periods)
+
+
+def test_gp_fit_oscillating_recovers_short_length_scale():
+    """Acceptance #4: an oscillating archetype's recovered length scale
+    is measurably shorter than the total period span — the kernel
+    captures the periodicity rather than averaging it out.
+    """
+    cfg = _config_with_archetypes(
+        archetypes=[_oscillating_archetype("oscillating", period=4)],
+        entities=[Entity(name="e_0", archetype="oscillating", size=1)],
+    )
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    n_periods = len(tables["dim_date"])
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    fit = next(f for f in manifest.gp_kernel_fits if f.scope_name == "oscillating")
+    assert fit.converged is True
+    assert fit.hyperparameters is not None
+    length_scale = fit.hyperparameters["length_scale"]
+    # Periodicity ≪ window span — recovered length scale must reflect
+    # the cycle, not the full window. n_periods = 12 (Jan–Dec 2024).
+    assert length_scale < float(n_periods) / 2.0
+
+
+def test_gp_fit_flat_trajectory_does_not_converge():
+    """Acceptance #5 (variant): a flat archetype produces
+    ``converged=False`` with empty hyperparameters and ``None`` log
+    marginal likelihood. The manifest build does not raise — the
+    failed fit is recorded as a non-fatal record.
+    """
+    cfg = _config_with_archetypes(
+        archetypes=[_flat_archetype("flat", level=0.5)],
+        entities=[Entity(name="e_0", archetype="flat", size=1)],
+    )
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    fit = next(f for f in manifest.gp_kernel_fits if f.scope_name == "flat")
+    assert fit.converged is False
+    assert fit.hyperparameters == {}
+    assert fit.log_marginal_likelihood is None
+
+
+def test_gp_fit_entity_override_emits_entity_scope_record():
+    """Acceptance #6: a config with at least one entity carrying
+    ``overrides`` emits a ``scope_type="entity"`` record for that
+    entity AND keeps the ``scope_type="archetype"`` baseline record
+    for the archetype. Other entities sharing the archetype do NOT
+    produce per-entity records — only override-bearing entities do.
+    """
+    cfg = _config_with_archetypes(
+        archetypes=[_sigmoid_archetype("sigmoid")],
+        entities=[
+            Entity(
+                name="e_override",
+                archetype="sigmoid",
+                size=1,
+                overrides=EntityOverrides(inflection_month=2),
+            ),
+            Entity(name="e_default", archetype="sigmoid", size=1),
+        ],
+    )
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    by_scope = {(f.scope_type, f.scope_name) for f in manifest.gp_kernel_fits}
+    assert ("archetype", "sigmoid") in by_scope
+    assert ("entity", "e_override") in by_scope
+    # Default-entity does NOT get its own per-entity record.
+    assert ("entity", "e_default") not in by_scope
+
+
+# --- Byte-equivalence guard ------------------------------------------------
+
+
+def test_no_metric_config_emits_empty_m26_sections():
+    """Configs that don't trigger either Mission 026 section produce
+    a manifest with all three new fields at their empty defaults. The
+    test is structurally identical to the M25 byte-equivalence guard:
+    pop the new fields and assert they were the only delta from the
+    pre-M26 wire shape.
+    """
+    cfg = _config_with_archetypes(
+        archetypes=[_flat_archetype()],
+        entities=[Entity(name=f"e_{i}", archetype="flat", size=1) for i in range(3)],
+        metrics=[],
+    )
+    rng = np.random.default_rng(cfg.seed)
+    tables, state = generate_tables_with_state(cfg, rng)
+    manifest = build_manifest(
+        cfg,
+        state.trajectories,
+        tables,
+        scd_state=state.scd,
+        bridge_state=state.bridges,
+        entity_metrics=state.entity_metrics,
+    )
+    payload = manifest.model_dump(mode="json")
+    assert payload.pop("variance_partitions") == []
+    assert payload.pop("variance_partitions_by_segment") == []
+    assert payload.pop("gp_kernel_fits") == []
+    assert payload["schema_version"] == "1.10"
+
+
+# --- Schema-pin guard ------------------------------------------------------
+
+
+def test_schema_version_still_pinned_at_1_10():
+    """Mission 026 adds fields without bumping the schema version. Pins
+    the constant so a stray bump would surface as an import-time typed
+    assertion failure rather than a downstream consumer rejecting the
+    payload at parse time.
+    """
+    assert MANIFEST_SCHEMA_VERSION == "1.10"
+
+
+def test_variance_partition_model_carries_required_fields():
+    """``VariancePartition`` exposes the documented field set. Guards
+    against a partial rename or a dropped field on a refactor.
+    """
+    rec = VariancePartition(
+        metric="m1",
+        scope="archetype",
+        scope_name="all",
+        ss_between=1.0,
+        ss_within_entity=1.0,
+        ss_residual=1.0,
+        fraction_between=0.33,
+        fraction_within_entity=0.33,
+        fraction_residual=0.34,
+        degrees_of_freedom_between=1,
+        degrees_of_freedom_within=10,
+        degrees_of_freedom_residual=100,
+        n_observations=120,
+        cold_start_entities_excluded=2,
+    )
+    payload = rec.model_dump(mode="json")
+    assert set(payload.keys()) == {
+        "metric",
+        "scope",
+        "scope_name",
+        "ss_between",
+        "ss_within_entity",
+        "ss_residual",
+        "fraction_between",
+        "fraction_within_entity",
+        "fraction_residual",
+        "degrees_of_freedom_between",
+        "degrees_of_freedom_within",
+        "degrees_of_freedom_residual",
+        "n_observations",
+        "cold_start_entities_excluded",
+    }
+
+
+def test_gp_kernel_fit_model_carries_required_fields():
+    """``GPKernelFit`` exposes the documented field set."""
+    rec = GPKernelFit(
+        scope_type="archetype",
+        scope_name="flat",
+        kernel_type="rbf",
+        hyperparameters={"length_scale": 1.0, "signal_variance": 1.0, "noise_variance": 0.1},
+        log_marginal_likelihood=-10.0,
+        n_train=12,
+        converged=True,
+    )
+    payload = rec.model_dump(mode="json")
+    assert set(payload.keys()) == {
+        "scope_type",
+        "scope_name",
+        "kernel_type",
+        "hyperparameters",
+        "log_marginal_likelihood",
+        "n_train",
+        "converged",
+    }
+
+
+# --- Direct GP module smoke ------------------------------------------------
+
+
+def test_fit_rbf_flat_signal_does_not_converge():
+    """``fit_rbf`` on a constant input returns
+    ``converged=False`` / null hyperparameters / null likelihood.
+    Direct unit test of the leaf module — guards against a future
+    refactor that drops the flat-variance short-circuit.
+    """
+    x = np.arange(20, dtype=np.float64)
+    y = np.full(20, 0.5, dtype=np.float64)
+    result = fit_rbf(x, y)
+    assert result.converged is False
+    assert result.hyperparameters is None
+    assert result.log_marginal_likelihood is None
+    assert result.n_train == 20
+
+
+def test_fit_rbf_sinusoidal_converges_with_short_length_scale():
+    """``fit_rbf`` on a sinusoidal signal recovers a length scale much
+    smaller than the total span. The exact value is optimizer-
+    dependent; pin only the order-of-magnitude relationship.
+    """
+    n = 60
+    x = np.arange(n, dtype=np.float64)
+    y = np.sin(2.0 * np.pi * x / 10.0).astype(np.float64)
+    result = fit_rbf(x, y)
+    assert result.converged is True
+    assert result.hyperparameters is not None
+    length_scale = result.hyperparameters["length_scale"]
+    assert length_scale < float(n) / 2.0
+
+
+def test_fit_rbf_fewer_than_three_points_does_not_converge():
+    """Edge case: under 3 finite training points produces
+    ``converged=False``. The kernel has 3 hyperparameters; fewer
+    observations than hyperparameters is degenerate.
+    """
+    x = np.array([0.0, 1.0], dtype=np.float64)
+    y = np.array([0.0, 1.0], dtype=np.float64)
+    result = fit_rbf(x, y)
+    assert result.converged is False
+    assert result.n_train == 2