From 92787c08ec22ff6a5969405adef3d2dea0de675b Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 16 Jun 2026 00:19:42 +1000 Subject: [PATCH 01/10] docs(product): capability ladder, temporal-correlation spine + COP, PMO proposal adopted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture the Warpline capability ladder (Rung 0-4) as the roadmap backbone, the temporal-correlation spine and its consumer surface (the temporal common operating picture), and the PMO proposal that took the spine to the foundation. - roadmap.md: capability ladder; correlation spine RATIFIED (hub PDR-0025); the COP read surface; PM conditions inlined (squash-merge demo, episode ~= work-session, sequencing fence behind launch cutover + base impl). - PDR-0002: adopt the capability ladder as roadmap backbone. - PDR-0003: relay the hub ruling (PDR-0025) sponsoring warpline's temporal-episode axis into warpline's decision log, with all three PM conditions. - PMO proposal (adopted): §7 corrected — SHA-rewrite (squash/rebase) is distinct from rename (PDR-0021); the rename feed carries no rewrite reconciliation. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- ...temporal-correlation-spine-pmo-proposal.md | 183 +++++++++++++++++ .../0002-capability-ladder-roadmap.md | 60 ++++++ .../0003-hub-ruling-pdr-0025-relay.md | 69 +++++++ docs/product/roadmap.md | 184 ++++++++++++++++-- 4 files changed, 475 insertions(+), 21 deletions(-) create mode 100644 docs/integration/2026-06-16-temporal-correlation-spine-pmo-proposal.md create mode 100644 docs/product/decisions/0002-capability-ladder-roadmap.md create mode 100644 docs/product/decisions/0003-hub-ruling-pdr-0025-relay.md diff --git a/docs/integration/2026-06-16-temporal-correlation-spine-pmo-proposal.md b/docs/integration/2026-06-16-temporal-correlation-spine-pmo-proposal.md new file mode 100644 index 0000000..b3c1cf2 --- /dev/null +++ b/docs/integration/2026-06-16-temporal-correlation-spine-pmo-proposal.md @@ -0,0 +1,183 @@ +# PROPOSAL (for PMO / owner review) — Temporal Correlation Spine + +**Date:** 2026-06-16 +**Author:** Warpline product-owner session +**Status:** **ADOPTED — sponsored in full by the foundation (PDR-0025, 2026-06-16), +with three PM conditions.** Warpline is ratified as owner of the temporal-episode +axis (domain extension, not a new member, not a second identity authority). +Steps 1–2 (repo-local capture + reconstruction demo) are authorized as Warpline +autonomy under the ratified contract; the cross-member stamping convention remains +hub-authored and incrementally adopted (no sibling obligation freezes until the hub +authors it against Warpline's demonstrated reconstruction). The demo is now a +**validation/shaping gate, not go/no-go**. See `~/weft/pm/product/decisions/0025-sponsor-warpline-temporal-correlation-contract.md` +and warpline `docs/product/decisions/0003-hub-ruling-pdr-0025-relay.md`. 
+ +> **PM conditions (must hold):** (1) the demo must exercise the **squash-merge/rebase** +> case, not a clean-history fixture; (2) define the **episode boundary** (resolving +> toward *episode ≈ work-session*) with an honest dirty-tree/detached-HEAD fallback; +> (3) **sequencing fence** — this is Rung-3/token-tier and sits *behind* the +> four-member launch cutover and Warpline's base-impl fast-follow. +**Audience:** weft PMO / hub; owner (admission & authority-split decisions) +**Related:** `~/weft/doctrine.md` (PDR-0023 seams hub-authored; §5 enrich-only; +§10 honesty invariant), `~/weft/sei-standard.md` (locked identity spine), +`~/weft/uri-scheme.md` (no registry/broker — closed by SEI), +`~/weft/pm/2026-06-13-warpline-interface-lock.md` (proven-need gate), +`~/warpline/docs/product/roadmap.md` (temporal correlation spine; Rung 3). + +--- + +## 1. Summary + +A single logical change ripples across the federation: code moves, a Filigree +issue changes state, a Wardline finding appears, a Legis attestation is recorded, +the Loomweave graph shifts. Today **no member can reconstruct that bundle** — +"these meta-changes were attached to *this* change" — because the events siblings +emit do not carry the originating change's git anchor. + +We propose a **hub-authored, hub-blessed convention**: *every member stamps the +originating `branch@sha` on the events it emits*, as optional metadata. Warpline — +the federation's temporal / change-impact authority — owns the +**temporal-correlation contract** (what the anchor means, what granularity, how the bundle +reconstructs) and performs the read-time join. **Git owns the key value; no member +mirrors another's data; no new identifier is minted.** + +This is the mechanism behind Warpline roadmap Rung 3 (empirical blast radius): it +turns *"what actually broke when X changed"* from fuzzy time-window correlation +into a deterministic join. 
+ +**The payoff is a federation temporal common operating picture (COP).** With the +anchor in place, Warpline — as the temporal authority — answers a single +situational question for any frame (an edit, a rev range, a time window, a +`branch@sha`, a SEI): *"within this range, here's what everyone tells me they +changed"* — code, work-state, findings, attestations, graph deltas, **each +attributed to its owning member and composed at read time, never mirrored.** This +is the existing hub-blessed `include_federation` consult generalized from the +reverify worklist to the whole picture. Its honesty requirement is load-bearing: +the COP always renders coverage (who answered, who was unreachable, how stale) so +an unmonitored source never reads as "nothing changed." + +## 2. Problem / opportunity + +- The federation's value is the **seam** (PDR-0023). "What happened together" is a + first-class seam question and currently unanswerable end-to-end. +- Warpline can already see *code* change anchored to a commit, but the sibling + consequences are stranded in each member's own store with no shared correlation + key, so a human/agent must eyeball timestamps to relate them. +- The fix is cheap because the key **already exists**: every member is operating + inside a git working context (`branch@sha`) when it emits an event. We are not + inventing identity — we are asking everyone to *write down the SHA they already + have.* + +## 3. The proposal + +**The convention (hub-authored):** when a member emits a federation-relevant event +(issue state change, finding, attestation, graph delta), it records the +**originating `branch@sha`** — the working-context anchor at the moment the change +that triggered the event was made — as optional event metadata. 
+ +**Ownership split (doctrine-clean):** + +| Concern | Owner | +|---|---| +| The key *value* (`branch@sha`) | **git** (no member mints it) | +| The temporal-correlation *contract* (anchor semantics, granularity, reconstruction) | **Warpline** (its temporal domain) | +| Each event's *payload* (the issue, finding, attestation, delta) | **the emitting member** (unchanged authority) | +| Entity identity (which entity) | **Loomweave / SEI** (unchanged) | + +**Why this is not a second identity authority.** SEI identifies an *entity* (a +noun — *which* function; spatial; Loomweave's, LOCKED). The correlation anchor +identifies a *change episode* (a verb-moment — *which act of changing*; temporal). +Orthogonal axes that compose: an episode touches a set of SEIs. Warpline claims +the temporal axis only and refuses, as ever, to become a second entity-identity +authority. + +## 4. Doctrine compliance + +- **Enrich-only (§5).** The stamp is **optional metadata**. A sibling's core flow + (filing an issue, emitting a finding) never depends on Warpline to produce the + key, and absence degrades to "uncorrelated," never to a failure. Removing + Warpline breaks no member. +- **Hub-authored / hub-blessed (PDR-0023).** Warpline does **not** dictate this to + siblings peer-to-peer. It is proposed *to the foundation*; the hub authors the + convention and blesses the seam. +- **Proven-need gate.** Warpline earns the ask before billing four members for it + (see §6 sequencing). No sibling obligation freezes on a merely-claimed need. +- **Honesty invariant (§10 / `weft-reason`).** An unstamped or unjoinable event + reconstructs as *honestly partial* (`cause + reason_class + fix`), never as "no + related changes." +- **Not a registry/broker (`uri-scheme.md`).** This adds no `weft://` scheme, no + central broker, and no shared store. It is a *convention to stamp an existing git + value* plus a read-time join. 
This adjacency to the owner-closed decision is + called out explicitly for the owner to confirm the distinction holds. + +## 5. The ask of each member (illustrative — subject to hub authoring) + +| Member | Event it emits | Stamp | +|---|---|---| +| Filigree | issue state change, claim, close | originating `branch@sha` on the event/annotation | +| Wardline | finding, waiver, judge label | originating `branch@sha` on the finding record | +| Legis | attestation, sign-off, CI/check context | originating `branch@sha` on the attestation | +| Loomweave | analyze run / graph delta | the analyzed `branch@sha` (already partly present as `git_sha`) | + +In every case the member already holds the SHA; the ask is to *persist it as +correlation metadata*, not to compute anything new. + +## 6. Sequencing & gates (proven-need) + +1. **Warpline-local capture** *(repo-local; Warpline autonomy — no escalation):* + record `branch + HEAD SHA + detection timestamp` as the working-context anchor + on each detected change. Today the store keeps only the *introducing* + `commit_sha`, with no `branch` and no detection-context anchor. +2. **Demonstrate reconstruction** from the anchors Warpline can already see (its + own change events + any sibling events that happen to carry a commit), proving + the bundle is useful even before universal stamping. +3. **Take the proven need to the PMO** — *this document.* +4. **Hub authors + blesses the convention**; members adopt incrementally + (enrich-only, so partial adoption already yields partial bundles). + +Steps 1–2 are Warpline's to execute now; steps 3–4 are the owner/hub decision this +proposal requests. + +## 7. Cost & risk + +- **Cost: low.** Each member persists a SHA it already has. No new service, no + runtime dependency, no schema authority transfer. 
+- **Risk — history rewrite (the real one; PDR-0025 condition 1).** Squash-merge and + rebase **rewrite SHAs**: squash collapses N feature-branch commits into one *new* + mainline SHA and the branch is usually deleted, so **every `branch@sha` stamped + during the episode is orphaned the instant the PR merges** — and squash is a + *default* merge mode, not an edge case. **Correction (PM, PDR-0025):** the earlier + draft's candidate — "reconcile via the Legis→Loomweave rename/rewrite signal" — + **conflated two different operations.** A *rename* (path→locator, Loomweave-owned, + PDR-0021) is **not** a *SHA-rewrite*; the rename feed carries no rewrite + reconciliation. SHA-rewrite reconciliation is the genuinely-unowned, load-bearing + question, and **the demo must bite there** (a representative squash-merge fixture, + not clean history). **Candidate to test (not prescribed — Warpline's contract to + shape):** Legis is the PR/CI authority that actually *observes the merge*, so it + could emit a **merge-mapping** — `{squashed-away SHAs} → {new mainline SHA}` — + distinct from the rename feed and on-charter for Legis (itself a future + hub-blessed seam under prove-the-need). The demo's job is to show whether + reconstruction *needs* that signal or survives on `branch` + episode-boundary + alone. +- **Risk — granularity.** Per-commit vs per-push vs per-work-session. Too fine + fragments a logical change; too coarse blurs unrelated work. +- **Risk — dirty tree / detached HEAD.** Uncommitted work and detached HEAD have no + clean `branch@sha`; the contract needs an honest fallback (and an honest + `reason_class`). + +## 8. Decision requested + +1. Does the PMO/owner sponsor a **Warpline-owned temporal-correlation contract** as + a domain extension (temporal-episode axis, orthogonal to SEI)? +2. Approve the **proven-need sequence** (§6) — Warpline ships local capture + + reconstruction *before* any sibling ask. +3. 
Bless, *in principle and pending Warpline's demonstration*, the hub-authored + **"stamp the originating SHA"** convention, to be authored by the hub (not by + Warpline) when the need is proven. + +## 9. Out of scope / what this is NOT + +- Not a shared cross-member store; not a federation aggregator. +- Not a new minted identifier or a `weft://` scheme or a broker. +- Not a Warpline mandate on siblings — the hub authors the convention. +- Not a gate: correlation is advisory enrichment, never an allow/deny verdict. diff --git a/docs/product/decisions/0002-capability-ladder-roadmap.md b/docs/product/decisions/0002-capability-ladder-roadmap.md new file mode 100644 index 0000000..3578e85 --- /dev/null +++ b/docs/product/decisions/0002-capability-ladder-roadmap.md @@ -0,0 +1,60 @@ +# PDR-0002 - Capture The Warpline Capability Ladder As Roadmap Backbone + +Date: 2026-06-15 +Status: accepted +Author: Claude (product owner session) +Owner sign-off: roadmap maintenance is autonomous under the `vision.md` authority +grant ("maintain these product artifacts and append Product Decision Records"); +no vision/strategy/authority change is made here. +Supersedes: none +Related: `roadmap.md` (capability ladder + Now/Next/Later), `vision.md`, +`current-state.md`, `~/weft/members/warpline.md`, `~/weft/pm/2026-06-13-warpline-interface-lock.md` + +## Context + +Warpline was admitted as the 5th Weft member (owner, PDR-0022, 2026-06-14), pulled +into the launch envelope earlier than planned because it closed a gap the release +needed. Its seam *contracts* are frozen; its consumer *implementations* and much of +its diagnostic/predictive value are fast-follow. A live orientation found Warpline +"wobbly" on both integration (inbound seams mostly RESERVED-SHAPE / unimplemented) +and functionality (SEI resolved at ingest only; snapshots never auto-captured; two +of the four reverify enrichment dimensions inert). 
The product workspace had a +tactical Now/Next/Later roadmap but no durable statement of *where the temporal +authority is going* — so each session risked re-deriving the direction. + +## The call + +Adopt a five-rung **capability ladder** as the roadmap's directional backbone, and +record `commands.py` decomposition as **Rung 0**: + +- **Rung 0** — modularity foundation (split the 959-LOC `commands.py`). +- **Rung 1** — descriptive, made complete & self-healing (SEI re-resolution; auto + snapshot capture; honesty completion). +- **Rung 2** — diagnostic (co-change graph; verification freshness; light up the + Wardline/Legis reverify enrichment). +- **Rung 3** — predictive (empirical blast radius; preflight prediction; risk + trajectory). +- **Rung 4** — temporal fabric (counterfactual queries; ownership drift; fleet-wide + temporal impact; semantic change typing). + +The ladder is intent, not a schedule; horizons map to Now/Next/Later in `roadmap.md`. + +## Rationale + +The rungs are not a separate backlog from stabilization — they are the same road. +Rung 0/1 *are* the wobble fixes; Rung 2+ are the capabilities only a member with +cross-run history keyed on SEI can own. Capturing them durably keeps the direction +stable across sessions and frames every near-term fix as the first step of the +long-horizon bet, without crossing the authority boundary (no vision, federation +authority-split, admission, or sibling-repo change is decided here). + +## Reversal trigger + +Reopen this decision if any of the following happens: + +- The owner redirects Warpline's domain away from temporal/change-impact. +- A rung's premise is falsified in practice (e.g. the co-change graph proves too + noisy to be advisory-useful on real repos), in which case that rung is cut or + reshaped, not silently carried. +- Rung 1 re-resolution + auto-capture do not materially raise SEI-join coverage or + reduce `NO_SNAPSHOT` answers, indicating the spine problem is mis-diagnosed. 
diff --git a/docs/product/decisions/0003-hub-ruling-pdr-0025-relay.md b/docs/product/decisions/0003-hub-ruling-pdr-0025-relay.md new file mode 100644 index 0000000..30bae32 --- /dev/null +++ b/docs/product/decisions/0003-hub-ruling-pdr-0025-relay.md @@ -0,0 +1,69 @@ +# PDR-0003 - Hub Ruling Relay: PDR-0025 Sponsors The Temporal-Correlation Contract + +Date: 2026-06-16 +Status: accepted +Author: Claude (product owner session) +Owner sign-off: this is a relay of the foundation's owner ruling (PDR-0025); no new +warpline authority is claimed here. Recording the ruling in warpline's decision log +is autonomous under the `vision.md` grant ("append Product Decision Records"). +Supersedes: none +Related: `~/weft/pm/product/decisions/0025-sponsor-warpline-temporal-correlation-contract.md`, +`docs/integration/2026-06-16-temporal-correlation-spine-pmo-proposal.md`, +`roadmap.md` (temporal correlation spine + COP), PDR-0002 (capability ladder), +`~/weft` PDR-0021 / PDR-0023 / PDR-0024, `~/weft/uri-scheme.md`. + +## Context + +Warpline authored the "Temporal Correlation Spine" proposal (2026-06-16) and took it +to the foundation. The foundation answered with **PDR-0025**. This PDR carries that +ruling back into warpline's workspace so the warpline session operates under the +ratified contract and none of the attached conditions is lost. + +## The ruling (PDR-0025, owner, 2026-06-16) + +- **Sponsored in full, now.** Warpline is ratified as **owner of the + temporal-episode axis** — the temporal-correlation contract (anchor semantics, + granularity, reconstruction). A **domain extension** within warpline's + already-admitted temporal/change-impact authority; **NOT** a new member and + **NOT** a second entity-identity authority (SEI stays Loomweave's single + authority; the episode axis is orthogonal). 
+- **Owner overrode the PM's "bless-in-principle-pending-demo" default.** The + authority is granted ahead of the demonstration, so **the demo is a + validation/shaping gate, not a go/no-go gate.** +- **No-broker line holds (owner-confirmed).** Stamping an existing git value on each + member's own events + a decentralized read-time join is enrich-only metadata — no + central store, no minted identifier, no broker, no `weft://` scheme. The + owner-closed `weft://` decision is not reopened. + +## What this authorizes vs. what still gates + +- **Authorized now (no further sign-off):** steps 1–2 — repo-local capture + (`branch + HEAD SHA + detection timestamp` working-context anchor; today the store + keeps only the introducing `commit_sha`) and a reconstruction demonstration. Sits + *behind* the launch cutover and warpline's base-impl fast-follow (see condition 3). +- **Still gated:** the cross-member "stamp the originating SHA" convention is + authored **hub-side** and adopted **incrementally**; no sibling obligation freezes + until the hub authors it against warpline's demonstrated reconstruction. + +## PM conditions carried (all three) + +1. **Squash-merge/rebase is the demo's load-bearing case** — not clean history. + Squash collapses N commits into one new mainline SHA (branch deleted), orphaning + every stamped anchor at merge. This is a *SHA-rewrite*, distinct from a *rename* + (PDR-0021, Loomweave-owned, path→locator) — the rename feed carries no rewrite + reconciliation. (Corrects the proposal's original §7 conflation.) Candidate to + test, not prescribed: a **Legis merge-mapping** `{squashed-away SHAs} → {new + mainline SHA}` (Legis observes the merge; a future hub-blessed seam under + prove-the-need) vs. surviving on `branch` + episode-boundary alone. +2. **Define the episode boundary** — resolves toward *episode ≈ work-session* (not + per-commit), with an honest `weft-reason` fallback for dirty-tree / detached-HEAD. +3. 
**Sequencing fence** — Rung-3 / token-tier; must not compete with the four-member + launch cutover or warpline's base-impl fast-follow; steps 1–2 are cheap and + parallelizable, capacity permitting, behind both. + +## Reversal trigger + +Inherits PDR-0025: revisit if warpline's reconstruction demonstration fails to +produce a useful bundle on a **rewritten-history (squash-merge)** fixture, or if the +read-time join cannot stay decentralized (a member forced to become a central join +store would breach the no-broker confirmation and reopen PDR-0025). diff --git a/docs/product/roadmap.md b/docs/product/roadmap.md index 3a72a43..ae938be 100644 --- a/docs/product/roadmap.md +++ b/docs/product/roadmap.md @@ -1,37 +1,179 @@ # Roadmap - Warpline -Updated: 2026-06-13 (PDR-0001) +Updated: 2026-06-16 (PDR-0002 — the capability ladder; PDR-0003 — hub ruling PDR-0025 relay) Sequencing, WSJF / cost-of-delay, and dated forecasts are produced by program-management. This file records bets as intent, not a delivery schedule. Do not compute WSJF here; hand the committed bet over for sequencing. +## The capability ladder (the directional backbone) + +Warpline's value comes from the one thing no other federation member can hold: +**cross-run history keyed on a stable identity (SEI).** Loomweave is amnesiac by +design (it owns *now*); Warpline is the only member that can *remember*. The +ladder below is how that monopoly compounds — each rung enriches the per-entity +**temporal dossier** an agent reaches by SEI before it claims "done." Every rung +stays advisory, enrich-only, and honest (`cause + reason_class + fix`); none of +them ever gate. + +> The near-term stabilization gaps and the long-horizon vision are the *same +> road*. Rungs are capability tiers, not a delivery schedule — see Now/Next/Later +> for horizon intent. 
+ +### Rung 0 — Modularity foundation *(hygiene; unblocks the rest)* + +- **Split `commands.py`** (959 LOC; Loomweave emits a weak-modularity finding and + pyright's reference-resolution times out on it). All six tool bodies live in + this one module, so it is the chokepoint every later rung edits. + Behaviour-preserving decomposition into a cohesive command package (per-tool / per-seam + modules) before new capability lands on top of it. + +### Rung 1 — Descriptive, made complete & self-healing *(stabilize the spine)* + +- **Self-healing SEI re-resolution.** SEI is resolved at ingest only, so any event + ingested while Loomweave was unreachable is stored `sei: null` *permanently* — + silently degrading every join to the fragile `locator`. Add a re-resolution + sweep that re-keys `sei: null` change-events whenever Loomweave is reachable. + The spine is only worth what fraction of it is joinable. +- **Auto edge-snapshot capture.** The post-commit hook ingests change events but + never captures a snapshot, so `impact_radius` / `reverify` return `NO_SNAPSHOT` + by default. Wire capture (or lazy on-demand capture) into the ingest path so the + headline reads are non-empty in normal use. +- **Honesty completion.** Every enrichment dimension carries `cause + reason_class + + fix` (the `weft-reason` contract) so an inert seam reads as inert, never as a + true-negative. (G1 in flight.) + +### Rung 2 — Diagnostic *(behaviour over time — capabilities only Warpline can own)* + +- **Temporal coupling / co-change graph (SEI-keyed).** "These entities change + together 84% of the time — with *no* call edge between them." Structural + analysis physically cannot see this. Powers the completeness check no other tool + can make: *"you touched X; history says Y moves with it 9/10 times and you + didn't."* +- **Verification freshness — staleness-of-trust.** Track `last_verified` (CI green, + test pass, Legis attestation, Filigree closure), not just `last_changed`. 
+ Reverify shifts from "changed since HEAD~1" to *"changed since last proven-good,"* + with a trust-decay signal. +- **Light up the flagship's inert dimensions.** The reverify worklist freezes four + enrichment slots; only work (Filigree, SEAM 2) is live. Implement SEAM 3 (Wardline + risk-by-SEI) and SEAM 4 (Legis governance/provenance) inbound reads so the + worklist sorts by risk/governance, not just depth. (Both RESERVED-SHAPE — proving + consumption is what freezes them.) + +### Rung 3 — Predictive *(forecast — the "throw tokens at it" tier)* + +- **Empirical blast radius.** Static blast radius says what *could* be affected; + Warpline learns what *was* — when X changed historically, which Wardline finding + appeared, which Filigree issue reopened, which test went red, which attestation + failed. Yields a historical regression rate per entity. +- **Preflight prediction.** Co-change graph + causality run *forward*: "if you + touch X, history predicts you'll also touch {Y, Z} and must re-verify {A, B}, + ~N% confidence." +- **Risk trajectory.** Fuse churn × findings-over-time × fan-in into a *time-aware* + hotspot score with a slope: "highest-risk entity this quarter, and getting + worse." + +### Rung 4 — The temporal fabric *(super-future)* + +- **Time-travel / counterfactual queries.** Reconstruct the full impact graph as of + any commit — replay for architecture. +- **Ownership & abandonment over time.** Churn velocity per maintainer, bus-factor + drift, agent-only-churn detection. +- **Fleet-wide temporal impact.** Federate temporal facts across the suite — + "this SEI's change in repo A historically precedes breakage in repo B" (the + PDR-0024 fleet frame). Must not violate the no-shared-store anti-goal: federate + by SEI join at read time, never a central mirror. +- **Semantic change typing.** Classify change kind (signature / behaviour / + refactor-only / doc-only) so reverify is proportionate to the risk of the change. 
+ +## Temporal correlation spine *(cross-cutting — enables Rung 3)* + +The SHA *is* the timeframe. A code change happens inside a working context +(`branch@sha` at the moment warpline detects it); the sibling "meta-changes" +attached to that change — an issue that moved in Filigree, a finding that appeared +in Wardline, an attestation in Legis, a graph delta in Loomweave — share that same +git anchor. If every event carries the anchor, the full bundle reconstructs by a +read-time join, with **no member mirroring another's data and no new minted +identifier** (git already owns the key — enrich-only by construction). This is the +mechanism that turns Rung 3's "what actually broke when X changed" from fuzzy +temporal correlation into a trivial join. + +- **Warpline-local capture *(within repo autonomy)*.** The store records the + *introducing* commit (`change_events.commit_sha`) but has **no `branch`** and no + **detection/working-context** anchor distinct from it. Capture `branch + HEAD + SHA + detection timestamp` as the working-context anchor on each detected change. + A fact about warpline's own observation; imposes nothing on siblings. +- **Federation correlation contract *(RATIFIED — PDR-0025, 2026-06-16)*.** The + foundation **sponsored the contract in full**: warpline owns the + **temporal-episode axis** (a domain extension, *not* a second identity authority; + orthogonal to SEI). The no-broker line is owner-confirmed (enrich-only metadata + + decentralized read-time join). **Steps 1–2 (local capture + reconstruction demo) + are authorized as warpline autonomy under the ratified contract**; the demo is now + a *validation/shaping* gate, not go/no-go. The cross-member "stamp the originating + `branch@sha`" convention is still **hub-authored and adopted incrementally** — no + sibling obligation (Filigree/Wardline/Legis/Loomweave) freezes until the hub + authors it against warpline's demonstrated reconstruction. Enrich-only throughout. 
+- **PM conditions (PDR-0025 — load-bearing):** + - **Squash-merge is the headline failure, not an edge case.** Squash/rebase + *rewrite* SHAs, orphaning every stamped anchor when a PR merges. This is distinct + from a *rename* (PDR-0021; the rename feed carries no rewrite reconciliation). + The demo MUST run on a real squash-merge fixture. Candidate to test (not + prescribed): a **Legis merge-mapping** `{squashed-away SHAs} → {new mainline + SHA}` (Legis observes the merge) — vs. surviving on `branch` + episode-boundary + alone. + - **Episode boundary ≈ work-session** (the desk/employee model), not per-commit; + with an honest `weft-reason` fallback for dirty-tree / detached-HEAD. + - **Sequencing fence:** Rung-3 / token-tier; sits *behind* the four-member launch + cutover (`weft-4b2f948f70`) and warpline's base-impl fast-follow. + +## Temporal common operating picture *(the consumer surface of the spine)* + +The spine is plumbing; the **temporal COP** is the product. As the federation's +temporal authority, warpline's headline job becomes: *given a frame — an edit, a +rev range, a time window, a `branch@sha`, or a SEI — return the assembled +cross-member picture, "within this range, here's what everyone tells me they +changed."* Code changes (warpline-owned), work-state moves (Filigree), findings +(Wardline), attestations (Legis), graph deltas (Loomweave) — **each fact +attributed to its owning member, composed at read time, never mirrored.** + +- **Mechanism:** the `include_federation` consult (`federation.py`) *generalized* — + from "enrich the reverify worklist" to "assemble the full temporal picture over a + range." The read surface is warpline-local (autonomy); its richness scales with + stamping adoption (enrich-only ramp — useful on whoever is reachable today, + fuller as the convention lands). 
+- **Coverage is part of the picture (non-negotiable honesty).** The COP always + renders *who answered, who was unreachable, and how stale* (`cause + reason_class + + fix` per source). A COP with a dark sector that *looks* empty is worse than no + COP — an unmonitored frame must never read as "nothing changed." + ## Now (committed, in-flight) -- **Owner-gated federation admission** - product-candidate readiness evidence is - in place; admission, glossary freeze, and sibling ticket dispatch remain the - owner's call. -- **Evidence freshness** - keep dogfood, productization, lint/type/test, and +- **Post-admission seam fast-follow** — Warpline was admitted as the 5th member + (owner, PDR-0022, 2026-06-14); seam *contracts* are frozen, consumer + *implementations* are an admitted fast-follow outside the launch cutover. The + PDR-0023 honesty work is landing (`weft-reason` G1, list-ergonomics G2, the + `include_federation` hub-blessed consult). +- **Rung 0 — `commands.py` refactor** — modularity foundation; behaviour-preserving. +- **Evidence freshness** — keep dogfood, productization, lint/type/test, and member-diff gates aligned as Warpline evolves. -- **MCP operator documentation** - keep README and evidence docs aligned with - the shipped MCP workflow, smoke command, and remaining P1 contract gaps. ## Next (shaped, decreasing certainty) -- **MCP contract refactor** - add namespaced aliases, specific output schemas, - list filters/sort/pagination, broader recoverable error taxonomy, and - resource contracts before glossary freeze. `structuredContent` and live - mutability/idempotency metadata are already present for current tools. -- **Bounded live-repo ingestion strategy** - replace unbounded live-member - backfill with explicit bounded, incremental, and resumable workflows. -- **Post-admission consumer package** - turn Warpline-owned draft contracts into - owner-approved sibling tickets only after admission. 
+- **Rung 1 — spine completion** — self-healing SEI re-resolution and auto + edge-snapshot capture, so SEI-keyed joins and the headline reads stop silently + degrading. +- **Rung 2 — diagnostic capabilities** — the co-change graph, verification-freshness + tracking, and lighting up the Wardline (risk) and Legis (governance) enrichment + on the reverify worklist. +- **Federation conformance oracle inclusion** — Warpline's 14 golden vectors join + the GS-7 four-member oracle as a fifth producer. ## Later (directional bets, no order, no dates) -- **Federation conformance oracle inclusion** - add Warpline MCP and JSON fixtures - to the federation contract corpus after glossary clearance. -- **Richer verification hints** - infer likely test commands from history and - project metadata without owning work state. -- **Rename and lineage continuity** - improve key-upgrade lineage when - Loomweave/SEI continuity evidence is available. +- **Rung 3 — predictive** — empirical blast radius, preflight prediction, and + time-aware risk-trajectory scoring. +- **Rung 4 — temporal fabric** — counterfactual graph reconstruction, ownership + drift, fleet-wide temporal impact, and semantic change typing. +- **Rename and lineage continuity** — settle the C′/A′ locator-rename source once a + proven need exists; largely subsumed by Rung 1 re-resolution (re-resolving to the + current SEI makes renames Loomweave's problem, already solved). From 947f8b73342d14ff9a4b6c64856c4e925b16caa2 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 16 Jun 2026 00:55:57 +1000 Subject: [PATCH 02/10] docs(plans): Rung 0/1/2 implementation plan (2 ultracode review rounds) Execution plan for the capability-ladder work, produced by a multi-agent design workflow (map -> specialist design -> reality/architecture/doctrine review -> synthesis) and hardened by a second adversarial build-readiness review. 
- Folds PDR-0025 conditions: squash-merge reconstruction demo as the load-bearing acceptance criterion, episode ~= work-session, sequencing fence behind launch. - Owner-session enhancements E1-E6 (demo deliverable, fence, full verification gate incl. wardline, dirty-tree honest fallback, co-change kill-switch, 1.1.0). - Round-2 resolutions (authoritative): fix a Rung 0 NameError blocker (_EDGES_FOR_COMPLETENESS), DEFER Track B (no wardline resolved-finding surface), pin migration v2 --- ...26-06-16-rung-0-1-2-implementation-plan.md | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 docs/plans/2026-06-16-rung-0-1-2-implementation-plan.md diff --git a/docs/plans/2026-06-16-rung-0-1-2-implementation-plan.md b/docs/plans/2026-06-16-rung-0-1-2-implementation-plan.md new file mode 100644 index 0000000..36ce88f --- /dev/null +++ b/docs/plans/2026-06-16-rung-0-1-2-implementation-plan.md @@ -0,0 +1,378 @@ +# Warpline Rung 0/1/2 + Working-Context Capture — Execution Plan + +**Status:** execution-ready for the base-impl tracks after the round-2 resolutions below. A second adversarial build-readiness review (reality / doctrine+completeness / sequencing) returned **fix-before-build**; its resolutions are folded in at §Build-readiness review (round 2) and **override the body below where they conflict**. Net effect: **Track B (verification freshness) is deferred** (no wardline resolved-finding surface exists), the COP public surface remains INTERFACE-PENDING, and two cross-member confirmations are newly required (wardline resolved-finding read; filigree `closed_at`). +**Scope:** behaviour-preserving decomposition of `commands.py` (Rung 0), spine stabilization (Rung 1: anchor capture + SEI re-resolution + auto-snapshot), diagnostic + COP read surface (Rung 2). 
+**Doctrine floor:** enrich-only / never-gating · SEI orthogonality · no-mirror · honesty invariant (present|absent|unavailable) · proven-need · frozen v1 contracts (6 MCP tools + envelope + error vocab; additive only, v2 = new URI never a mutation). + +--- + +## Owner-session review & enhancements (on top of the synthesized plan) + +The multi-agent design + 3-dimension review (reality / architecture / doctrine) is sound and the reconciliation log resolves the real blockers. The following are added on review against **PDR-0025** and the repo's shipping discipline — amendments to the plan below, not replacements. + +- **E1 — The reconstruction demo is a first-class deliverable, and squash-merge is its acceptance criterion (PDR-0025 cond 1).** The plan builds the *capture* (Rung 1b) and the *reconstruction surface* (Track D `cop.py`) but does not name the **demonstration** itself. Add a deliverable: **`tests/integration/test_reconstruction_demo.py`** that builds a **squash-merge fixture** (N feature-branch commits → one new mainline SHA, feature branch deleted) and asserts the bundle still reconstructs — either via a Legis merge-mapping `{squashed SHAs}→{new SHA}` (if shipped) **or** honestly degrades to `branch + episode-boundary` with an explicit `weft-reason` class. This fixture is the load-bearing pass/fail; a clean-history fixture does **not** satisfy PDR-0025. +- **E2 — Sequencing fence (PDR-0025 cond 3).** The "start NOW" list is correct for *readiness* but must respect priority: the **spine-specific** work (Rung 1b anchor, Track A co-change, Track D COP, the E1 demo) sits **behind** the four-member launch cutover (`weft-4b2f948f70`) and warpline's base-impl fast-follow. The **base-impl stabilization** (Rung 0 refactor, 1a runner, 1c re-resolution, 1d snapshot) is **not** fenced and is the right place to start. Treat the fence as priority, not a hard block. 
+- **E3 — Per-PR verification gate (repo discipline).** Every track lands behind the full gate, not just "suite green": `uv run ruff check .` · `uv run mypy` (strict) · `uv run pytest -q` · **`wardline scan . --fail-on ERROR`** (Rung 1b/1c/1d/Track A/D all touch git subprocess + loomweave-client input — trust-boundary surface). Fix findings at the boundary. +- **E4 — Dirty-tree honest fallback (PDR-0025 cond 2).** Rung 1b handles detached-HEAD (branch=NULL); add the uncommitted/dirty-tree case: when detection occurs against a dirty work tree, record the anchor with an honest `weft-reason` class (e.g. `working_tree_dirty`) rather than a clean-looking but unstable `head_sha`. Never emit a false-precise anchor. +- **E5 — Co-change ingest kill-switch.** Track A adds derivation to the hot commit/ingest path. Beyond the `>30` fan-out cap and fail-soft `try/except`, add a config/env switch (e.g. `WARPLINE_COCHANGE=0`) to disable derivation entirely, so a pathological repo can opt out without a code change. +- **E6 — Version + changelog.** These are additive **new capabilities**, so when they ship they warrant a **1.1.0** minor bump (settles the earlier open "next version" question for this line of work) with a CHANGELOG entry per track. Track in the eventual release PR, not per-commit. + +--- + +## Build-readiness review (round 2) — applied resolutions + +A second adversarial pass (reality / doctrine+completeness / build-sequencing) returned **fix-before-build**. These resolutions are authoritative and **override the body below where they conflict**. Grouped by severity. + +### Blockers (must, before build) + +- **B1 — `_EDGES_FOR_COMPLETENESS` NameError (Rung 0).** `capture_snapshot` (commands.py:933) reads `_EDGES_FOR_COMPLETENESS`, which stays in `commands.py` while Step 0.3 removes it. 
Fix: the Step 0.2 rename list is **five** items — add `EDGES_FOR_COMPLETENESS` — and Step 0.3 imports it: `from warpline._enrichment import EDGES_FOR_COMPLETENESS, completeness_warnings, edges_enrichment, is_stale, staleness_warnings`. Update `capture_snapshot`'s reference and add a characterization test covering its dict access. +- **B2 — E1 demo must be end-to-end, and needs a degradation signal.** The COP MCP tool is interface-pending, so the demo can't go through it. Fix: add a **minimal non-frozen internal CLI verb** `warpline cop --repo --frame …` (distinct from the future public MCP tool) so `test_reconstruction_demo.py` exercises a real bundle (satisfies PDR-0025's "useful bundle on rewritten history" reversal clause); and add a `weft_reason_class` field to the COP/frame output so the squash-merge honest-degradation path is assertable. *(Owner: confirm a non-frozen demo CLI is acceptable.)* +- **B3 — backfill anchor semantics (Rung 1b).** Resolve the contradiction: **backfill leaves all three anchor columns NULL** (reconstruction ≠ detection). Drop the "`detected_at` present" claim and assert all-NULL for backfilled rows in `test_anchor_capture.py`. (`INSERT OR IGNORE` means re-backfill won't touch existing rows — acknowledged.) +- **B4 — Track A kill-switch + skip-record missing from steps.** Add an explicit Track A step: read `WARPLINE_COCHANGE` in `update_co_change_pairs` (falsy/zero → skip + return); test `WARPLINE_COCHANGE=0` yields zero `co_change_pairs` rows. Record `coupling_skipped=high_fanout` via `store.log_health` **and** a count in the return dict; test it. +- **B5 — migration version ordering is a HARD gate.** Track A's `co_change_pairs` (v3) **must not merge before** Rung 1b's anchor columns (v2) are on `main` — else a DB opened in the gap lands at `user_version=3` and permanently skips v2. Treat Rung 1b → Track A merge order as a code dependency, not a soft priority. 
+- **B6 — Track B is DEFERRED (WardlineVerificationClient unbuildable).** Verified against wardline source: `dossier.trust` exposes only `active_findings` (`SuppressionState` ∈ ACTIVE/BASELINED/WAIVED; no RESOLVED, no resolution timestamp; suppressed is a count). There is no verification-event surface, so the fresh/stale/unverified temporal split can't be built. Resolution: **drop Track B from "start now."** The only honest piece available today is a binary *active-findings-present → risk-unverified* signal (no temporal freshness) — and that already overlaps Track C's risk dimension. True verification-freshness moves to **interface-pending**, gated on a new wardline resolved-finding+timestamp read (cross-member confirmation #1). + +### Majors (must) + +- **M1 — counts.** 5 importers of `warpline.commands` (4 test files + `dogfood.py`), not 4. Rung 0 LOC target is **~780** (assert `< 800`), not ~700; ~700 needs the optional SEAM-8 extraction (Q-pyright). +- **M2 — `_rev_range_commits` sites.** 2 remain in `commands.py` bodies (`change_list`:271, `entity_churn_count`:466); the 3rd (216) moves with `_resolve_changed_inputs` to `_blast.py` and is **not** a separate update site. +- **M3 — Track C insertion point.** Merge fed `risk`/`governance` into items **immediately after `consult_federation` and before `apply_overflow`/`apply_page`** (the full filtered+sorted list). Test a page-2 item is still enriched when `include_federation=True`. +- **M4 — Track D frame-kind specs.** Only `rev_range`/`sei` are specified. Add resolution for `time_window` (add `since`/`until` to `list_change_events`), `edit` (define the git command, e.g. `git diff HEAD`), and `branch_sha` fallback (define the rev-range). Any frame kind with no store support → add the store method or drop it from Track D scope. 
+- **M5 — reresolve delete target (Rung 1c).** On `IntegrityError` from the repoint UPDATE, **DELETE the null-keyed `change_event` row** (`entity_key_id=null_id`) that conflicts; the resolved-keyed row is canonical. Add a `test_reresolve.py` case where null- and resolved-keyed rows differ on `hunk_summary` and assert the resolved row's data is preserved (data-loss made explicit; Q7). +- **M6 — Rung 1d is always-on internally (pin Option A).** Lazy capture fires internally whenever loomweave is available; **no `auto_capture` inputSchema field is added in this PR** (keeps `test_fastfollow_dead_set_is_empty_for_every_tool` green). The opt-out toggle is interface-pending item 3 only. +- **M7 — co-change call placement (Track A).** Accumulate `entity_key_id`s into a `set[int]` across the full path+locator loop; call `update_co_change_pairs(repo_id, resolved, sorted(ids))` **once per commit after** the loop (and once per sha in backfill) — so the `>30` fan-out cap is per-commit, not per-locator. +- **M8 — E4 dirty-tree: add a context column.** Extend Rung 1b migration v2 with `detected_context TEXT` taking `{NULL/clean, working_tree_dirty, detached_head}` — the honest signal carrier (subsumes the detached-HEAD case and avoids overloading `detected_head_sha=NULL`). +- **M9 — migration reconcile edges + WAL note (Rung 1a).** Handle `user_version==0` with `meta.schema_version != '1'` (warn + adopt the meta value before running v>that). Document that `SCHEMA`'s `PRAGMA journal_mode=WAL` runs via `executescript` implicit-commit for fresh DBs (intentional, outside the `BEGIN IMMEDIATE` migration pattern). CI-test the legacy-reconcile and the `user_version > highest-known` warn paths. +- **M10 — list_change_events/timeline additive-column non-regression.** Add a test that `change_list`/`entity_timeline` return valid output with the new fields present on a v1-then-migrated DB; confirm `append_change_event` names columns explicitly (it does — store.py:233-250). 
+ +### Minors / nits (should) + +- `test_store.py` has **two** `schema_version()==1` assertions (lines 26 and 35) — update **both** to `==2`. +- `user_version > highest-known` warn path: emit to `health_log`; add a test. +- After Rung 1a, **`SCHEMA` DDL is frozen** — all schema changes go through `MIGRATIONS` (never add columns to `SCHEMA`). +- SQLite ≥3.35 assertion justification is the **`RETURNING` clause** already used in `create_edge_snapshot`, not `DROP COLUMN` (no migration here drops a column). +- Characterization-test helpers: lift `_init_repo`/`_commit` into `tests/conftest.py` rather than importing across test modules. +- **git.py merge coordination:** Rung 1b and Track A both edit `ingest_commit`; land Rung 1b first (the v2 migration must be on `main` before Track A's v3; see B5). +[… text lost in extraction: the remainder of this section, the reconciliation-log table header, and rows R1–R7 …] +| R8 | […] | Commits with `>30` changed entities skip pair generation and record `coupling_skipped=high_fanout` (low signal-to-noise when everything changes together). All co-change writes are `try/except` fail-soft. | +| R9 | **`compose_temporal_cop` in `federation.py`** (MAJOR, arch): violates that module's reverify-scoped docstring. | New module **`warpline/cop.py`**. `cop.py` imports the three `_consult_*` from `federation.py`; `federation.py` does not import `cop.py` (unidirectional). | +| R10 | **`VerificationClient` premature abstraction** (MAJOR, arch): `FiligreeVerificationClient` needs `closed_at` (not in frozen shape); legis has no transport. | Ship `WardlineVerificationClient` first (resolved-finding = verification event, already available). `FiligreeVerificationClient` deferred behind the `closed_at`-availability confirmation. Legis verification stays honestly `unavailable`. | +| R11 | **SEI-merge survivor ambiguity / UPDATE collision** (MINOR→raised, arch): `UPDATE … OR IGNORE` is not valid; a repoint can hit the `change_events` UNIQUE constraint. | Explicit handler: attempt repoint `UPDATE`; on `IntegrityError` (UNIQUE), `DELETE` the null-keyed duplicate row instead (resolved-keyed row is the survivor).
Carry `min(first_seen)`/`max(last_seen)` onto the survivor. Integration test for the twin-collision case is mandatory. | +| D1 | **Commit-keyed temporal axis vs ratified work-session episode** (MAJOR, doctrine): proposal ratifies a work-session episode boundary with squash/rebase fallback. | Per-event anchor columns (v2) are kept as the **substrate**, but the design explicitly records `detected_head_sha` as the working-context key distinct from `commit_sha`, and documents the **v4 `change_episodes` superset** as the work-session collapse target. Episode-boundary semantics are an OPEN QUESTION to confirm before any episode table is built — not built in this plan. Co-change (Rung 2) is keyed on commit for now and flagged as needing episode-collapse re-keying once the boundary is ratified (OPEN QUESTION Q6). | +| D2 | **Rung 2 overclaims risk/governance as "promised-frozen"** (MAJOR, doctrine): wardline/legis inbound are RESERVED-SHAPE pending proven-need. | Reworded: the enrichment merge pass is the **proven-need demonstration that earns the freeze**, not a pre-promised contract. It is additive and reversible; it does not lock the wardline/legis inbound shape. | +| r-minor | Misc reality nits: `_as_int` not called in `_rev_range_from_refs`; "30 test files" → 4 files import `commands` (31 total test files); `install_support.py:307-319` is `check_git_hook` not the body; `loomweave timeout=10` is per-tool-call. | Corrected inline throughout. 
| + +--- + +## Sequencing & dependency graph + +``` +Rung 0 (refactor, no schema) ── unblocks clean module boundaries for everything + │ + ├─► Rung 1a Migration runner (store.py) ── PREREQUISITE GATE for all schema work + │ ├─► Rung 1b Anchor columns (v2) + git.py population + │ ├─► Rung 1c SEI re-resolution sweep (reresolve.py + store merge core + CLI) + │ └─► Rung 1d Auto/lazy snapshot capture (tool bodies + hook + doctor) + │ + └─► Rung 2 (each track independent PR): + Track C Enrichment merge pass (reverify) ── SMALL, ship FIRST (tracer bullet) + Track A Co-change graph (v3) ── depends on Rung 1a runner + Track B Verification freshness (wardline-only first) + Track D COP internals (cop.py) ── public surface INTERFACE-PENDING +``` + +Hard dependencies: Rung 1b/c/d and Rung 2 Track A all depend on **Rung 1a (migration runner)**. Track C depends only on Rung 0. Tracks B and D depend only on Rung 0. The COP public tool and the anchor-output-on-frozen-tools surface depend on the user's concrete interface (see §INTERFACE PENDING). + +--- + +## RUNG 0 — Behaviour-preserving decomposition of `commands.py` + +**Goal:** drop `commands.py` from 959 LOC by extracting two internal helper modules. Zero change to `cli.py`, `mcp.py`, the 6 `SCHEMA_*` constants, and the 7 tool signatures. + +### Module layout (3 modules total) + +``` +src/warpline/ + commands.py # ~700 LOC: 7 tool bodies + 6 SCHEMA_* + local helpers + _enrichment.py # NEW ~85 LOC: pure staleness/completeness helpers + _blast.py # NEW ~130 LOC: blast-pipeline prep helpers +``` + +Underscore-prefixed module names signal internal-API status; no `__init__.py` re-export. Dependency is strictly one-way: `commands.py → {_enrichment, _blast}`; neither imports `commands`. + +> Naming note (arch minor): the reviewer suggested `_staleness.py`/`_resolver.py` as more cohesive. 
Acceptable either way; this plan keeps `_enrichment`/`_blast` for continuity with the doctrine vocabulary ("enrichment") and the `blast_radius` consumer. Decide at implementation; it is a rename, not a structural change. + +### Step 0.1 — Characterization tests (prerequisite, locks behaviour before any move) + +- **New:** `tests/test_enrichment_helpers.py` — unit tests for `is_stale` (commits_behind=0/>0/None+snapshot/None+no-snapshot), `edges_enrichment` (each completeness × stale/fresh), `staleness_warnings` (known/unknown commits_behind), `completeness_warnings` (NO_SNAPSHOT/SKIPPED/DELTA/FULL). Pure functions, no fixtures. Import from `commands` initially. +- **New:** `tests/test_blast_helpers.py` — tests for `rev_range_commits` (BadRevisionError on bad range, None passthrough), `resolve_changed_inputs` (known/unknown key ids, sei ref resolution, rev_range filtering — reuse `_init_repo`/`_commit` fixture pattern from `test_honesty_invariant.py`), `enrich_blast` (raw dict → (changed, affected) shape). Import from `commands` initially. +- Run full suite (166 tests + new) green. Baseline. +- **Doctrine:** these are the safety net proving extraction is behaviour-preserving; no contract touched. + +### Step 0.2 — Create `_enrichment.py` + +- Move `_is_stale`, `_edges_enrichment`, `_staleness_warnings`, `_completeness_warnings`, and the `_EDGES_FOR_COMPLETENESS` constant. Rename public (drop leading underscore) — five names, per B1: `is_stale`, `edges_enrichment`, `staleness_warnings`, `completeness_warnings`, `EDGES_FOR_COMPLETENESS`. +- **R2 fix:** in `is_stale`, replace `return _as_int(behind) > 0` with `return int(behind) > 0` (reached only after `behind is None` guard). Module imports: `from __future__ import annotations`, `from typing import Any`. **No `_as_int`, no store, no git, no I/O.** +- Retarget `tests/test_enrichment_helpers.py` imports to `warpline._enrichment`. Suite green. Commit.
+- **Doctrine (enrich-only):** `_enrichment.py` is structurally incapable of gating — verified by its import list (only `typing.Any`). + +### Step 0.3 — Wire `_enrichment.py` into `commands.py` + +- Add `from warpline._enrichment import EDGES_FOR_COMPLETENESS, completeness_warnings, edges_enrichment, is_stale, staleness_warnings`. Update 8 call sites, plus `capture_snapshot`'s `_EDGES_FOR_COMPLETENESS` reference (B1 — otherwise a NameError once the constant is removed). Remove the 4 bodies + constant from `commands.py`. Suite green. Commit. + +### Step 0.4 — Create `_blast.py` + +- Move `_rev_range_commits` → `rev_range_commits`, `_resolve_changed_inputs` → `resolve_changed_inputs`, `_enrich_blast` → `enrich_blast`. Keep a **private strict-assert `_as_int`** copy in `_blast.py` (for `resolve_changed_inputs`). +- **R2 note in code:** add a one-line comment that `_blast._as_int` is the strict-`assert isinstance(value, int)` form (matching the original `commands._as_int`), deliberately distinct from `propagation._as_int` (permissive int|str). Do not import either across modules. +- Imports: `from __future__ import annotations`, `subprocess`, `pathlib.Path`, `typing.Any`, `warpline.errors.BadRevisionError`, `warpline.store.WarplineStore`, `warpline.refs.entity_view`. `WarplineStore` is a **parameter**, never opened inside. +- Retarget `tests/test_blast_helpers.py` to `warpline._blast`. Suite green. +- **Doctrine (no-mirror / SEI-orthogonality):** `_blast.py` reads the store passed in, calls git rev-list, writes nothing, mints no identifier; operates on warpline-local `entity_key_id` integers and SEI strings supplied by the store. + +### Step 0.5 — Wire `_blast.py` into `commands.py` + +- Add `from warpline._blast import enrich_blast, resolve_changed_inputs, rev_range_commits`. Update call sites: `_rev_range_commits` → `rev_range_commits` (2: `change_list`, `entity_churn_count`); `_resolve_changed_inputs` → `resolve_changed_inputs` (2: `impact_radius`, `reverify_worklist`); `_enrich_blast` → `enrich_blast` (2). `commands._as_int` **stays** (still used by `entity_churn_count`:473). Remove the 3 moved bodies.
Suite green. Commit. + +### Step 0.6 — Verify + +- `wc -l src/warpline/commands.py` (~780 target, assert `< 800`, per M1); `python -c 'import warpline.commands, warpline._enrichment, warpline._blast'` clean; full suite green; run pyright on the module. +- **Open watch (Q-pyright):** if pyright still times out at ~780 LOC, the deferred SEAM-8 `_process_blast_result()` extraction (which reaches ~700) becomes a Rung 0.5; track the pyright output and decide then. Not done now (the post-step divergence between `impact_radius` and `reverify_worklist` makes a unified helper a parameter-explosion until Rung 1 lands). + +**What stays in `commands.py`:** all 7 tool bodies (unchanged signatures), all 6 `SCHEMA_*`, `session_context`, `_rev_range_from_refs`, `_coerce_max_entities`, `_coerce_if_stale_after`, `_federation_warnings`, `_page`, `_filters_echo`, `_unresolved_warnings`, `_as_int`. + +**Caller impact:** 5 files import `warpline.commands` (4 test files + `dogfood.py`, per M1); none import private helpers. Zero import-path changes. `mcp.py` `TOOL_SPECS` references `commands.SCHEMA_*` — unaffected. + +--- + +## RUNG 1 — Stabilize the spine + +### Rung 1a — Migration runner (PREREQUISITE GATE) + +`store.py:open()` currently runs `executescript(SCHEMA)` unconditionally and never consults `schema_version()`. Add a real ordered runner. + +**Changes — `store.py`:** +- Connection setup: keep `journal_mode=WAL`; add `PRAGMA foreign_keys=ON; PRAGMA busy_timeout=5000; PRAGMA synchronous=NORMAL`. Assert `sqlite3.sqlite_version_info >= (3,35,0)` (`RETURNING` clause floor — already used in `create_edge_snapshot`; CI must verify the deployment Python's bundled SQLite — OPEN QUESTION Q-sqlite). +- Keep `executescript(SCHEMA)` for fresh-DB base tables (idempotent `IF NOT EXISTS`). +- **R3:** introduce `MIGRATIONS: list[Migration]`, each `(version:int, apply:Callable[[Connection],None])`. Runner: + 1. Read `PRAGMA user_version`. Legacy reconcile: if `user_version==0` and `meta.schema_version=='1'`, set `user_version=1` once. + 2.
For each step with `version > user_version`: `conn.execute("BEGIN IMMEDIATE")`; run the step's `conn.execute(...)` statements (**never `executescript`**); `conn.execute(f"PRAGMA user_version = {N}")`; `UPDATE meta SET value=? WHERE key='schema_version'` to `N`; `conn.execute("COMMIT")`. + 3. Concurrent `open()`: second writer blocks on RESERVED lock, re-reads `user_version`, skips applied steps (idempotent). +- Add guard: if on-disk `user_version` > highest known, **warn (do not fail)** — reads remain safe (additive-only history). + +**Tests — `tests/test_store_migrations.py` (NEW):** fresh-DB lands at highest version; legacy-v1 DB (only base tables + `meta.schema_version='1'`) upgrades on open; idempotent re-open is a no-op; two concurrent `open()` calls do not double-apply (thread/`busy_timeout` test). **Update** existing `test_store.py` `schema_version()` assertion `1 → 2`. + +**Doctrine (no-mirror / honesty):** runner only manages warpline's own store under `.weft/warpline/`; additive columns read NULL = `unavailable`, never a clean default. + +### Rung 1b — Working-context anchor columns (schema v2) + +The anchor identifies a **change episode** (verb-moment), orthogonal to SEI (entity, noun) — so it lives on `change_events`, **not** `entity_keys`. + +**Migration 2 DDL** (all NULLable, no default, O(1) metadata-only): +``` +ALTER TABLE change_events ADD COLUMN detected_branch TEXT; -- git symbolic-ref short name; NULL if detached +ALTER TABLE change_events ADD COLUMN detected_head_sha TEXT; -- HEAD sha AT DETECTION (working context; distinct from commit_sha = introducing commit) +ALTER TABLE change_events ADD COLUMN detected_at TEXT; -- ISO-8601 UTC detection timestamp (distinct from changed_at = author time) +``` + +**Changes:** +- `store.py:append_change_event` — add optional kwargs `detected_branch/detected_head_sha/detected_at` (NULL when unsupplied → backward compatible). 
Add the 3 columns to `list_change_events` and `timeline` SELECT lists so reads can surface the anchor (existing callers ignore extra keys). +- `git.py:ingest_commit` and `backfill` — `ingest_commit` computes the anchor **once per call**: `head_sha = git rev-parse HEAD`; `branch = git symbolic-ref --short -q HEAD` (None on detached); `detected_at = datetime.now(UTC).isoformat()`. Thread into `append_change_event`. **`backfill` leaves all three anchor columns NULL** (reconstruction, not detection; per B3) → historical rows read `unavailable` working-context (honest). + +**D1 note:** per-event columns are the **substrate**; the work-session episode boundary (ratified in the proposal with squash/rebase fallback) is recorded as the target of a future **`change_episodes` table (v4)** that the per-event triple collapses into cleanly. No episode table built here. Confirm episode semantics (Q5) before building it. + +**Tests — `tests/test_anchor_capture.py` (NEW):** ingest on a branch records branch + head_sha + detected_at; detached HEAD records branch=NULL; backfill records all three anchor columns NULL (B3); `list_change_events`/`timeline` surface the new fields; v1-DB-opened-by-v2-client migrates and old rows read NULL. + +**Doctrine (SEI-orthogonality):** no new identifier minted — branch/head are git's values, `detected_at` is a clock reading; warpline owns only the contract of recording them. Anchor on `change_events` (the detection act), never on `entity_keys`. + +### Rung 1c — Self-healing SEI re-resolution sweep + +The bug is the `UNIQUE(repo_id, locator, COALESCE(sei,''))` index (store.py:30-31): a `sei=NULL` row and a resolved-sei row for the same locator are distinct identities, so a row minted while loomweave was down stays null forever. Fix = idempotent **UPDATE-or-merge** (never re-mint).
+ +**Changes:** +- `store.py` — new `null_sei_entity_keys(repo, limit) -> list[{id, locator}]`: select `WHERE sei IS NULL`, bounded, ordered by id (deterministic, resumable). +- `store.py` — new `reresolve_entity_key_sei(repo_id, null_key_id, locator, resolved_sei) -> {action}`. **R11 explicit merge:** inside `BEGIN IMMEDIATE`: + 1. `UPDATE entity_keys SET sei=? WHERE id=? AND sei IS NULL`. + 2. On `sqlite3.IntegrityError` (twin exists): repoint `UPDATE change_events SET entity_key_id=:twin WHERE entity_key_id=:null_id`; for each row that hits the `change_events` UNIQUE constraint, catch and **`DELETE` the null-keyed duplicate** (resolved-keyed row is the survivor); then `DELETE` the orphan null `entity_keys` row. + 3. Carry `first_seen = min`, `last_seen = max` onto the survivor. + - Convergent: re-running on a healed key matches no null rows → no-op. +- `reresolve.py` (NEW) — `sweep_reresolve_sei(store, repo, client, limit) -> {scanned, resolved, merged, still_null, loomweave: present|absent|unavailable}`. Pages null keys, calls `loomweave.resolve_sei_for_locator` per locator, applies the merge core. **No-op + honest report when `client is None`** (never marks a key resolved-to-null). +- `cli.py` — new `reresolve-sei` subparser (`--repo`, `--limit` default 200, `--resolve-sei/--no-resolve-sei`, `--loomweave-command`), reusing `_optional_sei_client` (cli.py:31). Fail-soft; emits JSON with loomweave posture. **Not** one of the 6 frozen tools. + +**Tests — `tests/test_reresolve.py` (NEW):** null row → resolved when loomweave returns a SEI; **twin-collision** (resolved twin already exists, with and without a duplicate change_event) merges and deletes the orphan, survivor keeps resolved sei and merged first/last_seen; double-run is a no-op; loomweave absent → no rows mutated + `loomweave: unavailable`. + +**Doctrine (SEI-orthogonality / honesty):** re-uses loomweave's minted SEI via `resolve_sei_for_locator`; never invents one. 
Sweep reports loomweave posture explicitly; absence never reads as "resolved". + +### Rung 1d — Auto / lazy edge-snapshot capture + +Today the hook only ingests, so `latest_snapshot` is None → `blast_radius` returns NO_SNAPSHOT. Hybrid: **lazy-on-read** (correctness floor) + **opportunistic-on-commit** (freshness). + +**R7 — `blast_radius` stays pure (no new parameter).** Lazy capture lives entirely in the tool bodies. + +**Changes:** +- `commands.py:impact_radius` and `reverify_worklist` — before computing blast radius, if `store.latest_snapshot(repo)` is None/SKIPPED **and** a loomweave client is available (decided by the existing `mcp.py`/`cli.py` dispatcher gate), attempt one scoped `capture_edge_snapshot(scope_locators=)`, then re-read `latest_snapshot`. If still missing (or no loomweave), fall through to the **unchanged** NO_SNAPSHOT path. Reuses existing `snapshot.py:capture_edge_snapshot` and its FULL/DELTA/SKIPPED honesty — new trigger, no new capture logic. +- `install.py:hook_body()` — append two fail-soft lines inside the managed block, after ingest: + ``` + {executable} reresolve-sei --limit 25 >/dev/null 2>&1 || true + {executable} capture-snapshot --commit HEAD >/dev/null 2>&1 || true + ``` + (Bounded sweep heals incrementally; capture keeps the snapshot on HEAD so `commits_behind` stays 0.) Capture stays in the **hook**, not inside `ingest_commit` — keeps the per-tool-call loomweave latency (`loomweave.py:91` `timeout=10` **per tool call**, i.e. per entity) out of the commit critical path. +- **R5 — existing-hook migration:** `install_support.py:apply_git_hook()` already rewrites the managed block, so changing `hook_body()` reaches installed repos only via re-run. Add a `doctor` check "post-commit hook missing reresolve/capture lines" and have `--fix` reinstall the hook. Document: users run `warpline install --hooks` or `warpline doctor --fix` to pick up the new lines. 
+- `install_support.py:run_doctor` — add (non-fixable-by-default) checks: still-null SEI count, snapshot presence/staleness, hook-out-of-date. `--fix` runs an unbounded `reresolve-sei` + a `capture-snapshot` + hook reinstall. + +**Tests — `tests/test_lazy_capture.py` (NEW):** impact_radius with no snapshot + fake loomweave client captures then returns a populated affected set; with no loomweave client, returns NO_SNAPSHOT unchanged (no error, no gate); `blast_radius` signature unchanged (pure). **`tests/test_install.py`** updated: hook body contains the two new fail-soft lines; doctor flags an old hook and `--fix` reinstalls. + +**Doctrine (enrich-only / honesty):** capture is fail-soft and loomweave-conditional — absence falls through to honest NO_SNAPSHOT, never an error or a block. Existing completeness vocab preserved. + +--- + +## RUNG 2 — Diagnostic capabilities + COP read surface + +Four independent tracks. Ship **Track C first** (tracer bullet). All additive; zero frozen-contract mutation. + +### Track C — Light up inert `risk`/`governance` enrichment (SHIP FIRST, ~15 LOC) + +The federation block (`federation.entities[].risk/governance`) is never merged back into per-item `item.enrichment.risk/governance`, leaving them perpetually empty. + +**Changes — `commands.py:reverify_worklist`** (after the `consult_federation` call, before overflow/page): +- Build `fed_by_locator` from `federation['entities']` keyed on locator. For each item, if its locator is present, copy `fed_entity['risk']` → `item['enrichment']['risk']` and `fed_entity['governance']` → `item['enrichment']['governance']`. +- **R6 scalar rule** (mirrors existing `work_state` at lines ~728-731): compute `risk_state`/`gov_state` — + - `federation is None` (include_federation=False) or member `disabled`/`unreachable` → `"unavailable"`; + - member reachable, no findings for any item → `"absent"`; + - findings present → `"present"`. 
+- Pass `risk=risk_state, governance=gov_state` into the `enrichment_state()` call in `build_envelope`. +- `reverify.py` unchanged (its `_empty_enrichment()` scaffold is the correct target the merge fills). +- `mcp.py:_h_reverify` still passes `legis_client=None` — that single line is the only change when legis transport lands. + +**Tests — `tests/test_enrichment_merge.py` (NEW):** with a fake wardline client returning findings, `item.enrichment.risk` is populated and envelope `enrichment.risk == "present"`; `include_federation=False` → `"unavailable"` (not `"absent"`); wardline reachable but empty → `"absent"`. + +**Doctrine (D2 — proven-need):** this is the **demonstration that earns** freezing the wardline/legis inbound shape — not a pre-promised contract. Additive, reversible, advisory-only; does not lock the RESERVED-SHAPE inbound. Absence is explicit per the closed vocab. + +### Track A — Co-change coupling graph (schema v3) + +**Migration 3 DDL:** +``` +CREATE TABLE IF NOT EXISTS co_change_pairs ( + repo_id TEXT NOT NULL, + entity_key_id_a INTEGER NOT NULL, -- canonical a < b + entity_key_id_b INTEGER NOT NULL, + co_change_count INTEGER NOT NULL, + last_co_change TEXT, + last_commit_sha TEXT, + PRIMARY KEY (repo_id, entity_key_id_a, entity_key_id_b) +); +``` +(Per-entity totals come from `change_events` aggregation at read time, so no `total_a/total_b` columns needed; if read cost demands it, add denormalized totals in a later additive migration.) + +**Changes:** +- `coupling.py` (NEW) — `derive_pairs_from_commit(entity_key_ids) -> list[(a,b)]` (canonical a < b); `confidence(sample_size) -> 'low'|'medium'|'high'` (<5 low, 5–19 medium, ≥20 high); `coupling_rate(co_change_count, total) -> float|None` (None when total<5). No import from `commands`.
+- `store.py` — `update_co_change_pairs(repo_id, commit_sha, entity_key_ids)` (one atomic upsert of all pairs); `co_change_partners(repo, entity_key_id, min_count=2) -> list[{entity_key_id, locator, sei, co_change_count, coupling_rate, sample_size, last_co_change}]` (joins `entity_keys` for SEI at read time). +- **R8 cap:** `update_co_change_pairs` skips pair generation when `len(entity_key_ids) > 30` and records nothing for that commit (high-fanout commits carry near-zero coupling signal). All co-change writes wrapped `try/except` fail-soft — never blocks ingest. +- `git.py:ingest_commit` + `backfill` — after `append_change_event`, call `update_co_change_pairs(...)` (fail-soft). +- `cli.py` — `rebuild-coupling` (rescans `change_events` grouped by `commit_sha`, idempotent, interruptible) and `co-change` (read surface for partners). Both read-only advisory. + +**Tests — `tests/test_coupling.py` (NEW):** pair derivation canonical ordering; confidence thresholds; rate suppression <5; high-fanout commit (>30 entities) skipped; `rebuild-coupling` idempotent (run twice → same counts); SEI-sparse pairs emit `sei:null` + `enrichment.sei:absent`. + +**Doctrine (SEI-orthogonality / no-mirror):** pairs keyed on warpline-local `entity_key_id` (a co-occurrence fact warpline owns, derived from its own `change_events`); SEI joined at read time, never minted, never mirrored. Honesty: rate suppressed + `confidence:low` below sample floor. + +**D1/Q6 caveat:** co-change is **commit-keyed** now. Once the work-session episode boundary is ratified, the denominator/grouping must re-key to episode (two commits in one session = one co-change episode). Flagged as OPEN QUESTION Q6; re-key is an additive read-path change, not a schema break. + +### Track B — Verification freshness (read-time compose, wardline-only first) + +**Changes — `verification.py` (NEW):** +- `VerificationClient` Protocol: `last_verified_for_sei(sei) -> {verified_at, kind, actor, event_ref} | None`. 
+- **R10 — `WardlineVerificationClient` first** (wraps `WardlineDossierClient`): a `finding_state == 'resolved'` finding is a verification event. Ships now. +- `FiligreeVerificationClient` **deferred** behind confirming `closed_at` availability (Q4). Legis stays honestly `unavailable`. +- `compose_verification_freshness(last_changed_at, events) -> {verification_state: 'fresh'|'stale'|'unverified', last_verified_at, last_verified_kind, sources}` — `fresh` (verified ≥ changed), `stale` (verified < changed), `unverified` (no event). Never defaults to `fresh`. +- Surfaces in `item.enrichment.requirements[]` (the fourth frozen slot); envelope `enrichment.requirements` scalar follows the same R6 rule. + +**Tests — `tests/test_verification.py` (NEW):** resolved wardline finding after last_changed → `fresh`; before → `stale`; none → `unverified`; wardline unreachable → `requirements: unavailable` (not `fresh`/`absent`). + +**Doctrine (no-mirror):** composed at read time from sibling reads; warpline stores no `last_verified`. + +### Track D — Temporal COP internals (`cop.py`; public surface INTERFACE-PENDING) + +**R9 — new module `warpline/cop.py`** (imports `_consult_filigree/_wardline/_legis` from `federation.py`; `federation.py` never imports `cop.py`). + +**Changes — `cop.py` (NEW):** +- `resolve_frame(store, repo, frame_spec) -> (items, frame_echo, warnings)` — dispatch on `frame_spec['kind']`: `rev_range`, `time_window`, `sei`, `branch_sha`, `edit`, using existing store methods (`list_change_events`, `resolve_ref`, `rev_range_commits`). `branch_sha` falls back to `rev_range` resolution **with a warning** until `detected_branch` (Rung 1b) is populated. +- `compose_temporal_cop(items, frame, *, work_client, risk_client, legis_client) -> {members, entities, coverage, frame}` — reuses the three `_consult_*` verbatim. `coverage = {members_consulted, members_total, dark_sectors: [members with reason_class disabled|unreachable]}`. 
**`consult_federation` is not modified.** + +**Tests — `tests/test_cop.py` (NEW):** `resolve_frame` per kind (rev_range, sei resolve real items; branch_sha emits the fallback warning); `compose_temporal_cop` lists every member in `coverage` with correct `dark_sectors`; an unreachable member appears as `dark_sector`, never silently dropped. + +**MCP/CLI wiring — DEFERRED to INTERFACE-PENDING.** When the public shape arrives: add `SCHEMA_TEMPORAL_COP` to `commands.py`, a `TOOL_SPECS` entry, `_h_cop` handler (`resolve_frame` → `compose_temporal_cop`), `_HANDLER_CONSUMES` entry; `assert_inputschema_consumed()` enforces correctness at import. + +**Doctrine (enrich-only / honesty / frozen):** COP composes at read time, never gates; `dark_sectors` is the load-bearing coverage-honesty surface (unmonitored domain ≠ empty). New tool = new URI, never a mutation of a frozen v1 contract. + +--- + +## INTERFACE PENDING — fill from user's concrete interface + +Everything here is **blocked** on the user's concrete public surface for the correlation reads + COP. Design slots are prepared so each drops in additively without mutating a frozen v1 contract. + +1. **COP public tool/CLI shape** (Track D wiring): MCP tool name, `inputSchema` (frame vocabulary + structure — which of rev_range/time_window/sei/branch_sha/edit are in scope, pagination strategy), output top-level fields beyond `{members, entities, coverage, frame}`. *Internals (`cop.py`) ship without it; only the `mcp.py`/`cli.py` handler waits.* +2. **Working-context anchor exposure on frozen read tools**: which frozen tool's response renders `detected_branch/head_sha/detected_at`, under which optional output field. *Capture + storage (Rung 1b) ship without it; output rendering waits.* Additive output fields only. +3. **Lazy auto-capture trigger policy** (Rung 1d): default-on vs gated by a new **optional** `auto_capture` input field on `impact_radius`/`reverify`. 
*Lazy machinery in the tool bodies is built; the default-on-vs-opt-in toggle and any new optional input property wait.* +4. **Verification-freshness record shape** inside `requirements[]` (Track B): the per-event dict (`verified_at/kind/actor/event_ref`) is placeholder until legis attestation transport defines its shape. *Wardline-sourced freshness ships now; the locked record shape waits.* +5. **Legis governance/attestation per-SEI read transport** (Track C governance dim + Track B legis source): no CLI/MCP exists. `_consult_legis` and the legis verification source stay honestly `unavailable` until it lands; the single `legis_client=None` line in `_h_reverify` is the only flip needed. + +**What blocks on each:** (1) COP cannot be called by an agent. (2) Anchor data is captured but not yet visible to agents through a frozen tool. (3) Lazy capture cannot be defaulted-on or exposed as a toggle. (4) `requirements[]` event records cannot be frozen. (5) Governance enrichment + legis verification stay `unavailable`. + +--- + +## OPEN QUESTIONS — confirm with the user + +- **Q1 (Rung 0 naming):** `_enrichment.py`/`_blast.py` vs `_staleness.py`/`_resolver.py`? (Rename only; no structural impact.) +- **Q2 (Rung 1 hook cadence):** fire `reresolve-sei` + `capture-snapshot` on **every** commit, or throttle capture (e.g. only when `commits_behind` crosses a threshold) on high-commit-rate repos? Needs a commit-rate figure to tune. +- **Q3 (`detected_at` format):** ISO-8601 **UTC** assumed (matches `changed_at`). Confirm UTC vs local — affects cross-member temporal correlation later. +- **Q4 (filigree `closed_at`):** is `closed_at` already in filigree CLI issue JSON (then `FiligreeVerificationClient` ships now), or does it need a new filigree surface (then it's a cross-member negotiation, deferred)? 
+- **Q5 (episode granularity / D1):** confirm per-event anchor columns (v2) are acceptable as the substrate, with the work-session `change_episodes` table as a clean **v4 superset** — and ratify the work-session boundary semantics (dirty-tree / detached-HEAD / squash-rebase fallback) **before** any episode table is built. +- **Q6 (co-change re-key):** when the episode boundary is ratified, co-change must re-key from commit to episode (additive read-path change). Confirm commit-keying is acceptable as the interim. +- **Q7 (SEI merge survivor rule):** on twin-collision, keep the resolved-keyed `change_event` and drop the null-keyed duplicate (R11). Confirm this is the desired survivor and that dropping a null-keyed dup with differing `hunk_summary`/`actor` is acceptable. +- **Q8 (co-change backfill cost):** acceptable table size for the target repo? The >30-entity per-commit cap (R8) bounds it; confirm 30 is the right threshold (or a max-pairs-per-commit cap instead). +- **Q-sqlite:** confirm all deployment targets ship Python with bundled SQLite ≥ 3.35 (ALTER … DROP COLUMN rollback path); add a CI assertion. +- **Q-pyright:** after Rung 0 (~700 LOC), does pyright stop timing out on `commands.py`? If not, schedule the deferred SEAM-8 extraction as Rung 0.5. + +--- + +## What I can start NOW vs what waits for the interface + +**Start NOW (no interface dependency):** +- **Rung 0** in full (Steps 0.1–0.6) — pure refactor, unblocks everything. +- **Rung 1a** migration runner + PRAGMA hardening (prerequisite gate). +- **Rung 1b** anchor columns (v2) + `git.py` population + `store.py` read surfacing. +- **Rung 1c** SEI re-resolution sweep (`reresolve.py`, store merge core, `reresolve-sei` CLI — not a frozen tool). +- **Rung 1d** lazy-capture machinery in the tool bodies + hook lines + doctor checks (the *trigger* is built; default-on policy waits on item 3). +- **Rung 2 Track C** enrichment merge pass (tracer bullet — ship first). 
+- **Rung 2 Track A** co-change graph (v3) + `rebuild-coupling`/`co-change` CLI. +- **Rung 2 Track B** `verification.py` + `WardlineVerificationClient` (filigree/legis sources deferred). +- **Rung 2 Track D** `cop.py` internals (`resolve_frame` + `compose_temporal_cop`) and their unit tests. + +**WAITS for the user's concrete interface:** +- COP public MCP tool/CLI wiring (`SCHEMA_TEMPORAL_COP`, `TOOL_SPECS`, `_h_cop`) — internals are ready; only the handler waits (item 1). +- Rendering the working-context anchor through a frozen read tool's output (item 2). +- Lazy auto-capture **default-on vs opt-in** decision and any new optional `auto_capture` input field (item 3). +- Freezing the `requirements[]` verification-event record shape (item 4). +- Legis governance/attestation inbound + the `_h_reverify` `legis_client` flip (item 5). +- Building the `change_episodes` (v4) table — waits on Q5 ratification, not on the public interface. \ No newline at end of file From d8fd491ea975bc98ad44d76caa62db73c135545d Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 16 Jun 2026 01:54:23 +1000 Subject: [PATCH 03/10] refactor(commands): extract _enrichment and _blast helpers (Rung 0) Behaviour-preserving decomposition of commands.py: characterization tests first, then extract the pure staleness/completeness helpers into warpline._enrichment and the blast-pipeline prep helpers into warpline._blast. - _enrichment.py: EDGES_FOR_COMPLETENESS + is_stale/edges_enrichment/ staleness_warnings/completeness_warnings. Import-free except typing.Any (structurally incapable of gating). is_stale uses int(behind) (R2). - _blast.py: rev_range_commits/resolve_changed_inputs/enrich_blast with a private strict-assert _as_int. WarplineStore is always a parameter. - commands.py 959 -> 771 LOC (<800, M1). capture_snapshot keeps using EDGES_FOR_COMPLETENESS (B1); _as_int and the 2 rev_range_commits sites stay (M2). 
Zero change to the 7 tool signatures, 6 SCHEMA_* constants, cli.py, mcp.py behaviour. - Lift _init_repo/_commit into tests/conftest.py (conftest-helper minor). - New characterization suites: test_enrichment_helpers.py (incl. B1 dict access) + test_blast_helpers.py. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/warpline/_blast.py | 157 +++++++++++++++++++++ src/warpline/_enrichment.py | 76 ++++++++++ src/warpline/commands.py | 230 +++---------------------------- tests/conftest.py | 34 +++++ tests/test_blast_helpers.py | 181 ++++++++++++++++++++++++ tests/test_enrichment_helpers.py | 152 ++++++++++++++++++++ tests/test_honesty_invariant.py | 25 +--- 7 files changed, 623 insertions(+), 232 deletions(-) create mode 100644 src/warpline/_blast.py create mode 100644 src/warpline/_enrichment.py create mode 100644 tests/conftest.py create mode 100644 tests/test_blast_helpers.py create mode 100644 tests/test_enrichment_helpers.py diff --git a/src/warpline/_blast.py b/src/warpline/_blast.py new file mode 100644 index 0000000..1b7d4ef --- /dev/null +++ b/src/warpline/_blast.py @@ -0,0 +1,157 @@ +"""Blast-pipeline prep helpers (internal API). + +Extracted from ``commands.py`` (Rung 0). Dependency is strictly one-way: +``commands.py -> _blast``; this module never imports ``commands``. + +Doctrine (no-mirror / SEI-orthogonality): reads the store passed in, calls git +rev-list, writes nothing, mints no identifier; operates on warpline-local +``entity_key_id`` integers and SEI strings supplied by the store. ``WarplineStore`` +is always a parameter — never opened inside. 
+""" + +from __future__ import annotations + +import subprocess +from pathlib import Path +from typing import Any + +from warpline.errors import BadRevisionError +from warpline.refs import entity_view +from warpline.store import WarplineStore + + +def _as_int(value: object) -> int: + # Strict-assert form (matching the original ``commands._as_int``), deliberately + # distinct from ``propagation._as_int`` (permissive int|str). Not imported across + # modules: each owner keeps the variant its call sites need. + assert isinstance(value, int) + return value + + +def rev_range_commits(repo: Path, rev_range: str | None) -> set[str] | None: + if rev_range is None: + return None + try: + proc = subprocess.run( + ["git", "rev-list", rev_range], + cwd=repo, + check=True, + text=True, + capture_output=True, + ) + except subprocess.CalledProcessError as exc: + detail = exc.stderr.strip() or exc.stdout.strip() or str(exc) + raise BadRevisionError(f"invalid rev_range {rev_range!r}: {detail}") from exc + return {line for line in proc.stdout.splitlines() if line} + + +def resolve_changed_inputs( + store: WarplineStore, + repo: Path, + *, + rev_range: str | None, + changed_refs: list[dict[str, str]], + changed_entity_key_ids: list[int], +) -> tuple[list[int], list[dict[str, Any]], list[dict[str, Any]]]: + """Resolve the caller's change-set into stored entity keys. + + Returns ``(key_ids, resolved, unresolved)``. The miss-set is the honesty + surface for the resolve join: a ``changed_ref`` that does not map to any + stored entity_key — or a raw ``entity_key_id`` that is unknown to this repo's + store — was, before this change, silently dropped, so an agent asking "does + my change break anything?" got a confident affected-set computed over an + *incomplete* seed set with no signal that half its refs never resolved. + Every unresolved input now appears in ``unresolved`` with a machine-readable + ``reason`` so the caller can ask "did my SEI actually resolve into the + snapshot?" 
and get a yes/no, not a silent omission. + """ + + ids: set[int] = set() + resolved: list[dict[str, Any]] = [] + unresolved: list[dict[str, Any]] = [] + + # Raw entity_key_ids are a compatibility seed, not a federation key; still, + # an id unknown to this store is a miss the caller must see. + known_ids = store.entity_keys_by_ids(repo, sorted(set(changed_entity_key_ids))) + for key_id in changed_entity_key_ids: + if key_id in known_ids: + ids.add(key_id) + row = known_ids[key_id] + resolved.append( + { + "ref": {"kind": "warpline_entity_key_id", "value": key_id}, + "entity_key_id": key_id, + "sei": row.get("sei"), + "locator": row.get("locator"), + } + ) + else: + unresolved.append( + { + "ref": {"kind": "warpline_entity_key_id", "value": key_id}, + "reason": "unknown_entity_key_id", + } + ) + + for ref in changed_refs: + resolved_row = store.resolve_ref(repo, ref["kind"], ref["value"]) + if resolved_row is not None: + resolved_id = _as_int(resolved_row["id"]) + ids.add(resolved_id) + resolved.append( + { + "ref": ref, + "entity_key_id": resolved_id, + "sei": resolved_row.get("sei"), + "locator": resolved_row.get("locator"), + } + ) + else: + reason = "sei_not_in_snapshot" if ref.get("kind") == "sei" else "ref_not_in_snapshot" + unresolved.append({"ref": ref, "reason": reason}) + + if rev_range is not None: + commit_shas = rev_range_commits(repo, rev_range) + for event in store.list_change_events(repo, commit_shas=commit_shas): + event_key_id = event.get("entity_key_id") + if isinstance(event_key_id, int): + ids.add(event_key_id) + + return sorted(ids), resolved, unresolved + + +def enrich_blast( + store: WarplineStore, repo: Path, result: dict[str, Any] +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + ids: set[int] = set() + for row in result.get("changed", []): + if isinstance(row.get("entity_key_id"), int): + ids.add(row["entity_key_id"]) + for row in result.get("affected", []): + if isinstance(row.get("entity_key_id"), int): + 
ids.add(row["entity_key_id"]) + for edge in row.get("via_edges", []): + for end in ("from", "to"): + if isinstance(edge.get(end), int): + ids.add(edge[end]) + key_rows = store.entity_keys_by_ids(repo, sorted(ids)) + + def view(key_id: Any) -> dict[str, Any]: + return entity_view(key_rows.get(int(key_id)) if isinstance(key_id, int) else None) + + changed = [{"entity": view(row.get("entity_key_id"))} for row in result.get("changed", [])] + affected = [] + for row in result.get("affected", []): + via = [ + { + "from": str(edge.get("from")), + "to": str(edge.get("to")), + "kind": edge.get("kind"), + "confidence": edge.get("confidence"), + } + for edge in row.get("via_edges", []) + ] + affected.append( + {"entity": view(row.get("entity_key_id")), "depth": row.get("depth"), "via_edges": via} + ) + return changed, affected diff --git a/src/warpline/_enrichment.py b/src/warpline/_enrichment.py new file mode 100644 index 0000000..a7296ba --- /dev/null +++ b/src/warpline/_enrichment.py @@ -0,0 +1,76 @@ +"""Pure staleness/completeness enrichment helpers (internal API). + +Extracted from ``commands.py`` (Rung 0). Dependency is strictly one-way: +``commands.py -> _enrichment``; this module imports nothing from warpline and is +structurally incapable of gating (enrich-only doctrine, verified by its import +list: only ``typing.Any``). No store, no git, no I/O. +""" + +from __future__ import annotations + +from typing import Any + +# enrichment.edges value for each completeness level. +EDGES_FOR_COMPLETENESS = { + "FULL": "present", + "DELTA": "partial", + "NO_SNAPSHOT": "absent", + "SKIPPED": "skipped", +} + + +def is_stale(staleness: dict[str, Any]) -> bool: + """The snapshot was captured at a commit behind HEAD. + + ``commits_behind`` is the live answer to ``snapshot_commit..HEAD``; any + positive count means the stored edge graph no longer describes HEAD. 
A + ``None`` count means we could not ask git (detached snapshot commit, shallow + clone) — we treat that as *unknown, therefore not-proven-fresh* and surface + it as stale rather than silently claiming completeness. + """ + + behind = staleness.get("commits_behind") + if behind is None: + return staleness.get("snapshot_commit") is not None + return int(behind) > 0 + + +def edges_enrichment(completeness: str, staleness: dict[str, Any]) -> str: + """Map (completeness, staleness) → the closed ``enrichment.edges`` vocab. + + A FULL-or-DELTA snapshot that is *behind HEAD* downgrades to the live + ``"stale"`` value: the edge graph is real but no longer describes the + working tree, so completeness must NOT be claimed. Without this, a stale- + but-FULL snapshot would emit ``edges:"present"`` and hand an agent a + confident affected-set with zero freshness warning (PDR-0023: the quiet + segfault). NO_SNAPSHOT / SKIPPED are already-honest "we have nothing" states + and are reported as-is regardless of staleness. 
+ """ + + base = EDGES_FOR_COMPLETENESS.get(completeness, "absent") + if completeness in {"FULL", "DELTA"} and is_stale(staleness): + return "stale" + return base + + +def staleness_warnings(completeness: str, staleness: dict[str, Any]) -> list[str]: + if completeness in {"FULL", "DELTA"} and is_stale(staleness): + behind = staleness.get("commits_behind") + commit = str(staleness.get("snapshot_commit") or "unknown")[:8] + if behind is None: + tail = "snapshot commit is not on HEAD's history; freshness unknown" + else: + tail = f"{behind} commit(s) behind HEAD" + return [ + f"STALE: edge snapshot @ {commit} is {tail}; affected set is not complete for " + "HEAD — recapture (warpline capture-snapshot) before trusting completeness" + ] + return [] + + +def completeness_warnings(completeness: str) -> list[str]: + return { + "NO_SNAPSHOT": ["NO_SNAPSHOT: downstream traversal unavailable; changed set only"], + "SKIPPED": ["SKIPPED: graph snapshot was skipped; changed set only"], + "DELTA": ["DELTA: graph snapshot is partial; inspect failed_entities or staleness"], + }.get(completeness, []) diff --git a/src/warpline/commands.py b/src/warpline/commands.py index 9bb85f6..ecc8f94 100644 --- a/src/warpline/commands.py +++ b/src/warpline/commands.py @@ -1,10 +1,16 @@ from __future__ import annotations import os -import subprocess from pathlib import Path from typing import Any +from warpline._blast import enrich_blast, resolve_changed_inputs, rev_range_commits +from warpline._enrichment import ( + EDGES_FOR_COMPLETENESS, + completeness_warnings, + edges_enrichment, + staleness_warnings, +) from warpline.envelope import build_envelope, enrichment_state from warpline.errors import BadRevisionError, InvalidChangedRefsError from warpline.federation import LegisClient, RiskClient, consult_federation @@ -36,63 +42,6 @@ SCHEMA_REVERIFY_WORKLIST = "warpline.reverify_worklist.v1" SCHEMA_EDGE_SNAPSHOT = "warpline.edge_snapshot.v1" -# enrichment.edges value for each completeness level. 
-_EDGES_FOR_COMPLETENESS = { - "FULL": "present", - "DELTA": "partial", - "NO_SNAPSHOT": "absent", - "SKIPPED": "skipped", -} - - -def _is_stale(staleness: dict[str, Any]) -> bool: - """The snapshot was captured at a commit behind HEAD. - - ``commits_behind`` is the live answer to ``snapshot_commit..HEAD``; any - positive count means the stored edge graph no longer describes HEAD. A - ``None`` count means we could not ask git (detached snapshot commit, shallow - clone) — we treat that as *unknown, therefore not-proven-fresh* and surface - it as stale rather than silently claiming completeness. - """ - - behind = staleness.get("commits_behind") - if behind is None: - return staleness.get("snapshot_commit") is not None - return _as_int(behind) > 0 - - -def _edges_enrichment(completeness: str, staleness: dict[str, Any]) -> str: - """Map (completeness, staleness) → the closed ``enrichment.edges`` vocab. - - A FULL-or-DELTA snapshot that is *behind HEAD* downgrades to the live - ``"stale"`` value: the edge graph is real but no longer describes the - working tree, so completeness must NOT be claimed. Without this, a stale- - but-FULL snapshot would emit ``edges:"present"`` and hand an agent a - confident affected-set with zero freshness warning (PDR-0023: the quiet - segfault). NO_SNAPSHOT / SKIPPED are already-honest "we have nothing" states - and are reported as-is regardless of staleness. 
- """ - - base = _EDGES_FOR_COMPLETENESS.get(completeness, "absent") - if completeness in {"FULL", "DELTA"} and _is_stale(staleness): - return "stale" - return base - - -def _staleness_warnings(completeness: str, staleness: dict[str, Any]) -> list[str]: - if completeness in {"FULL", "DELTA"} and _is_stale(staleness): - behind = staleness.get("commits_behind") - commit = str(staleness.get("snapshot_commit") or "unknown")[:8] - if behind is None: - tail = "snapshot commit is not on HEAD's history; freshness unknown" - else: - tail = f"{behind} commit(s) behind HEAD" - return [ - f"STALE: edge snapshot @ {commit} is {tail}; affected set is not complete for " - "HEAD — recapture (warpline capture-snapshot) before trusting completeness" - ] - return [] - def session_context(repo: Path) -> str: """A one-line temporal snapshot for the SessionStart hook (fail-soft).""" @@ -112,23 +61,6 @@ def session_context(repo: Path) -> str: return f"warpline: {len(events)} change events tracked; {snap}" -def _rev_range_commits(repo: Path, rev_range: str | None) -> set[str] | None: - if rev_range is None: - return None - try: - proc = subprocess.run( - ["git", "rev-list", rev_range], - cwd=repo, - check=True, - text=True, - capture_output=True, - ) - except subprocess.CalledProcessError as exc: - detail = exc.stderr.strip() or exc.stdout.strip() or str(exc) - raise BadRevisionError(f"invalid rev_range {rev_range!r}: {detail}") from exc - return {line for line in proc.stdout.splitlines() if line} - - def _page(limit: int) -> dict[str, Any]: return {"limit": limit, "next_cursor": None, "has_more": False} @@ -147,81 +79,6 @@ def _as_int(value: object) -> int: return value -def _resolve_changed_inputs( - store: WarplineStore, - repo: Path, - *, - rev_range: str | None, - changed_refs: list[dict[str, str]], - changed_entity_key_ids: list[int], -) -> tuple[list[int], list[dict[str, Any]], list[dict[str, Any]]]: - """Resolve the caller's change-set into stored entity keys. 
- - Returns ``(key_ids, resolved, unresolved)``. The miss-set is the honesty - surface for the resolve join: a ``changed_ref`` that does not map to any - stored entity_key — or a raw ``entity_key_id`` that is unknown to this repo's - store — was, before this change, silently dropped, so an agent asking "does - my change break anything?" got a confident affected-set computed over an - *incomplete* seed set with no signal that half its refs never resolved. - Every unresolved input now appears in ``unresolved`` with a machine-readable - ``reason`` so the caller can ask "did my SEI actually resolve into the - snapshot?" and get a yes/no, not a silent omission. - """ - - ids: set[int] = set() - resolved: list[dict[str, Any]] = [] - unresolved: list[dict[str, Any]] = [] - - # Raw entity_key_ids are a compatibility seed, not a federation key; still, - # an id unknown to this store is a miss the caller must see. - known_ids = store.entity_keys_by_ids(repo, sorted(set(changed_entity_key_ids))) - for key_id in changed_entity_key_ids: - if key_id in known_ids: - ids.add(key_id) - row = known_ids[key_id] - resolved.append( - { - "ref": {"kind": "warpline_entity_key_id", "value": key_id}, - "entity_key_id": key_id, - "sei": row.get("sei"), - "locator": row.get("locator"), - } - ) - else: - unresolved.append( - { - "ref": {"kind": "warpline_entity_key_id", "value": key_id}, - "reason": "unknown_entity_key_id", - } - ) - - for ref in changed_refs: - resolved_row = store.resolve_ref(repo, ref["kind"], ref["value"]) - if resolved_row is not None: - resolved_id = _as_int(resolved_row["id"]) - ids.add(resolved_id) - resolved.append( - { - "ref": ref, - "entity_key_id": resolved_id, - "sei": resolved_row.get("sei"), - "locator": resolved_row.get("locator"), - } - ) - else: - reason = "sei_not_in_snapshot" if ref.get("kind") == "sei" else "ref_not_in_snapshot" - unresolved.append({"ref": ref, "reason": reason}) - - if rev_range is not None: - commit_shas = _rev_range_commits(repo, 
rev_range) - for event in store.list_change_events(repo, commit_shas=commit_shas): - event_key_id = event.get("entity_key_id") - if isinstance(event_key_id, int): - ids.add(event_key_id) - - return sorted(ids), resolved, unresolved - - def _unresolved_warnings(unresolved: list[dict[str, Any]]) -> list[str]: if not unresolved: return [] @@ -268,7 +125,7 @@ def change_list( limit: int = 50, ) -> dict[str, Any]: effective_range = _rev_range_from_refs(rev_range, base_ref, head_ref) - commit_shas = _rev_range_commits(repo, effective_range) + commit_shas = rev_range_commits(repo, effective_range) with WarplineStore.open(default_store_path(repo)) as store: events = store.list_change_events(repo, commit_shas=commit_shas) items: list[dict[str, Any]] = [] @@ -463,7 +320,7 @@ def entity_churn_count( until = window.get("until") rev_range = window.get("rev_range") with WarplineStore.open(default_store_path(repo)) as store: - commit_shas = _rev_range_commits(repo, rev_range) if rev_range else None + commit_shas = rev_range_commits(repo, rev_range) if rev_range else None items: list[dict[str, Any]] = [] has_sei = False for ref in refs: @@ -526,43 +383,6 @@ def entity_churn_count( ) -def _enrich_blast( - store: WarplineStore, repo: Path, result: dict[str, Any] -) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: - ids: set[int] = set() - for row in result.get("changed", []): - if isinstance(row.get("entity_key_id"), int): - ids.add(row["entity_key_id"]) - for row in result.get("affected", []): - if isinstance(row.get("entity_key_id"), int): - ids.add(row["entity_key_id"]) - for edge in row.get("via_edges", []): - for end in ("from", "to"): - if isinstance(edge.get(end), int): - ids.add(edge[end]) - key_rows = store.entity_keys_by_ids(repo, sorted(ids)) - - def view(key_id: Any) -> dict[str, Any]: - return entity_view(key_rows.get(int(key_id)) if isinstance(key_id, int) else None) - - changed = [{"entity": view(row.get("entity_key_id"))} for row in result.get("changed", [])] - 
affected = [] - for row in result.get("affected", []): - via = [ - { - "from": str(edge.get("from")), - "to": str(edge.get("to")), - "kind": edge.get("kind"), - "confidence": edge.get("confidence"), - } - for edge in row.get("via_edges", []) - ] - affected.append( - {"entity": view(row.get("entity_key_id")), "depth": row.get("depth"), "via_edges": via} - ) - return changed, affected - - # --------------------------------------------------------------------------- # warpline_impact_radius_get — warpline.impact_radius.v1 # --------------------------------------------------------------------------- @@ -581,7 +401,7 @@ def impact_radius( ) -> dict[str, Any]: refs = parse_changed_refs(changed_refs) with WarplineStore.open(default_store_path(repo)) as store: - key_ids, resolved, unresolved = _resolve_changed_inputs( + key_ids, resolved, unresolved = resolve_changed_inputs( store, repo, rev_range=rev_range, @@ -589,7 +409,7 @@ def impact_radius( changed_entity_key_ids=changed_entity_key_ids or [], ) result = compute_blast_radius(store, repo, key_ids, depth) - changed, affected = _enrich_blast(store, repo, result) + changed, affected = enrich_blast(store, repo, result) completeness = result["completeness"] staleness = result["staleness"] # The affected set is the list surface (changed is the seed, kept whole). 
@@ -627,24 +447,16 @@ def impact_radius( SCHEMA_IMPACT_RADIUS, query=query, data=data, - enrichment=enrichment_state(edges=_edges_enrichment(completeness, staleness)), + enrichment=enrichment_state(edges=edges_enrichment(completeness, staleness)), warnings=( - _completeness_warnings(completeness) - + _staleness_warnings(completeness, staleness) + completeness_warnings(completeness) + + staleness_warnings(completeness, staleness) + _unresolved_warnings(unresolved) + overflow_warnings ), ) -def _completeness_warnings(completeness: str) -> list[str]: - return { - "NO_SNAPSHOT": ["NO_SNAPSHOT: downstream traversal unavailable; changed set only"], - "SKIPPED": ["SKIPPED: graph snapshot was skipped; changed set only"], - "DELTA": ["DELTA: graph snapshot is partial; inspect failed_entities or staleness"], - }.get(completeness, []) - - # --------------------------------------------------------------------------- # warpline_reverify_worklist_get — warpline.reverify_worklist.v1 # --------------------------------------------------------------------------- @@ -668,7 +480,7 @@ def reverify_worklist( ) -> dict[str, Any]: refs = parse_changed_refs(changed_refs) with WarplineStore.open(default_store_path(repo)) as store: - key_ids, resolved, unresolved = _resolve_changed_inputs( + key_ids, resolved, unresolved = resolve_changed_inputs( store, repo, rev_range=rev_range, @@ -676,7 +488,7 @@ def reverify_worklist( changed_entity_key_ids=changed_entity_key_ids or [], ) result = compute_blast_radius(store, repo, key_ids, depth) - changed, affected = _enrich_blast(store, repo, result) + changed, affected = enrich_blast(store, repo, result) completeness = result["completeness"] staleness = result["staleness"] items, work_seen, filigree_candidates = render_reverify_worklist( @@ -748,13 +560,13 @@ def reverify_worklist( query=query, data=data, enrichment=enrichment_state( - edges=_edges_enrichment(completeness, staleness), + edges=edges_enrichment(completeness, staleness), work=work_state, 
), next_actions={"filigree": filigree_candidates}, warnings=( - _completeness_warnings(completeness) - + _staleness_warnings(completeness, staleness) + completeness_warnings(completeness) + + staleness_warnings(completeness, staleness) + _unresolved_warnings(unresolved) + _federation_warnings(federation) + overflow_warnings @@ -930,7 +742,7 @@ def capture_snapshot( "idempotency": result["idempotency"], "idempotency_key": idem_key, } - edges_state = _EDGES_FOR_COMPLETENESS.get(str(data["completeness"]), "absent") + edges_state = EDGES_FOR_COMPLETENESS.get(str(data["completeness"]), "absent") # capture touches the SEI authority (loomweave). When it is unreachable, # the SEI fact is unavailable (peer down) — never an implied clean state. sei_state = "unavailable" if client is None else "absent" @@ -955,5 +767,5 @@ def capture_snapshot( query=query, data=data, enrichment=enrichment_state(edges=edges_state, sei=sei_state), - warnings=_completeness_warnings(str(data["completeness"])) + warnings, + warnings=completeness_warnings(str(data["completeness"])) + warnings, ) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..32b3b34 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,34 @@ +"""Shared test helpers. + +`init_repo`/`commit` build a throwaway git repo with deterministic identity — +the fixture pattern the characterization and honesty-invariant suites both need. +Lifted here (round-2 conftest-helper minor) so test modules call one definition +instead of importing private helpers across files. 
+""" + +from __future__ import annotations + +import subprocess +from pathlib import Path + + +def git(repo: Path, *args: str) -> str: + return subprocess.run( + ["git", *args], cwd=repo, check=True, text=True, stdout=subprocess.PIPE + ).stdout.strip() + + +def init_repo(tmp_path: Path) -> Path: + repo = tmp_path / "repo" + repo.mkdir() + git(repo, "init") + git(repo, "config", "user.email", "agent@example.test") + git(repo, "config", "user.name", "Agent") + return repo + + +def commit(repo: Path, name: str, body: str) -> str: + (repo / name).write_text(body, encoding="utf-8") + git(repo, "add", name) + git(repo, "commit", "-m", f"write {name}") + return git(repo, "rev-parse", "HEAD") diff --git a/tests/test_blast_helpers.py b/tests/test_blast_helpers.py new file mode 100644 index 0000000..6234f1c --- /dev/null +++ b/tests/test_blast_helpers.py @@ -0,0 +1,181 @@ +"""Characterization tests for the blast-pipeline prep helpers. + +Locks ``rev_range_commits`` (bad-range error, None passthrough), +``resolve_changed_inputs`` (known/unknown key ids, sei ref resolution, +rev_range filtering), and ``enrich_blast`` (raw blast dict -> (changed, +affected) shape) BEFORE the Rung 0 extraction moves these bodies into +``warpline._blast``. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from conftest import commit, init_repo + +from warpline._blast import enrich_blast, resolve_changed_inputs, rev_range_commits +from warpline.errors import BadRevisionError +from warpline.store import WarplineStore + + +# --------------------------------------------------------------------------- rev_range_commits +def test_rev_range_commits_none_passthrough(tmp_path: Path) -> None: + repo = init_repo(tmp_path) + assert rev_range_commits(repo, None) is None + + +def test_rev_range_commits_resolves_range(tmp_path: Path) -> None: + repo = init_repo(tmp_path) + first = commit(repo, "a.py", "a = 1\n") + second = commit(repo, "a.py", "a = 2\n") + shas = rev_range_commits(repo, f"{first}..{second}") + assert shas == {second} + + +def test_rev_range_commits_bad_range_raises(tmp_path: Path) -> None: + repo = init_repo(tmp_path) + commit(repo, "a.py", "a = 1\n") + with pytest.raises(BadRevisionError): + rev_range_commits(repo, "no-such-ref..also-bad") + + +# --------------------------------------------------------------------------- resolve_changed_inputs +def test_resolve_changed_inputs_known_key_id(tmp_path: Path) -> None: + repo = tmp_path / "store_repo" + repo.mkdir() + with WarplineStore.open(tmp_path / "warpline.db") as store: + repo_id = store.ensure_repo(repo) + key = store.ensure_entity_key( + repo_id, locator="python:function:a", sei="sei-a", commit_sha="c1" + ) + ids, resolved, unresolved = resolve_changed_inputs( + store, + repo, + rev_range=None, + changed_refs=[], + changed_entity_key_ids=[key], + ) + assert ids == [key] + assert unresolved == [] + assert resolved[0]["entity_key_id"] == key + assert resolved[0]["sei"] == "sei-a" + + +def test_resolve_changed_inputs_unknown_key_id_is_a_miss(tmp_path: Path) -> None: + repo = tmp_path / "store_repo" + repo.mkdir() + with WarplineStore.open(tmp_path / "warpline.db") as store: + store.ensure_repo(repo) + ids, resolved, 
unresolved = resolve_changed_inputs( + store, + repo, + rev_range=None, + changed_refs=[], + changed_entity_key_ids=[999], + ) + assert ids == [] + assert resolved == [] + assert unresolved == [ + { + "ref": {"kind": "warpline_entity_key_id", "value": 999}, + "reason": "unknown_entity_key_id", + } + ] + + +def test_resolve_changed_inputs_sei_ref_resolution(tmp_path: Path) -> None: + repo = tmp_path / "store_repo" + repo.mkdir() + with WarplineStore.open(tmp_path / "warpline.db") as store: + repo_id = store.ensure_repo(repo) + key = store.ensure_entity_key( + repo_id, locator="python:function:a", sei="sei-a", commit_sha="c1" + ) + ids, resolved, unresolved = resolve_changed_inputs( + store, + repo, + rev_range=None, + changed_refs=[{"kind": "sei", "value": "sei-a"}], + changed_entity_key_ids=[], + ) + assert ids == [key] + assert resolved[0]["ref"] == {"kind": "sei", "value": "sei-a"} + assert unresolved == [] + + +def test_resolve_changed_inputs_unresolved_sei_reports_reason(tmp_path: Path) -> None: + repo = tmp_path / "store_repo" + repo.mkdir() + with WarplineStore.open(tmp_path / "warpline.db") as store: + store.ensure_repo(repo) + _ids, _resolved, unresolved = resolve_changed_inputs( + store, + repo, + rev_range=None, + changed_refs=[{"kind": "sei", "value": "missing"}], + changed_entity_key_ids=[], + ) + assert unresolved == [ + {"ref": {"kind": "sei", "value": "missing"}, "reason": "sei_not_in_snapshot"} + ] + + +def test_resolve_changed_inputs_rev_range_seeds_event_keys(tmp_path: Path) -> None: + repo = init_repo(tmp_path) + first = commit(repo, "a.py", "a = 1\n") + second = commit(repo, "a.py", "a = 2\n") + with WarplineStore.open(tmp_path / "warpline.db") as store: + repo_id = store.ensure_repo(repo) + key = store.ensure_entity_key( + repo_id, locator="file:a.py", sei=None, commit_sha=second + ) + store.append_change_event( + repo_id=repo_id, + entity_key_id=key, + commit_sha=second, + path="a.py", + change_kind="modified", + actor="Agent", + 
+ changed_at="2026-01-01T00:00:00+00:00", + ) + ids, _resolved, _unresolved = resolve_changed_inputs( + store, + repo, + rev_range=f"{first}..{second}", + changed_refs=[], + changed_entity_key_ids=[], + ) + assert key in ids + + +# --------------------------------------------------------------------------- enrich_blast +def test_enrich_blast_shapes_changed_and_affected(tmp_path: Path) -> None: + repo = tmp_path / "store_repo" + repo.mkdir() + with WarplineStore.open(tmp_path / "warpline.db") as store: + repo_id = store.ensure_repo(repo) + a = store.ensure_entity_key( + repo_id, locator="python:function:a", sei="sei-a", commit_sha="c1" + ) + b = store.ensure_entity_key( + repo_id, locator="python:function:b", sei="sei-b", commit_sha="c1" + ) + raw = { + "changed": [{"entity_key_id": a}], + "affected": [ + { + "entity_key_id": b, + "depth": 1, + "via_edges": [ + {"from": a, "to": b, "kind": "calls", "confidence": "resolved"} + ], + } + ], + } + changed, affected = enrich_blast(store, repo, raw) + assert changed == [{"entity": {"locator": "python:function:a", "sei": "sei-a"}}] + assert affected[0]["depth"] == 1 + assert affected[0]["entity"]["locator"] == "python:function:b" + via = affected[0]["via_edges"][0] + assert via == {"from": str(a), "to": str(b), "kind": "calls", "confidence": "resolved"} diff --git a/tests/test_enrichment_helpers.py b/tests/test_enrichment_helpers.py new file mode 100644 index 0000000..2a646a5 --- /dev/null +++ b/tests/test_enrichment_helpers.py @@ -0,0 +1,152 @@ +"""Characterization tests for the pure staleness/completeness helpers. + +Locks the (completeness, staleness) -> enrichment.edges mapping and the +warning text BEFORE the Rung 0 extraction moves these bodies into +``warpline._enrichment``. Pure functions, no fixtures. These imports were +retargeted to ``warpline._enrichment`` in Step 0.2, so the helper tests now +pin the behaviour as it lives in that module.
+""" + +from __future__ import annotations + +from pathlib import Path + +from warpline import commands +from warpline._enrichment import ( + EDGES_FOR_COMPLETENESS, + completeness_warnings, + edges_enrichment, + is_stale, + staleness_warnings, +) +from warpline.store import WarplineStore, default_store_path + + +# --------------------------------------------------------------------------- is_stale +def test_is_stale_zero_commits_behind_is_fresh() -> None: + assert is_stale({"commits_behind": 0, "snapshot_commit": "abc"}) is False + + +def test_is_stale_positive_commits_behind_is_stale() -> None: + assert is_stale({"commits_behind": 3, "snapshot_commit": "abc"}) is True + + +def test_is_stale_none_behind_with_snapshot_is_stale() -> None: + # Could not ask git, but a snapshot commit exists -> unknown therefore stale. + assert is_stale({"commits_behind": None, "snapshot_commit": "abc"}) is True + + +def test_is_stale_none_behind_without_snapshot_is_fresh() -> None: + # No snapshot commit at all -> nothing to be behind. 
+ assert is_stale({"commits_behind": None, "snapshot_commit": None}) is False + + +# --------------------------------------------------------------------------- EDGES map +def test_edges_for_completeness_constant() -> None: + assert EDGES_FOR_COMPLETENESS == { + "FULL": "present", + "DELTA": "partial", + "NO_SNAPSHOT": "absent", + "SKIPPED": "skipped", + } + + +# --------------------------------------------------------------------------- edges_enrichment +def test_edges_enrichment_full_fresh_is_present() -> None: + fresh = {"commits_behind": 0, "snapshot_commit": "abc"} + assert edges_enrichment("FULL", fresh) == "present" + + +def test_edges_enrichment_full_stale_downgrades_to_stale() -> None: + stale = {"commits_behind": 2, "snapshot_commit": "abc"} + assert edges_enrichment("FULL", stale) == "stale" + + +def test_edges_enrichment_delta_fresh_is_partial() -> None: + fresh = {"commits_behind": 0, "snapshot_commit": "abc"} + assert edges_enrichment("DELTA", fresh) == "partial" + + +def test_edges_enrichment_delta_stale_downgrades_to_stale() -> None: + stale = {"commits_behind": 1, "snapshot_commit": "abc"} + assert edges_enrichment("DELTA", stale) == "stale" + + +def test_edges_enrichment_no_snapshot_is_absent_regardless_of_staleness() -> None: + stale = {"commits_behind": 5, "snapshot_commit": "abc"} + assert edges_enrichment("NO_SNAPSHOT", stale) == "absent" + + +def test_edges_enrichment_skipped_is_skipped_regardless_of_staleness() -> None: + stale = {"commits_behind": 5, "snapshot_commit": "abc"} + assert edges_enrichment("SKIPPED", stale) == "skipped" + + +def test_edges_enrichment_unknown_completeness_defaults_to_absent() -> None: + fresh = {"commits_behind": 0, "snapshot_commit": "abc"} + assert edges_enrichment("WEIRD", fresh) == "absent" + + +# --------------------------------------------------------------------------- staleness_warnings +def test_staleness_warnings_full_fresh_is_empty() -> None: + fresh = {"commits_behind": 0, "snapshot_commit": "abc"} + 
assert staleness_warnings("FULL", fresh) == [] + + +def test_staleness_warnings_full_stale_known_count() -> None: + stale = {"commits_behind": 2, "snapshot_commit": "abcdef0123"} + warns = staleness_warnings("FULL", stale) + assert len(warns) == 1 + assert warns[0].startswith("STALE: edge snapshot @ abcdef01") + assert "2 commit(s) behind HEAD" in warns[0] + + +def test_staleness_warnings_full_stale_unknown_count() -> None: + stale = {"commits_behind": None, "snapshot_commit": "abcdef0123"} + warns = staleness_warnings("FULL", stale) + assert len(warns) == 1 + assert "freshness unknown" in warns[0] + + +def test_staleness_warnings_no_snapshot_is_empty() -> None: + stale = {"commits_behind": 5, "snapshot_commit": "abc"} + assert staleness_warnings("NO_SNAPSHOT", stale) == [] + + +# --------------------------------------------------------------------------- completeness_warnings +def test_completeness_warnings_no_snapshot() -> None: + warns = completeness_warnings("NO_SNAPSHOT") + assert warns == ["NO_SNAPSHOT: downstream traversal unavailable; changed set only"] + + +def test_completeness_warnings_skipped() -> None: + warns = completeness_warnings("SKIPPED") + assert warns == ["SKIPPED: graph snapshot was skipped; changed set only"] + + +def test_completeness_warnings_delta() -> None: + warns = completeness_warnings("DELTA") + assert warns == ["DELTA: graph snapshot is partial; inspect failed_entities or staleness"] + + +def test_completeness_warnings_full_is_empty() -> None: + assert completeness_warnings("FULL") == [] + + +# ----------------------------------------------------- capture_snapshot dict access (B1) +def test_capture_snapshot_maps_edges_via_edges_for_completeness(tmp_path: Path) -> None: + """``capture_snapshot`` reads ``EDGES_FOR_COMPLETENESS`` (commands.py) to map + its completeness to ``enrichment.edges``. 
This pins that dict access stays + wired after the Rung 0 extraction: with no loomweave the capture is SKIPPED, + so the closed vocab must resolve to ``"skipped"`` (not the absent fallback). + """ + + repo = tmp_path / "repo" + repo.mkdir() + with WarplineStore.open(default_store_path(repo)) as store: + store.ensure_repo(repo) + envelope = commands.capture_snapshot( + repo, commit="c1", loomweave_command="/no/such/loomweave" + ) + assert envelope["data"]["completeness"] == "SKIPPED" + assert envelope["enrichment"]["edges"] == EDGES_FOR_COMPLETENESS["SKIPPED"] == "skipped" diff --git a/tests/test_honesty_invariant.py b/tests/test_honesty_invariant.py index 27c1e6d..22f67cf 100644 --- a/tests/test_honesty_invariant.py +++ b/tests/test_honesty_invariant.py @@ -11,10 +11,11 @@ from __future__ import annotations -import subprocess from pathlib import Path import pytest +from conftest import commit as _commit +from conftest import init_repo as _init_repo from warpline import commands from warpline.errors import InvalidChangedRefsError @@ -22,28 +23,6 @@ from warpline.store import WarplineStore, default_store_path -def _git(repo: Path, *args: str) -> str: - return subprocess.run( - ["git", *args], cwd=repo, check=True, text=True, stdout=subprocess.PIPE - ).stdout.strip() - - -def _init_repo(tmp_path: Path) -> Path: - repo = tmp_path / "repo" - repo.mkdir() - _git(repo, "init") - _git(repo, "config", "user.email", "agent@example.test") - _git(repo, "config", "user.name", "Agent") - return repo - - -def _commit(repo: Path, name: str, body: str) -> str: - (repo / name).write_text(body, encoding="utf-8") - _git(repo, "add", name) - _git(repo, "commit", "-m", f"write {name}") - return _git(repo, "rev-parse", "HEAD") - - # --------------------------------------------------------------------------- (a) def test_stale_but_full_snapshot_emits_edges_stale_GOLDEN_VECTOR(tmp_path: Path) -> None: """GOLDEN VECTOR — the stale-but-FULL-snapshot path. 
From b152106c2810ba9236a2bd8b9957a429c061d6f2 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 16 Jun 2026 01:58:39 +1000 Subject: [PATCH 04/10] feat(store): ordered migration runner + PRAGMA hardening (Rung 1a) Establish a real ordered, forward-only migration runner in store.py as the prerequisite gate for all schema work (Rung 1b v2 anchors, Rung 2 v3 co-change). The runner is established here with an empty MIGRATIONS list (highest known version stays 1); v2/v3 land in later tracks. - Connection hardening: foreign_keys=ON, busy_timeout=5000, synchronous=NORMAL (WAL retained via SCHEMA executescript fresh-DB note). - SQLite >= 3.35 floor, justified by the RETURNING clause in create_edge_snapshot (no migration drops a column). - Per-step BEGIN IMMEDIATE / COMMIT with conn.execute only (never executescript) so user_version + meta update atomically (R3). - Concurrent open() re-reads user_version under the RESERVED lock and skips already-applied steps (idempotent, no double-apply). - M9 reconcile: user_version==0 with meta.schema_version=='1' adopts 1; with a divergent meta value, warn + adopt it before later steps. - user_version > highest-known: warn to health_log, reads stay safe, no fail. - SCHEMA DDL is FROZEN after Rung 1a (all change via MIGRATIONS). - test_store.py BOTH schema_version()==1 assertions intentionally left at 1 (they flip to ==2 when v2 lands in Rung 1b, not here). Add tests/test_store_migrations.py. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/warpline/store.py | 178 ++++++++++++++++++++++++- tests/test_store_migrations.py | 230 +++++++++++++++++++++++++++++++++ 2 files changed, 407 insertions(+), 1 deletion(-) create mode 100644 tests/test_store_migrations.py diff --git a/src/warpline/store.py b/src/warpline/store.py index 45bdb32..fe10bf9 100644 --- a/src/warpline/store.py +++ b/src/warpline/store.py @@ -1,10 +1,29 @@ from __future__ import annotations import hashlib +import logging import sqlite3 +from collections.abc import Callable from pathlib import Path from types import TracebackType - +from typing import NamedTuple + +logger = logging.getLogger(__name__) + +# Minimum bundled-SQLite floor. Justification: ``create_edge_snapshot`` uses the +# ``RETURNING`` clause (store.py), first available in SQLite 3.35.0. No migration +# in this project drops a column, so the floor is RETURNING, not ALTER … DROP COLUMN. +_MIN_SQLITE_VERSION = (3, 35, 0) + +# After Rung 1a, ``SCHEMA`` DDL is FROZEN. All schema changes (added columns, +# new tables) go through ``MIGRATIONS`` — never by editing ``SCHEMA``. ``SCHEMA`` +# remains only the fresh-DB base-table definition (idempotent ``IF NOT EXISTS``). +# +# Fresh-DB WAL note: ``executescript(SCHEMA)`` runs ``PRAGMA journal_mode=WAL`` +# via an implicit-commit ``executescript`` — intentional, and deliberately +# OUTSIDE the per-migration ``BEGIN IMMEDIATE`` pattern (Python's executescript +# issues an implicit COMMIT, so the migration runner uses ``conn.execute`` per +# statement instead; see ``_run_migrations``). SCHEMA = """ PRAGMA journal_mode=WAL; CREATE TABLE IF NOT EXISTS meta ( @@ -93,6 +112,32 @@ """ +class Migration(NamedTuple): + """One ordered, forward-only schema migration. 
+ + ``apply`` runs its ``conn.execute(...)`` statements INSIDE an already-open + ``BEGIN IMMEDIATE`` transaction owned by the runner; it must NOT call + ``executescript`` (which would issue an implicit COMMIT and break the + transaction's atomicity, R3) and must NOT commit/begin itself. + """ + + version: int + apply: Callable[[sqlite3.Connection], None] + + +# Ordered, forward-only migrations. Each step's ``version`` is strictly greater +# than the previous. v2 (anchor columns) lands in Rung 1b; v3 (co_change_pairs) +# in Rung 2 Track A. In Rung 1a this list is empty: the runner is established +# but the highest known version is 1 (the base ``SCHEMA``). +MIGRATIONS: list[Migration] = [] + +# Highest schema version this build knows how to produce: the max migration +# version, or the base ``SCHEMA`` version (1) when ``MIGRATIONS`` is empty. A DB +# whose ``user_version`` exceeds this was written by a newer build — reads stay +# safe (additive-only history), so the runner WARNS rather than failing. +HIGHEST_KNOWN_VERSION = max((m.version for m in MIGRATIONS), default=1) + + def default_store_path(repo: Path, base_dir: Path | None = None) -> Path: root = repo.resolve() state = base_dir or root / ".weft" / "warpline" @@ -105,17 +150,148 @@ def _ensure_store_gitignore(store_dir: Path) -> None: gitignore.write_text(WARPLINE_GITIGNORE_CONTENTS, encoding="utf-8") +# Sentinel repo_id for store-level (repo-agnostic) health events written during +# open()/migration, where no repo Path is in scope.
+_STORE_HEALTH_REPO_ID = "__store__" + + +def _meta_schema_version(conn: sqlite3.Connection) -> str | None: + row = conn.execute("SELECT value FROM meta WHERE key = 'schema_version'").fetchone() + return None if row is None else str(row["value"]) + + +def _store_health(conn: sqlite3.Connection, code: str, message: str) -> None: + """Record a store-level health event (open/migration path, no repo in scope).""" + + conn.execute( + "INSERT INTO health_log(repo_id, code, message) VALUES (?, ?, ?)", + (_STORE_HEALTH_REPO_ID, code, message), + ) + + +def _run_migrations(conn: sqlite3.Connection) -> None: + """Apply ordered forward-only migrations from ``user_version`` to HIGHEST_KNOWN. + + Atomicity (R3): each step runs inside its own explicit ``BEGIN IMMEDIATE`` / + ``COMMIT``; the step's ``apply`` callable uses ``conn.execute`` only (never + ``executescript``). ``PRAGMA user_version`` and the ``meta`` row are updated + in the SAME transaction. Concurrent ``open()`` calls block on the RESERVED + lock (busy_timeout), then re-read ``user_version`` and skip applied steps. + """ + + current = int(conn.execute("PRAGMA user_version").fetchone()[0]) + + # M9 / legacy reconcile: a DB created before the runner existed has + # user_version==0 but a meta.schema_version row. Adopt the meta value so we + # do not re-run already-applied schema. The expected legacy value is '1' + # (base SCHEMA); any other value is from a divergent/newer writer — warn and + # adopt it before running steps with version > that. + if current == 0: + meta_version = _meta_schema_version(conn) + if meta_version is None or meta_version == "1": + # Fresh DB or pre-runner legacy v1: the expected baseline. The base + # SCHEMA always inserts schema_version='1', so meta_version is None + # only on a corrupt/empty meta — treat as baseline 1 either way. 
+ current = 1 + else: + try: + current = int(meta_version) + except (TypeError, ValueError): + logger.warning( + "warpline store: non-integer meta.schema_version %r with " + "user_version=0; adopting baseline version 1", + meta_version, + ) + _store_health( + conn, + "MIGRATION_META_UNPARSEABLE", + f"meta.schema_version={meta_version!r} not an int; adopted 1", + ) + current = 1 + else: + logger.warning( + "warpline store: user_version=0 but meta.schema_version=%s " + "(expected '1'); adopting %d before running later migrations", + meta_version, + current, + ) + _store_health( + conn, + "MIGRATION_META_RECONCILE", + f"user_version=0, meta.schema_version={meta_version}; adopted {current}", + ) + # Persist the reconciled version once so the next open() short-circuits. + conn.execute(f"PRAGMA user_version = {current}") + conn.commit() + + if current > HIGHEST_KNOWN_VERSION: + # Newer writer touched this DB. Reads are still safe (additive-only + # history); warn, record to health_log, and proceed without applying. + logger.warning( + "warpline store: on-disk schema version %d exceeds highest known %d; " + "this build is older than the writer — reads remain safe", + current, + HIGHEST_KNOWN_VERSION, + ) + _store_health( + conn, + "SCHEMA_VERSION_AHEAD", + f"on-disk version {current} > highest known {HIGHEST_KNOWN_VERSION}", + ) + conn.commit() + return + + for migration in MIGRATIONS: + if migration.version <= current: + continue + conn.execute("BEGIN IMMEDIATE") + try: + # Re-read under the RESERVED lock: a concurrent writer may have + # applied this (or a later) step while we blocked on busy_timeout. + locked_version = int(conn.execute("PRAGMA user_version").fetchone()[0]) + if migration.version <= locked_version: + conn.execute("COMMIT") + current = locked_version + continue + migration.apply(conn) + conn.execute(f"PRAGMA user_version = {migration.version}") + conn.execute( + "UPDATE meta SET value = ? 
WHERE key = 'schema_version'", + (str(migration.version),), + ) + conn.execute("COMMIT") + except BaseException: + conn.execute("ROLLBACK") + raise + current = migration.version + + class WarplineStore: def __init__(self, conn: sqlite3.Connection) -> None: self.conn = conn @classmethod def open(cls, path: Path) -> WarplineStore: + if sqlite3.sqlite_version_info < _MIN_SQLITE_VERSION: + have = ".".join(str(p) for p in sqlite3.sqlite_version_info) + need = ".".join(str(p) for p in _MIN_SQLITE_VERSION) + raise RuntimeError( + f"warpline requires SQLite >= {need} (RETURNING clause); " + f"this Python is bundled with SQLite {have}" + ) path.parent.mkdir(parents=True, exist_ok=True) _ensure_store_gitignore(path.parent) conn = sqlite3.connect(path) conn.row_factory = sqlite3.Row + # Connection hardening. journal_mode=WAL is also set by SCHEMA below; + # foreign_keys/busy_timeout/synchronous are per-connection pragmas. + conn.execute("PRAGMA foreign_keys = ON") + conn.execute("PRAGMA busy_timeout = 5000") + conn.execute("PRAGMA synchronous = NORMAL") + # Fresh-DB base tables (idempotent IF NOT EXISTS). The implicit-commit + # executescript is intentional here and outside the migration pattern. conn.executescript(SCHEMA) + _run_migrations(conn) return cls(conn) def __enter__(self) -> WarplineStore: diff --git a/tests/test_store_migrations.py b/tests/test_store_migrations.py new file mode 100644 index 0000000..18e3f3b --- /dev/null +++ b/tests/test_store_migrations.py @@ -0,0 +1,230 @@ +"""Rung 1a: ordered migration runner + PRAGMA hardening. + +The base SCHEMA is FROZEN after Rung 1a; all schema change lands via the ordered +``MIGRATIONS`` list. In Rung 1a that list is empty (highest known version == 1), +so the runner is exercised here against synthetic migrations monkeypatched onto +the module — proving ordering, atomicity, idempotence, and concurrency safety +without coupling these tests to a not-yet-shipped v2. 
+""" + +from __future__ import annotations + +import sqlite3 +import threading +from pathlib import Path + +import pytest + +from warpline import store as store_mod +from warpline.store import SCHEMA, Migration, WarplineStore + + +def _user_version(db: Path) -> int: + conn = sqlite3.connect(db) + try: + return int(conn.execute("PRAGMA user_version").fetchone()[0]) + finally: + conn.close() + + +def _health_codes(db: Path) -> list[str]: + conn = sqlite3.connect(db) + conn.row_factory = sqlite3.Row + try: + rows = conn.execute("SELECT code FROM health_log ORDER BY id").fetchall() + return [str(r["code"]) for r in rows] + finally: + conn.close() + + +def test_fresh_db_lands_at_highest_known_version(tmp_path: Path) -> None: + db = tmp_path / "warpline.db" + with WarplineStore.open(db) as store: + assert store.schema_version() == store_mod.HIGHEST_KNOWN_VERSION + # In Rung 1a the highest known version is 1; user_version is reconciled to it. + assert _user_version(db) == store_mod.HIGHEST_KNOWN_VERSION + assert store_mod.HIGHEST_KNOWN_VERSION == 1 + + +def test_connection_pragmas_are_hardened(tmp_path: Path) -> None: + db = tmp_path / "warpline.db" + with WarplineStore.open(db) as store: + assert int(store.conn.execute("PRAGMA foreign_keys").fetchone()[0]) == 1 + assert int(store.conn.execute("PRAGMA busy_timeout").fetchone()[0]) == 5000 + # journal_mode=WAL set via SCHEMA executescript (fresh-DB note). + assert str(store.conn.execute("PRAGMA journal_mode").fetchone()[0]).lower() == "wal" + + +def test_legacy_v1_db_reconciles_user_version_on_open(tmp_path: Path) -> None: + """A pre-runner DB (base tables + meta='1', user_version=0) reconciles to 1.""" + db = tmp_path / "warpline.db" + # Simulate a DB written before the runner existed: SCHEMA applied (so + # meta.schema_version='1') but user_version never set. 
+ raw = sqlite3.connect(db) + raw.executescript(SCHEMA) + raw.commit() + raw.close() + assert _user_version(db) == 0 + + with WarplineStore.open(db) as store: + assert store.schema_version() == 1 + assert _user_version(db) == 1 + # No reconcile-warn rows for the expected legacy baseline. + assert "MIGRATION_META_RECONCILE" not in _health_codes(db) + + +def test_reopen_is_a_no_op(tmp_path: Path) -> None: + db = tmp_path / "warpline.db" + with WarplineStore.open(db) as store: + assert store.schema_version() == store_mod.HIGHEST_KNOWN_VERSION + first = _health_codes(db) + with WarplineStore.open(db) as store: + assert store.schema_version() == store_mod.HIGHEST_KNOWN_VERSION + # Re-open adds no health rows and does not change the version. + assert _health_codes(db) == first + assert _user_version(db) == store_mod.HIGHEST_KNOWN_VERSION + + +def test_user_version_ahead_of_known_warns_to_health_log_and_does_not_fail( + tmp_path: Path, +) -> None: + """A DB written by a newer build (user_version > highest known) reads safely.""" + db = tmp_path / "warpline.db" + with WarplineStore.open(db): + pass + # Forge a future on-disk version. + raw = sqlite3.connect(db) + raw.execute("PRAGMA user_version = 99") + raw.commit() + raw.close() + + with WarplineStore.open(db) as store: + # schema_version() reads meta (still 1); the runner did not fail. + assert store.schema_version() == 1 + # Reads remain available. 
+ store.ensure_repo(tmp_path) + assert _user_version(db) == 99 # untouched + assert "SCHEMA_VERSION_AHEAD" in _health_codes(db) + + +def test_user_version_zero_with_divergent_meta_adopts_and_warns(tmp_path: Path) -> None: + """M9: user_version==0 but meta.schema_version!='1' → adopt meta value + warn.""" + db = tmp_path / "warpline.db" + raw = sqlite3.connect(db) + raw.executescript(SCHEMA) + raw.execute("UPDATE meta SET value = '5' WHERE key = 'schema_version'") + raw.execute("PRAGMA user_version = 0") + raw.commit() + raw.close() + + with WarplineStore.open(db) as store: + # Adopted 5 from meta; 5 > highest known (1), so it is also flagged ahead. + assert store.schema_version() == 5 + codes = _health_codes(db) + assert "MIGRATION_META_RECONCILE" in codes + assert "SCHEMA_VERSION_AHEAD" in codes + assert _user_version(db) == 5 + + +def test_migration_runner_applies_ordered_steps( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """The runner applies steps version>current in order, updating both markers.""" + applied: list[int] = [] + + def _v2(conn: sqlite3.Connection) -> None: + conn.execute("CREATE TABLE rung1a_probe_v2 (x INTEGER)") + applied.append(2) + + def _v3(conn: sqlite3.Connection) -> None: + conn.execute("ALTER TABLE rung1a_probe_v2 ADD COLUMN y INTEGER") + applied.append(3) + + migrations = [Migration(2, _v2), Migration(3, _v3)] + monkeypatch.setattr(store_mod, "MIGRATIONS", migrations) + monkeypatch.setattr(store_mod, "HIGHEST_KNOWN_VERSION", 3) + + db = tmp_path / "warpline.db" + with WarplineStore.open(db) as store: + assert store.schema_version() == 3 + cols = {r["name"] for r in store.conn.execute("PRAGMA table_info(rung1a_probe_v2)")} + assert cols == {"x", "y"} + assert applied == [2, 3] + assert _user_version(db) == 3 + + # Idempotent: re-open applies nothing further. 
+ applied.clear() + with WarplineStore.open(db) as store: + assert store.schema_version() == 3 + assert applied == [] + + +def test_failed_migration_rolls_back_atomically( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """R3: a raising step rolls back; user_version and meta stay at the prior version.""" + + def _v2_boom(conn: sqlite3.Connection) -> None: + conn.execute("CREATE TABLE rung1a_partial (x INTEGER)") + raise RuntimeError("boom") + + monkeypatch.setattr(store_mod, "MIGRATIONS", [Migration(2, _v2_boom)]) + monkeypatch.setattr(store_mod, "HIGHEST_KNOWN_VERSION", 2) + + db = tmp_path / "warpline.db" + with pytest.raises(RuntimeError, match="boom"): + WarplineStore.open(db) + + # The partial table must NOT have been committed, and version stays at 1. + assert _user_version(db) == 1 + conn = sqlite3.connect(db) + try: + rows = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='rung1a_partial'" + ).fetchall() + assert rows == [] + finally: + conn.close() + + +def test_concurrent_open_does_not_double_apply( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Two threads opening at once both converge to the migrated version once.""" + apply_count = 0 + lock = threading.Lock() + + def _v2(conn: sqlite3.Connection) -> None: + nonlocal apply_count + with lock: + apply_count += 1 + conn.execute("CREATE TABLE IF NOT EXISTS rung1a_concurrent (x INTEGER)") + + db = tmp_path / "warpline.db" + # Materialize the base schema (user_version=1) WITHOUT the synthetic v2 in + # play, so both threads then race the v2 step from an identical baseline. 
+ with WarplineStore.open(db) as store: + assert store.schema_version() == 1 + + monkeypatch.setattr(store_mod, "MIGRATIONS", [Migration(2, _v2)]) + monkeypatch.setattr(store_mod, "HIGHEST_KNOWN_VERSION", 2) + + errors: list[BaseException] = [] + + def _worker() -> None: + try: + with WarplineStore.open(db) as store: + assert store.schema_version() == 2 + except BaseException as exc: # noqa: BLE001 - surfaced via errors list + errors.append(exc) + + threads = [threading.Thread(target=_worker) for _ in range(2)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert errors == [] + assert _user_version(db) == 2 + # The migration body ran exactly once across both opens. + assert apply_count == 1 From dbb6a747bd5465432131625ed21d33d608540470 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 16 Jun 2026 02:04:06 +1000 Subject: [PATCH 05/10] feat(store): working-context anchor columns + detected_context (Rung 1b, schema v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migration v2 adds detected_branch, detected_head_sha, detected_at, and detected_context (clean|working_tree_dirty|detached_head, M8/E4) to change_events — the working-context anchor for the DETECTION act, orthogonal to the SEI and never on entity_keys. git.py ingest_commit computes the anchor once per call and threads it onto every change_event it writes; backfill records all anchor columns NULL (B3: reconstruction is not detection). store.append_change_event takes optional anchor kwargs; list_change_events and timeline surface the new columns. Updates both test_store.py schema_version assertions to ==2 and reconciles the Rung 1a migration-runner tests to a non-empty MIGRATIONS list (highest known version 2). 
Adds tests/test_anchor_capture.py covering branch/dirty/detached detection, the B3 backfill all-NULL rule, the store read surface, the v1->v2 migration, and the M10 additive-column non-regression for change_list / entity_timeline. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/warpline/git.py | 69 +++++++++++- src/warpline/store.py | 55 ++++++++- tests/test_anchor_capture.py | 198 +++++++++++++++++++++++++++++++++ tests/test_store.py | 4 +- tests/test_store_migrations.py | 56 ++++++---- 5 files changed, 353 insertions(+), 29 deletions(-) create mode 100644 tests/test_anchor_capture.py diff --git a/src/warpline/git.py b/src/warpline/git.py index 01ce466..c178f96 100644 --- a/src/warpline/git.py +++ b/src/warpline/git.py @@ -2,8 +2,9 @@ import json import subprocess +from datetime import UTC, datetime from pathlib import Path -from typing import Any +from typing import Any, NamedTuple from warpline.locators import python_entity_locators from warpline.loomweave import ToolClient, resolve_sei_for_locator @@ -55,6 +56,61 @@ def _commit_meta(repo: Path, sha: str) -> dict[str, str]: } +class _Anchor(NamedTuple): + """Working-context anchor for one detection call (Rung 1b). + + ``branch``/``head_sha`` are git's own values (no minted identifier), and + ``detected_at`` is a clock reading; warpline owns only the contract of + recording them. ``context`` is the honest E4/M8 signal: + ``clean`` / ``working_tree_dirty`` / ``detached_head``. + """ + + branch: str | None + head_sha: str | None + detected_at: str + context: str + + +def _git_optional(repo: Path, args: list[str]) -> str | None: + """Run a git command that is allowed to exit non-zero (returns None then).""" + + result = subprocess.run( + ["git", *args], cwd=repo, check=False, text=True, capture_output=True + ) + if result.returncode != 0: + return None + return result.stdout.strip() + + +def _detect_anchor(repo: Path) -> _Anchor: + """Compute the working-context anchor once, at detection time. 
+ + Detached HEAD → ``branch=None`` + ``context='detached_head'``. A dirty work + tree → ``context='working_tree_dirty'`` (honest E4 signal; the recorded + ``head_sha`` is the committed HEAD, which is stable, but the working tree + that produced the detection is not — so the context flags it rather than + emitting a false-precise clean anchor). Otherwise ``context='clean'``. + """ + + detected_at = datetime.now(UTC).isoformat() + head_sha = _git_optional(repo, ["rev-parse", "HEAD"]) + branch = _git_optional(repo, ["symbolic-ref", "--short", "-q", "HEAD"]) + # ``--untracked-files=no``: a dirty signal means UNCOMMITTED TRACKED changes, + # the real "the working tree that produced this detection is unstable" risk + # (E4). Untracked files (notably warpline's own ``.weft/warpline/`` runtime + # tree) are not part of what was detected and must not flip the signal. + dirty = bool(_git_optional(repo, ["status", "--porcelain", "--untracked-files=no"])) + if branch is None: + context = "detached_head" + elif dirty: + context = "working_tree_dirty" + else: + context = "clean" + return _Anchor( + branch=branch, head_sha=head_sha, detected_at=detected_at, context=context + ) + + def _name_status(repo: Path, sha: str) -> list[tuple[str, str]]: raw = _git(repo, ["diff-tree", "--root", "--no-commit-id", "--name-status", "-r", sha]) rows: list[tuple[str, str]] = [] @@ -112,6 +168,10 @@ def backfill( repo_id = store.ensure_repo(repo) count = 0 sei_stats = {"resolved": 0, "absent": 0} + # B3: backfill is RECONSTRUCTION, not DETECTION — it cannot know the working + # context that introduced a historical commit. It therefore passes NO anchor + # kwargs, so all four v2 anchor columns stay NULL (reads as ``unavailable``, + # not a false clean/detected signal). 
for sha in _commits(repo, since=since): meta = _commit_meta(repo, sha) store.upsert_commit(repo_id, meta) @@ -143,6 +203,9 @@ def ingest_commit( resolved = _git(repo, ["rev-parse", sha]).strip() meta = _commit_meta(repo, resolved) store.upsert_commit(repo_id, meta) + # Working-context anchor (Rung 1b): the detection act, computed ONCE per + # ingest call and threaded onto every change_event it writes. + anchor = _detect_anchor(repo) changed = 0 sei_stats = {"resolved": 0, "absent": 0} for status, path in _name_status(repo, resolved): @@ -163,6 +226,10 @@ def ingest_commit( change_kind=_change_kind(status), actor=meta["author"], changed_at=meta["authored_at"], + detected_branch=anchor.branch, + detected_head_sha=anchor.head_sha, + detected_at=anchor.detected_at, + detected_context=anchor.context, ) changed += 1 return {"commit": resolved, "changes": changed, "sei": sei_stats} diff --git a/src/warpline/store.py b/src/warpline/store.py index fe10bf9..8f65530 100644 --- a/src/warpline/store.py +++ b/src/warpline/store.py @@ -125,11 +125,39 @@ class Migration(NamedTuple): apply: Callable[[sqlite3.Connection], None] +def _migrate_v2_anchor_columns(conn: sqlite3.Connection) -> None: + """v2 (Rung 1b): working-context anchor columns on ``change_events``. + + The anchor identifies the **detection act** (a change episode, verb-moment), + orthogonal to the SEI (entity identity, noun) — so it lives on + ``change_events``, never on ``entity_keys``. All columns are NULLable with no + default (O(1) metadata-only ALTERs): a backfilled or pre-v2 row reads NULL, + which the honesty invariant surfaces as ``unavailable`` working-context + rather than a clean-looking default. + + - ``detected_branch`` — git symbolic-ref short name; NULL if detached. + - ``detected_head_sha`` — HEAD sha AT DETECTION (working context; distinct + from ``commit_sha`` = the introducing commit). + - ``detected_at`` — ISO-8601 UTC detection timestamp (distinct from + ``changed_at`` = author time). 
+ - ``detected_context`` — honest E4/M8 signal carrier, one of + ``clean`` / ``working_tree_dirty`` / ``detached_head`` (NULL on + backfilled/pre-v2 rows). Subsumes the detached-HEAD case so a NULL + ``detected_head_sha`` is never overloaded to mean "detached". + """ + + conn.execute("ALTER TABLE change_events ADD COLUMN detected_branch TEXT") + conn.execute("ALTER TABLE change_events ADD COLUMN detected_head_sha TEXT") + conn.execute("ALTER TABLE change_events ADD COLUMN detected_at TEXT") + conn.execute("ALTER TABLE change_events ADD COLUMN detected_context TEXT") + + # Ordered, forward-only migrations. Each step's ``version`` is strictly greater # than the previous. v2 (anchor columns) lands in Rung 1b; v3 (co_change_pairs) -# in Rung 2 Track A. In Rung 1a this list is empty: the runner is established -# but the highest known version is 1 (the base ``SCHEMA``). -MIGRATIONS: list[Migration] = [] +# in Rung 2 Track A. +MIGRATIONS: list[Migration] = [ + Migration(version=2, apply=_migrate_v2_anchor_columns), +] # Highest schema version this build knows how to produce. Equals the base # ``SCHEMA`` (1) plus the max migration version. A DB whose ``user_version`` @@ -405,13 +433,22 @@ def append_change_event( actor: str, changed_at: str, hunk_summary: str = "", + detected_branch: str | None = None, + detected_head_sha: str | None = None, + detected_at: str | None = None, + detected_context: str | None = None, ) -> None: + # Working-context anchor (v2) is optional: unsupplied → NULL, which is + # backward compatible and reads as ``unavailable`` (never a clean + # default). Columns are named explicitly so the additive v2 columns + # cannot shift positionally (M10). self.conn.execute( """ INSERT OR IGNORE INTO change_events( repo_id, entity_key_id, commit_sha, path, change_kind, - actor, changed_at, hunk_summary - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
+ actor, changed_at, hunk_summary, + detected_branch, detected_head_sha, detected_at, detected_context + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( repo_id, @@ -422,6 +459,10 @@ def append_change_event( actor, changed_at, hunk_summary, + detected_branch, + detected_head_sha, + detected_at, + detected_context, ), ) self.conn.commit() @@ -442,6 +483,8 @@ def list_change_events( f""" SELECT ce.id AS change_event_id, ce.commit_sha, ce.path, ce.change_kind, ce.actor, ce.changed_at, + ce.detected_branch, ce.detected_head_sha, ce.detected_at, + ce.detected_context, ek.id AS entity_key_id, ek.locator, ek.sei FROM change_events ce JOIN entity_keys ek ON ek.id = ce.entity_key_id @@ -459,6 +502,8 @@ def timeline(self, repo: Path, entity: str) -> list[dict[str, object]]: """ SELECT ce.id AS change_event_id, ce.commit_sha, ce.path, ce.change_kind, ce.actor, ce.changed_at, + ce.detected_branch, ce.detected_head_sha, ce.detected_at, + ce.detected_context, ek.id AS entity_key_id, ek.locator, ek.sei FROM change_events ce JOIN entity_keys ek ON ek.id = ce.entity_key_id diff --git a/tests/test_anchor_capture.py b/tests/test_anchor_capture.py new file mode 100644 index 0000000..01e33dd --- /dev/null +++ b/tests/test_anchor_capture.py @@ -0,0 +1,198 @@ +"""Rung 1b: working-context anchor columns (schema v2). + +The anchor (``detected_branch`` / ``detected_head_sha`` / ``detected_at`` / +``detected_context``) records the DETECTION act on ``change_events`` — orthogonal +to the SEI (entity identity). These tests lock the behaviours the plan +names: branch detection on ingest, the honest ``detected_context`` signal +(clean / working_tree_dirty / detached_head, M8/E4), the B3 backfill all-NULL +rule (reconstruction is not detection), the store read surface, the v1→v2 +migration, and the M10 additive-column non-regression for the read commands. + +These require the Rung 1a migration runner (the v2 anchor columns arrive through +``MIGRATIONS``, not the frozen base SCHEMA).
+""" + +from __future__ import annotations + +import sqlite3 +from pathlib import Path + +from conftest import commit as _commit +from conftest import git as _git +from conftest import init_repo as _init_repo + +from warpline import commands +from warpline.git import backfill, ingest_commit +from warpline.store import SCHEMA, WarplineStore, default_store_path + + +def _events(store: WarplineStore, repo: Path) -> list[dict[str, object]]: + return store.list_change_events(repo) + + +def test_ingest_on_branch_records_branch_head_and_context(tmp_path: Path) -> None: + repo = _init_repo(tmp_path) + sha = _commit(repo, "a.py", "x = 1\n") + branch = _git(repo, "rev-parse", "--abbrev-ref", "HEAD") + + with WarplineStore.open(default_store_path(repo)) as store: + ingest_commit(store, repo, sha) + rows = _events(store, repo) + + assert rows, "ingest should record at least one change event" + for row in rows: + assert row["detected_branch"] == branch + assert row["detected_head_sha"] == sha + assert row["detected_at"] is not None + # A freshly-committed clean tree. + assert row["detected_context"] == "clean" + + +def test_ingest_with_dirty_work_tree_records_working_tree_dirty(tmp_path: Path) -> None: + repo = _init_repo(tmp_path) + sha = _commit(repo, "a.py", "x = 1\n") + # Leave an uncommitted change to a TRACKED file at detection time (E4): the + # working tree that produced this detection is unstable. + (repo / "a.py").write_text("x = 999\n", encoding="utf-8") + + with WarplineStore.open(default_store_path(repo)) as store: + ingest_commit(store, repo, sha) + rows = _events(store, repo) + + assert rows + for row in rows: + assert row["detected_context"] == "working_tree_dirty" + # head_sha is the stable committed HEAD; the dirty signal lives in context, + # never in a false-precise / NULL head_sha. 
+ assert row["detected_head_sha"] == sha + assert row["detected_branch"] is not None + + +def test_ingest_on_detached_head_records_null_branch_and_detached_context( + tmp_path: Path, +) -> None: + repo = _init_repo(tmp_path) + _commit(repo, "a.py", "x = 1\n") + sha = _commit(repo, "a.py", "x = 2\n") + _git(repo, "checkout", "--detach", sha) + + with WarplineStore.open(default_store_path(repo)) as store: + ingest_commit(store, repo, sha) + rows = _events(store, repo) + + assert rows + for row in rows: + assert row["detected_branch"] is None + assert row["detected_context"] == "detached_head" + assert row["detected_head_sha"] == sha + assert row["detected_at"] is not None + + +def test_backfill_leaves_all_anchor_columns_null(tmp_path: Path) -> None: + """B3: backfill is reconstruction, not detection — ALL anchor columns NULL.""" + repo = _init_repo(tmp_path) + _commit(repo, "a.py", "x = 1\n") + _commit(repo, "a.py", "x = 2\n") + + with WarplineStore.open(default_store_path(repo)) as store: + backfill(store, repo) + rows = _events(store, repo) + + assert rows, "backfill should record change events" + for row in rows: + assert row["detected_branch"] is None + assert row["detected_head_sha"] is None + assert row["detected_at"] is None + assert row["detected_context"] is None + + +def test_store_read_surfaces_anchor_fields(tmp_path: Path) -> None: + repo = _init_repo(tmp_path) + sha = _commit(repo, "a.py", "x = 1\n") + + with WarplineStore.open(default_store_path(repo)) as store: + ingest_commit(store, repo, sha) + list_rows = store.list_change_events(repo) + timeline_rows = store.timeline(repo, "file:a.py") + + anchor_keys = { + "detected_branch", + "detected_head_sha", + "detected_at", + "detected_context", + } + assert list_rows and anchor_keys <= set(list_rows[0]) + assert timeline_rows and anchor_keys <= set(timeline_rows[0]) + + +def test_v1_db_opened_by_v2_client_migrates_and_old_rows_read_null( + tmp_path: Path, +) -> None: + """A DB materialized at the frozen 
base SCHEMA (no anchor columns) upgrades on + open; rows written before the upgrade read NULL anchors (honest unavailable).""" + db = default_store_path(tmp_path / "repo") + db.parent.mkdir(parents=True, exist_ok=True) + + # Simulate a pre-v2 DB: base SCHEMA only, no anchor columns, with one + # hand-inserted change_event under the v1 column set. + raw = sqlite3.connect(db) + raw.row_factory = sqlite3.Row + raw.executescript(SCHEMA) + raw.execute( + "INSERT INTO repos(id, root) VALUES ('r', '/x')", + ) + raw.execute( + "INSERT INTO entity_keys(repo_id, locator, sei) VALUES ('r', 'file:legacy.py', NULL)" + ) + key_id = int( + raw.execute("SELECT id FROM entity_keys WHERE locator='file:legacy.py'").fetchone()[ + "id" + ] + ) + raw.execute( + """ + INSERT INTO change_events( + repo_id, entity_key_id, commit_sha, path, change_kind, actor, changed_at + ) VALUES ('r', ?, 'deadbeef', 'legacy.py', 'modified', 'a@b', '2020-01-01T00:00:00') + """, + (key_id,), + ) + raw.commit() + cols_before = {r["name"] for r in raw.execute("PRAGMA table_info(change_events)")} + raw.close() + assert "detected_context" not in cols_before + + with WarplineStore.open(db) as store: + assert store.schema_version() == 2 + row = store.conn.execute( + "SELECT detected_branch, detected_head_sha, detected_at, detected_context " + "FROM change_events WHERE commit_sha='deadbeef'" + ).fetchone() + assert row["detected_branch"] is None + assert row["detected_head_sha"] is None + assert row["detected_at"] is None + assert row["detected_context"] is None + + +def test_change_list_and_timeline_non_regression_on_migrated_db(tmp_path: Path) -> None: + """M10: change_list / entity_timeline return valid output with the new columns + present on a v1-then-migrated DB (additive columns do not break the reads).""" + repo = _init_repo(tmp_path) + sha = _commit(repo, "a.py", "x = 1\n") + + # Materialize the base SCHEMA first (pre-v2), then let the command's + # WarplineStore.open() run the v2 migration on top. 
+ db = default_store_path(repo) + db.parent.mkdir(parents=True, exist_ok=True) + raw = sqlite3.connect(db) + raw.executescript(SCHEMA) + raw.commit() + raw.close() + + with WarplineStore.open(db) as store: + ingest_commit(store, repo, sha) + + change_out = commands.change_list(repo) + assert change_out["data"]["items"] + timeline_out = commands.entity_timeline(repo, entity="file:a.py") + assert timeline_out["data"]["items"] diff --git a/tests/test_store.py b/tests/test_store.py index 743ef30..1fba02e 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -23,7 +23,7 @@ def test_default_store_path_honors_explicit_store_dir(tmp_path: Path) -> None: def test_store_initializes_schema(tmp_path: Path) -> None: db = tmp_path / "warpline.db" with WarplineStore.open(db) as store: - assert store.schema_version() == 1 + assert store.schema_version() == 2 def test_store_writes_nested_gitignore_that_ignores_runtime_db(tmp_path: Path) -> None: @@ -32,7 +32,7 @@ def test_store_writes_nested_gitignore_that_ignores_runtime_db(tmp_path: Path) - subprocess.run(["git", "init"], cwd=repo, check=True, text=True, capture_output=True) with WarplineStore.open(default_store_path(repo)) as store: - assert store.schema_version() == 1 + assert store.schema_version() == 2 gitignore = repo / ".weft" / "warpline" / ".gitignore" assert gitignore.exists() diff --git a/tests/test_store_migrations.py b/tests/test_store_migrations.py index 18e3f3b..f3bee8f 100644 --- a/tests/test_store_migrations.py +++ b/tests/test_store_migrations.py @@ -1,10 +1,11 @@ """Rung 1a: ordered migration runner + PRAGMA hardening. The base SCHEMA is FROZEN after Rung 1a; all schema change lands via the ordered -``MIGRATIONS`` list. In Rung 1a that list is empty (highest known version == 1), -so the runner is exercised here against synthetic migrations monkeypatched onto -the module — proving ordering, atomicity, idempotence, and concurrency safety -without coupling these tests to a not-yet-shipped v2. 
+``MIGRATIONS`` list. As of Rung 1b the real list carries v2 (anchor columns), so +the highest known version is 2. The runner mechanics (ordering, atomicity, +idempotence, concurrency safety) are still exercised against synthetic +migrations monkeypatched onto the module so they stay decoupled from any single +shipped version. """ from __future__ import annotations @@ -41,9 +42,9 @@ def test_fresh_db_lands_at_highest_known_version(tmp_path: Path) -> None: db = tmp_path / "warpline.db" with WarplineStore.open(db) as store: assert store.schema_version() == store_mod.HIGHEST_KNOWN_VERSION - # In Rung 1a the highest known version is 1; user_version is reconciled to it. + # As of Rung 1b the highest known version is 2 (anchor columns). assert _user_version(db) == store_mod.HIGHEST_KNOWN_VERSION - assert store_mod.HIGHEST_KNOWN_VERSION == 1 + assert store_mod.HIGHEST_KNOWN_VERSION == 2 def test_connection_pragmas_are_hardened(tmp_path: Path) -> None: @@ -55,8 +56,9 @@ def test_connection_pragmas_are_hardened(tmp_path: Path) -> None: assert str(store.conn.execute("PRAGMA journal_mode").fetchone()[0]).lower() == "wal" -def test_legacy_v1_db_reconciles_user_version_on_open(tmp_path: Path) -> None: - """A pre-runner DB (base tables + meta='1', user_version=0) reconciles to 1.""" +def test_legacy_v1_db_reconciles_then_upgrades_on_open(tmp_path: Path) -> None: + """A pre-runner DB (base tables + meta='1', user_version=0) reconciles to 1 + then the real v2 anchor migration upgrades it to the highest known version.""" db = tmp_path / "warpline.db" # Simulate a DB written before the runner existed: SCHEMA applied (so # meta.schema_version='1') but user_version never set. 
@@ -67,8 +69,16 @@ def test_legacy_v1_db_reconciles_user_version_on_open(tmp_path: Path) -> None: assert _user_version(db) == 0 with WarplineStore.open(db) as store: - assert store.schema_version() == 1 - assert _user_version(db) == 1 + assert store.schema_version() == store_mod.HIGHEST_KNOWN_VERSION + # The v2 anchor columns are present after the upgrade. + cols = {r["name"] for r in store.conn.execute("PRAGMA table_info(change_events)")} + assert { + "detected_branch", + "detected_head_sha", + "detected_at", + "detected_context", + } <= cols + assert _user_version(db) == store_mod.HIGHEST_KNOWN_VERSION # No reconcile-warn rows for the expected legacy baseline. assert "MIGRATION_META_RECONCILE" not in _health_codes(db) @@ -99,8 +109,8 @@ def test_user_version_ahead_of_known_warns_to_health_log_and_does_not_fail( raw.close() with WarplineStore.open(db) as store: - # schema_version() reads meta (still 1); the runner did not fail. - assert store.schema_version() == 1 + # schema_version() reads meta (still the highest known); runner did not fail. + assert store.schema_version() == store_mod.HIGHEST_KNOWN_VERSION # Reads remain available. store.ensure_repo(tmp_path) assert _user_version(db) == 99 # untouched @@ -118,7 +128,7 @@ def test_user_version_zero_with_divergent_meta_adopts_and_warns(tmp_path: Path) raw.close() with WarplineStore.open(db) as store: - # Adopted 5 from meta; 5 > highest known (1), so it is also flagged ahead. + # Adopted 5 from meta; 5 > highest known (2), so it is also flagged ahead. 
assert store.schema_version() == 5 codes = _health_codes(db) assert "MIGRATION_META_RECONCILE" in codes @@ -194,27 +204,31 @@ def test_concurrent_open_does_not_double_apply( apply_count = 0 lock = threading.Lock() - def _v2(conn: sqlite3.Connection) -> None: + def _v3(conn: sqlite3.Connection) -> None: nonlocal apply_count with lock: apply_count += 1 conn.execute("CREATE TABLE IF NOT EXISTS rung1a_concurrent (x INTEGER)") db = tmp_path / "warpline.db" - # Materialize the base schema (user_version=1) WITHOUT the synthetic v2 in - # play, so both threads then race the v2 step from an identical baseline. + # Materialize the real base schema (lands at the highest known version) WITHOUT + # the synthetic step in play, so both threads then race a single step above + # that baseline from an identical starting version. with WarplineStore.open(db) as store: - assert store.schema_version() == 1 + assert store.schema_version() == store_mod.HIGHEST_KNOWN_VERSION - monkeypatch.setattr(store_mod, "MIGRATIONS", [Migration(2, _v2)]) - monkeypatch.setattr(store_mod, "HIGHEST_KNOWN_VERSION", 2) + synthetic_version = store_mod.HIGHEST_KNOWN_VERSION + 1 + monkeypatch.setattr( + store_mod, "MIGRATIONS", [Migration(synthetic_version, _v3)] + ) + monkeypatch.setattr(store_mod, "HIGHEST_KNOWN_VERSION", synthetic_version) errors: list[BaseException] = [] def _worker() -> None: try: with WarplineStore.open(db) as store: - assert store.schema_version() == 2 + assert store.schema_version() == synthetic_version except BaseException as exc: # noqa: BLE001 - surfaced via errors list errors.append(exc) @@ -225,6 +239,6 @@ def _worker() -> None: t.join() assert errors == [] - assert _user_version(db) == 2 + assert _user_version(db) == synthetic_version # The migration body ran exactly once across both opens. 
assert apply_count == 1 From 0493e5ee2d478c49b7790e3ef2344a022f38847b Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 16 Jun 2026 02:07:52 +1000 Subject: [PATCH 06/10] feat(sei): self-healing SEI re-resolution sweep (Rung 1c) Add the idempotent UPDATE-or-merge core that heals entity keys minted while loomweave was unavailable (sei IS NULL). store.null_sei_entity_keys pages the worklist; store.reresolve_entity_key_sei repoints in place, or on a resolved-sei twin collision repoints change_events, DELETEs null-keyed duplicate events (resolved-keyed row canonical, M5/R11), deletes the orphan null key, and carries min(first)/max(last) seen onto the survivor. reresolve.sweep_reresolve_sei orchestrates per-locator resolution and reports the loomweave posture explicitly (present|absent|unavailable); client=None is a pure no-op that never marks a key resolved-to-null. The reresolve-sei CLI verb drives it and is NON-FROZEN/internal (not one of the six frozen v1 tools). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/warpline/cli.py | 37 +++++- src/warpline/reresolve.py | 95 +++++++++++++++ src/warpline/store.py | 147 ++++++++++++++++++++++ tests/test_reresolve.py | 249 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 527 insertions(+), 1 deletion(-) create mode 100644 src/warpline/reresolve.py create mode 100644 tests/test_reresolve.py diff --git a/src/warpline/cli.py b/src/warpline/cli.py index 77a81c0..12f30ba 100644 --- a/src/warpline/cli.py +++ b/src/warpline/cli.py @@ -11,6 +11,7 @@ from warpline.loomweave import LoomweaveMcpClient, LoomweaveProbe, ToolClient from warpline.mcp_smoke import run_mcp_smoke from warpline.productization import read_productization_decision +from warpline.reresolve import sweep_reresolve_sei from warpline.store import WarplineStore, default_store_path # install/doctor component flags -> component keys @@ -103,6 +104,21 @@ def build_parser() -> argparse.ArgumentParser: ) ingest.add_argument("--loomweave-command", default="loomweave") + # NON-FROZEN/internal verb (Rung 1c). Not one of the six frozen v1 MCP + # tools; the self-healing SEI re-resolution sweep, exposed for the hook and + # for `doctor --fix`. 
+ reresolve_parser = sub.add_parser( + "reresolve-sei", + help="Re-resolve null-sei entity keys via loomweave (self-healing sweep).", + ) + reresolve_parser.add_argument("--repo", type=Path, default=Path(".")) + reresolve_parser.add_argument("--limit", type=int, default=200) + reresolve_parser.add_argument( + "--resolve-sei", action=argparse.BooleanOptionalAction, default=True + ) + reresolve_parser.add_argument("--loomweave-command", default="loomweave") + reresolve_parser.add_argument("--json", action="store_true") + loomweave_probe = sub.add_parser("loomweave-probe") loomweave_probe.add_argument("--repo", type=Path, default=Path(".")) loomweave_probe.add_argument("--command", dest="loomweave_command", default="loomweave") @@ -243,7 +259,27 @@ def main(argv: list[str] | None = None) -> int: with WarplineStore.open(default_store_path(args.repo)) as store: store.log_health(args.repo, "HOOK_INGEST_FAILED", str(exc)) return 0 + if args.command == "reresolve-sei": + try: + sei_client, sei_resolution = _optional_sei_client( + args.repo, + enabled=args.resolve_sei, + command=args.loomweave_command, + ) + with WarplineStore.open(default_store_path(args.repo)) as store: + report = sweep_reresolve_sei( + store, args.repo, sei_client, limit=args.limit + ) + except Exception as exc: # fail-soft: hook + doctor contract + with WarplineStore.open(default_store_path(args.repo)) as store: + store.log_health(args.repo, "RERESOLVE_FAILED", str(exc)) + report = {"error": str(exc), "loomweave": "unavailable"} + else: + if sei_resolution is not None: + report["sei_resolution"] = sei_resolution + print(json.dumps(report, sort_keys=True) if args.json else report) + return 0 if args.command == "loomweave-probe": payload = LoomweaveProbe(repo=args.repo, command=args.loomweave_command).probe() print(json.dumps(payload, sort_keys=True) if args.json else json.dumps(payload, indent=2)) return 0 diff --git a/src/warpline/reresolve.py b/src/warpline/reresolve.py new file mode 100644 index
0000000..5660b83 --- /dev/null +++ b/src/warpline/reresolve.py @@ -0,0 +1,95 @@ +"""Self-healing SEI re-resolution sweep (Rung 1c). + +An entity key minted while loomweave was unavailable keeps ``sei IS NULL`` +forever: the ``entity_keys`` UNIQUE index keys on ``COALESCE(sei, '')``, so a +null-sei row and a resolved-sei row for the same locator are distinct +identities and the null row never heals on its own. This sweep is the idempotent +repair: it pages the null-sei worklist, asks loomweave to resolve each locator, +and applies the store's UPDATE-or-merge core (never re-minting a SEI). + +Doctrine: +- SEI-orthogonality — the SEI is loomweave's minted identifier, reused verbatim + via ``resolve_sei_for_locator``; this module never invents or parses one. +- Honesty invariant — the report names the loomweave posture explicitly + (``present`` / ``absent`` / ``unavailable``). When no client is available the + sweep is a pure no-op and reports ``unavailable``; it NEVER marks a key + resolved-to-null. + +``sweep_reresolve_sei`` is internal machinery. The ``reresolve-sei`` CLI verb +that drives it is NON-FROZEN/internal — it is not one of the six frozen v1 MCP +tools. +""" + +from __future__ import annotations + +from pathlib import Path + +from warpline.loomweave import ToolClient, resolve_sei_for_locator +from warpline.store import WarplineStore + + +def sweep_reresolve_sei( + store: WarplineStore, + repo: Path, + client: ToolClient | None, + limit: int = 200, +) -> dict[str, object]: + """Re-resolve null-sei entity keys for ``repo``, healing in place. + + Returns ``{scanned, resolved, merged, still_null, loomweave}`` where + ``loomweave`` is the closed-vocab posture: + + - ``unavailable`` — no client (loomweave absent); a pure no-op, zero rows + mutated, ``resolved``/``merged`` are 0 and ``still_null == scanned``. + - ``present`` — a client was available and resolved at least one locator. 
+ - ``absent`` — a client was available but resolved no locators (the index + has no SEI for any scanned locator yet). + """ + + repo_id = store.ensure_repo(repo) + null_keys = store.null_sei_entity_keys(repo, limit=limit) + scanned = len(null_keys) + + if client is None: + # Honest no-op: never mark a key resolved-to-null. Every scanned key + # remains unresolved and the posture is explicitly ``unavailable``. + return { + "scanned": scanned, + "resolved": 0, + "merged": 0, + "still_null": scanned, + "loomweave": "unavailable", + } + + resolved = 0 + merged = 0 + still_null = 0 + for key in null_keys: + locator = str(key["locator"]) + key_id = int(str(key["id"])) + sei = resolve_sei_for_locator(client, locator) + if sei is None: + still_null += 1 + continue + outcome = store.reresolve_entity_key_sei( + repo_id=repo_id, + null_key_id=key_id, + locator=locator, + resolved_sei=sei, + ) + action = outcome["action"] + if action == "resolved": + resolved += 1 + elif action == "merged": + merged += 1 + else: # "noop" — already healed on a prior pass + still_null += 1 + + posture = "present" if (resolved or merged) else "absent" + return { + "scanned": scanned, + "resolved": resolved, + "merged": merged, + "still_null": still_null, + "loomweave": posture, + } diff --git a/src/warpline/store.py b/src/warpline/store.py index 8f65530..c306730 100644 --- a/src/warpline/store.py +++ b/src/warpline/store.py @@ -409,6 +409,153 @@ def ensure_entity_key( self.conn.commit() return int(row["id"]) + def null_sei_entity_keys(self, repo: Path, limit: int = 200) -> list[dict[str, object]]: + """Entity keys whose SEI is still NULL, bounded and id-ordered. + + Rung 1c self-healing sweep input. A row minted while loomweave was down + keeps ``sei IS NULL`` forever (the UNIQUE index treats a null-sei row and + a resolved-sei row for the same locator as distinct identities), so this + is the worklist the re-resolution sweep pages through. 
Ordered by ``id`` + for deterministic, resumable paging. + """ + + repo_id = self._repo_id(repo) + rows = self.conn.execute( + """ + SELECT id, locator FROM entity_keys + WHERE repo_id = ? AND sei IS NULL + ORDER BY id + LIMIT ? + """, + (repo_id, int(limit)), + ).fetchall() + return [dict(row) for row in rows] + + def reresolve_entity_key_sei( + self, + repo_id: str, + null_key_id: int, + locator: str, + resolved_sei: str, + ) -> dict[str, str]: + """Idempotent UPDATE-or-merge of a null-sei entity key to a resolved SEI. + + Never re-mints (R11). Returns ``{"action": ...}`` where action is: + + - ``resolved`` — the null row was repointed in place (no twin existed). + - ``merged`` — a resolved-sei twin for the same locator already + existed; its row is the survivor. ``change_events`` were repointed from + the null key to the twin, any rows colliding on the ``change_events`` + UNIQUE constraint had their **null-keyed duplicate DELETED** (M5: the + resolved-keyed row is canonical), the orphan null ``entity_keys`` row + was deleted, and ``min(first_seen_commit)`` / ``max(last_seen_commit)`` + were carried onto the survivor. + - ``noop`` — the row no longer matches ``sei IS NULL`` (already healed + on a prior pass); convergent re-run. + + All steps run inside one ``BEGIN IMMEDIATE`` transaction. + """ + + self.conn.execute("BEGIN IMMEDIATE") + try: + row = self.conn.execute( + "SELECT id, first_seen_commit, last_seen_commit FROM entity_keys " + "WHERE id = ? AND repo_id = ? AND sei IS NULL", + (null_key_id, repo_id), + ).fetchone() + if row is None: + # Already healed (or never null) — convergent no-op. + self.conn.execute("COMMIT") + return {"action": "noop"} + try: + self.conn.execute( + "UPDATE entity_keys SET sei = ? WHERE id = ? AND sei IS NULL", + (resolved_sei, null_key_id), + ) + except sqlite3.IntegrityError: + # A resolved-sei twin for this (repo, locator) already exists; it + # is the survivor. 
Repoint change_events, drop colliding null-keyed + # duplicates, delete the orphan null key, carry first/last seen. + action = self._merge_into_twin( + repo_id=repo_id, + null_key_id=null_key_id, + locator=locator, + resolved_sei=resolved_sei, + null_first_seen=row["first_seen_commit"], + null_last_seen=row["last_seen_commit"], + ) + self.conn.execute("COMMIT") + return {"action": action} + self.conn.execute("COMMIT") + return {"action": "resolved"} + except BaseException: + self.conn.execute("ROLLBACK") + raise + + def _merge_into_twin( + self, + *, + repo_id: str, + null_key_id: int, + locator: str, + resolved_sei: str, + null_first_seen: str | None, + null_last_seen: str | None, + ) -> str: + """Merge a null-sei key into its resolved-sei twin (caller holds the txn).""" + + twin = self.conn.execute( + "SELECT id, first_seen_commit, last_seen_commit FROM entity_keys " + "WHERE repo_id = ? AND locator = ? AND sei = ?", + (repo_id, locator, resolved_sei), + ).fetchone() + if twin is None: # pragma: no cover - IntegrityError implies a twin exists + raise RuntimeError( + f"reresolve: IntegrityError but no resolved twin for {locator!r}" + ) + twin_id = int(twin["id"]) + + # Repoint change_events one row at a time so a UNIQUE collision on the + # twin (same commit/path/change_kind already recorded under the resolved + # key) deletes the null-keyed duplicate rather than aborting the repoint. + null_events = self.conn.execute( + "SELECT id FROM change_events WHERE entity_key_id = ?", + (null_key_id,), + ).fetchall() + for event in null_events: + try: + self.conn.execute( + "UPDATE change_events SET entity_key_id = ? WHERE id = ?", + (twin_id, event["id"]), + ) + except sqlite3.IntegrityError: + # The resolved-keyed row is canonical (M5): drop the null-keyed + # duplicate. Any divergent data on it (hunk_summary, actor) is + # deliberately discarded — explicit, documented data loss (Q7). 
+ self.conn.execute( + "DELETE FROM change_events WHERE id = ?", (event["id"],) + ) + + # Carry first/last seen onto the survivor: min(first), max(last) across + # both rows. Commit SHAs are not chronologically orderable, so this is a + # deterministic string min/max that never drops a non-null value. + twin_first = twin["first_seen_commit"] + twin_last = twin["last_seen_commit"] + firsts = [ + str(v) for v in (twin_first, null_first_seen) if v is not None + ] + lasts = [str(v) for v in (twin_last, null_last_seen) if v is not None] + merged_first = min(firsts) if firsts else None + merged_last = max(lasts) if lasts else None + self.conn.execute( + "UPDATE entity_keys SET first_seen_commit = ?, last_seen_commit = ? WHERE id = ?", + (merged_first, merged_last, twin_id), + ) + + # Delete the now-orphaned null key (its events were repointed/merged). + self.conn.execute("DELETE FROM entity_keys WHERE id = ?", (null_key_id,)) + return "merged" + def list_entity_keys(self, repo: Path) -> list[dict[str, object]]: repo_id = self._repo_id(repo) rows = self.conn.execute( diff --git a/tests/test_reresolve.py b/tests/test_reresolve.py new file mode 100644 index 0000000..ebf5e8c --- /dev/null +++ b/tests/test_reresolve.py @@ -0,0 +1,249 @@ +"""Rung 1c — self-healing SEI re-resolution sweep. + +Covers the store merge core (``reresolve_entity_key_sei`` / ``null_sei_entity_keys``) +and the orchestration sweep (``reresolve.sweep_reresolve_sei``): + +- a null-sei key heals to ``resolved`` when loomweave returns a SEI; +- the twin-collision merge (with and without a colliding change_event), where + the resolved-keyed row is canonical (M5) and the orphan null key is dropped, + the survivor keeps the resolved SEI and the merged first/last seen; +- the M5 differing-``hunk_summary`` case — the resolved row's data is preserved; +- a double run is a convergent no-op; +- loomweave absent → zero rows mutated, posture ``unavailable``, never + resolved-to-null. 
+""" + +from __future__ import annotations + +from pathlib import Path + +from warpline.reresolve import sweep_reresolve_sei +from warpline.store import WarplineStore + + +class _SeiClient: + """Fake loomweave client resolving every locator to a fixed SEI.""" + + def __init__(self, sei: str = "loomweave:eid:resolved") -> None: + self.sei = sei + self.calls = 0 + + def call_tool(self, name: str, arguments: dict[str, object]) -> dict[str, object]: + assert name == "entity_resolve" + self.calls += 1 + qualnames = arguments["qualnames"] + assert isinstance(qualnames, list) and qualnames + return { + "results": [ + { + "qualname": qualnames[0], + "result_kind": "resolved", + "candidates": [{"id": "python:function:x", "sei": self.sei}], + } + ] + } + + +class _NullClient: + """Fake loomweave client that resolves nothing (no SEI in the index yet).""" + + def call_tool(self, name: str, arguments: dict[str, object]) -> dict[str, object]: + return {"results": []} + + +_LOCATOR = "python:function:src/pkg/mod.py::fn" +_LOCATOR_B = "python:function:src/pkg/mod.py::other" + + +def _open(tmp_path: Path) -> WarplineStore: + return WarplineStore.open(tmp_path / "warpline.db") + + +def _null_key(store: WarplineStore, repo_id: str, locator: str, commit: str) -> int: + return store.ensure_entity_key(repo_id, locator=locator, sei=None, commit_sha=commit) + + +def test_null_sei_entity_keys_lists_only_null_rows_id_ordered(tmp_path: Path) -> None: + repo = tmp_path / "repo" + with _open(tmp_path) as store: + repo_id = store.ensure_repo(repo) + _null_key(store, repo_id, _LOCATOR, "c1") + store.ensure_entity_key(repo_id, locator=_LOCATOR_B, sei="loomweave:eid:x", commit_sha="c1") + _null_key(store, repo_id, "python:function:src/pkg/mod.py::third", "c1") + + rows = store.null_sei_entity_keys(repo) + locators = [r["locator"] for r in rows] + assert locators == [_LOCATOR, "python:function:src/pkg/mod.py::third"] + assert rows[0]["id"] < rows[1]["id"] + + +def 
test_sweep_resolves_null_key_in_place(tmp_path: Path) -> None: + repo = tmp_path / "repo" + client = _SeiClient() + with _open(tmp_path) as store: + repo_id = store.ensure_repo(repo) + key_id = _null_key(store, repo_id, _LOCATOR, "c1") + + report = sweep_reresolve_sei(store, repo, client) + assert report == { + "scanned": 1, + "resolved": 1, + "merged": 0, + "still_null": 0, + "loomweave": "present", + } + keys = {int(k["id"]): k for k in store.list_entity_keys(repo)} + assert keys[key_id]["sei"] == "loomweave:eid:resolved" + + +def test_sweep_loomweave_absent_is_noop_and_unavailable(tmp_path: Path) -> None: + repo = tmp_path / "repo" + with _open(tmp_path) as store: + repo_id = store.ensure_repo(repo) + key_id = _null_key(store, repo_id, _LOCATOR, "c1") + + report = sweep_reresolve_sei(store, repo, client=None) + assert report == { + "scanned": 1, + "resolved": 0, + "merged": 0, + "still_null": 1, + "loomweave": "unavailable", + } + # Never resolved-to-null: the row is untouched, sei still NULL. + keys = {int(k["id"]): k for k in store.list_entity_keys(repo)} + assert keys[key_id]["sei"] is None + + +def test_sweep_resolves_nothing_when_index_has_no_sei(tmp_path: Path) -> None: + repo = tmp_path / "repo" + with _open(tmp_path) as store: + repo_id = store.ensure_repo(repo) + _null_key(store, repo_id, _LOCATOR, "c1") + + report = sweep_reresolve_sei(store, repo, _NullClient()) + assert report["loomweave"] == "absent" + assert report["resolved"] == 0 + assert report["still_null"] == 1 + + +def test_twin_collision_merges_without_duplicate_event(tmp_path: Path) -> None: + repo = tmp_path / "repo" + resolved_sei = "loomweave:eid:resolved" + with _open(tmp_path) as store: + repo_id = store.ensure_repo(repo) + # Resolved twin exists first (commit c2), then a null-keyed row for the + # same locator (commit c1) — minted while loomweave was down. 
+ twin_id = store.ensure_entity_key( + repo_id, locator=_LOCATOR, sei=resolved_sei, commit_sha="c2" + ) + null_id = _null_key(store, repo_id, _LOCATOR, "c1") + # A change_event on the null key that does NOT collide with the twin. + store.append_change_event( + repo_id=repo_id, + entity_key_id=null_id, + commit_sha="c1", + path="src/pkg/mod.py", + change_kind="modified", + actor="agent", + changed_at="2026-01-01T00:00:00Z", + ) + + report = sweep_reresolve_sei(store, repo, _SeiClient(resolved_sei)) + assert report["merged"] == 1 + assert report["resolved"] == 0 + + keys = {int(k["id"]): k for k in store.list_entity_keys(repo)} + # Orphan null key gone; twin survives with the resolved SEI. + assert null_id not in keys + assert keys[twin_id]["sei"] == resolved_sei + # Carried first/last seen: min(first)=c1, max(last)=c2. + assert keys[twin_id]["first_seen_commit"] == "c1" + assert keys[twin_id]["last_seen_commit"] == "c2" + # The non-colliding event was repointed onto the survivor. + events = store.list_change_events(repo) + assert len(events) == 1 + assert int(events[0]["entity_key_id"]) == twin_id + + +def test_twin_collision_drops_null_keyed_duplicate_preserving_resolved_data( + tmp_path: Path, +) -> None: + """M5/Q7: colliding change_events keep the resolved-keyed row's data.""" + + repo = tmp_path / "repo" + resolved_sei = "loomweave:eid:resolved" + common = dict( + commit_sha="c1", + path="src/pkg/mod.py", + change_kind="modified", + actor="agent", + changed_at="2026-01-01T00:00:00Z", + ) + with _open(tmp_path) as store: + repo_id = store.ensure_repo(repo) + twin_id = store.ensure_entity_key( + repo_id, locator=_LOCATOR, sei=resolved_sei, commit_sha="c1" + ) + null_id = _null_key(store, repo_id, _LOCATOR, "c1") + # Two events that collide on the change_events UNIQUE constraint + # (same commit/path/change_kind), differing only on hunk_summary. 
+ store.append_change_event( + repo_id=repo_id, entity_key_id=twin_id, hunk_summary="RESOLVED-DATA", **common + ) + store.append_change_event( + repo_id=repo_id, entity_key_id=null_id, hunk_summary="NULL-DATA", **common + ) + + report = sweep_reresolve_sei(store, repo, _SeiClient(resolved_sei)) + assert report["merged"] == 1 + + events = store.list_change_events(repo) + assert len(events) == 1, "null-keyed duplicate must be deleted" + assert int(events[0]["entity_key_id"]) == twin_id + # The resolved-keyed row's data survives; the null row's was discarded. + keys = {int(k["id"]): k for k in store.list_entity_keys(repo)} + assert null_id not in keys + # Re-read the surviving event's hunk_summary directly. + row = store.conn.execute( + "SELECT hunk_summary FROM change_events WHERE entity_key_id = ?", + (twin_id,), + ).fetchone() + assert row["hunk_summary"] == "RESOLVED-DATA" + + +def test_double_run_is_a_noop(tmp_path: Path) -> None: + repo = tmp_path / "repo" + client = _SeiClient() + with _open(tmp_path) as store: + repo_id = store.ensure_repo(repo) + _null_key(store, repo_id, _LOCATOR, "c1") + + first = sweep_reresolve_sei(store, repo, client) + assert first["resolved"] == 1 + + second = sweep_reresolve_sei(store, repo, client) + assert second == { + "scanned": 0, + "resolved": 0, + "merged": 0, + "still_null": 0, + "loomweave": "absent", + } + + +def test_merge_core_action_noop_when_already_healed(tmp_path: Path) -> None: + repo = tmp_path / "repo" + with _open(tmp_path) as store: + repo_id = store.ensure_repo(repo) + key_id = store.ensure_entity_key( + repo_id, locator=_LOCATOR, sei="loomweave:eid:resolved", commit_sha="c1" + ) + # Calling the merge core on an already-resolved key matches no null row. 
+ outcome = store.reresolve_entity_key_sei( + repo_id=repo_id, + null_key_id=key_id, + locator=_LOCATOR, + resolved_sei="loomweave:eid:resolved", + ) + assert outcome == {"action": "noop"} From f0368f5bd3c603587030abdccb9ac73b09ef2a29 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 16 Jun 2026 02:13:38 +1000 Subject: [PATCH 07/10] feat(snapshot): always-on lazy edge-snapshot capture + hook/doctor (Rung 1d) Co-Authored-By: Claude Opus 4.8 (1M context) --- src/warpline/commands.py | 73 +++++++++++ src/warpline/install.py | 2 + src/warpline/install_support.py | 21 ++- tests/install/test_install_doctor.py | 44 +++++++ tests/test_hooks.py | 13 ++ tests/test_lazy_capture.py | 186 +++++++++++++++++++++++++++ 6 files changed, 337 insertions(+), 2 deletions(-) create mode 100644 tests/test_lazy_capture.py diff --git a/src/warpline/commands.py b/src/warpline/commands.py index ecc8f94..cdafd5f 100644 --- a/src/warpline/commands.py +++ b/src/warpline/commands.py @@ -383,6 +383,71 @@ def entity_churn_count( ) +# --------------------------------------------------------------------------- +# Rung 1d — always-on lazy edge-snapshot capture (M6, Option A). +# +# Today the post-commit hook only ingests, so a freshly-installed repo has no +# edge snapshot and ``blast_radius`` honestly returns NO_SNAPSHOT. The lazy +# capture below restores the correctness floor on read: when the store has no +# usable snapshot AND loomweave is reachable, attempt one scoped capture, then +# re-read. It is ALWAYS-ON internally whenever loomweave is available — there is +# deliberately no ``auto_capture`` inputSchema field on the frozen tools (M6); +# the opt-out toggle is interface-pending only. +# +# Doctrine: ``blast_radius`` stays PURE (R7) — no ``on_missing_snapshot`` +# parameter, no per-entity subprocess in the traversal path. The capture lives +# entirely in the tool bodies. 
It is fail-soft: any loomweave failure falls +# through to the unchanged NO_SNAPSHOT path (honesty invariant — absence never +# reads as a clean/empty graph, never raises, never gates). +# +# Latency note: the first read against an uncaptured repo pays the +# ``LoomweaveProbe`` first-call cost (~1-5s of `loomweave serve` spin-up plus +# the scoped capture). This is bounded to once per NO_SNAPSHOT store — the next +# read sees a snapshot and skips the probe entirely. +# --------------------------------------------------------------------------- +def _lazy_capture_if_missing( + store: WarplineStore, + repo: Path, + key_ids: list[int], + loomweave_command: str | None, +) -> None: + """Best-effort scoped snapshot when none exists and loomweave is reachable. + + Always-on internally; fail-soft. Never raises, never gates: on any failure + the caller falls through to the unchanged NO_SNAPSHOT path. + """ + + try: + existing = store.latest_snapshot(repo) + if existing is not None and existing.get("completeness") != "SKIPPED": + return # a usable snapshot already exists — nothing to do. + # loomweave_command is server/project config (env), NOT public agent + # input — mirrors capture_snapshot. It is deliberately absent from the + # frozen tools' inputSchema (M6). + command = loomweave_command or os.environ.get("WARPLINE_LOOMWEAVE_COMMAND", "loomweave") + probe = LoomweaveProbe(repo=repo, command=command).probe() + if probe.get("status") != "available": + return # loomweave absent/unavailable — honest fall-through. + client = LoomweaveMcpClient(repo=repo, command=command) + source_version = str(probe.get("version") or "unknown") + # Scope the capture to the changed seed's locators when known; an empty + # scope means "no resolved seed", so capture the full graph (FULL) so a + # cold repo still gets a usable snapshot on the first read. 
+ rows = store.entity_keys_by_ids(repo, key_ids) if key_ids else {} + scope_locators = { + str(row["locator"]) for row in rows.values() if isinstance(row.get("locator"), str) + } + capture_edge_snapshot( + store, + repo, + client=client, + source_version=source_version, + scope_locators=scope_locators or None, + ) + except Exception: # noqa: BLE001 — capture is advisory; never block the read. + return + + # --------------------------------------------------------------------------- # warpline_impact_radius_get — warpline.impact_radius.v1 # --------------------------------------------------------------------------- @@ -398,6 +463,7 @@ def impact_radius( sort_order: str | None = None, cursor: Any = None, limit: int = 100, + loomweave_command: str | None = None, ) -> dict[str, Any]: refs = parse_changed_refs(changed_refs) with WarplineStore.open(default_store_path(repo)) as store: @@ -408,6 +474,9 @@ def impact_radius( changed_refs=refs, changed_entity_key_ids=changed_entity_key_ids or [], ) + # Always-on lazy capture (Rung 1d): restore the correctness floor when no + # snapshot exists and loomweave is reachable; fail-soft otherwise. + _lazy_capture_if_missing(store, repo, key_ids, loomweave_command) result = compute_blast_radius(store, repo, key_ids, depth) changed, affected = enrich_blast(store, repo, result) completeness = result["completeness"] @@ -477,6 +546,7 @@ def reverify_worklist( include_federation: bool = False, risk_client: RiskClient | None = None, legis_client: LegisClient | None = None, + loomweave_command: str | None = None, ) -> dict[str, Any]: refs = parse_changed_refs(changed_refs) with WarplineStore.open(default_store_path(repo)) as store: @@ -487,6 +557,9 @@ def reverify_worklist( changed_refs=refs, changed_entity_key_ids=changed_entity_key_ids or [], ) + # Always-on lazy capture (Rung 1d): restore the correctness floor when no + # snapshot exists and loomweave is reachable; fail-soft otherwise. 
+ _lazy_capture_if_missing(store, repo, key_ids, loomweave_command) result = compute_blast_radius(store, repo, key_ids, depth) changed, affected = enrich_blast(store, repo, result) completeness = result["completeness"] diff --git a/src/warpline/install.py b/src/warpline/install.py index a61b659..1083440 100644 --- a/src/warpline/install.py +++ b/src/warpline/install.py @@ -8,6 +8,8 @@ def hook_body(executable: str) -> str: # BEGIN WARPLINE MANAGED BLOCK # Managed by Warpline. Fail-soft by design: Warpline must never block commits. {executable} ingest-commit HEAD >/dev/null 2>&1 || true +{executable} reresolve-sei --limit 25 >/dev/null 2>&1 || true +{executable} capture-snapshot --commit HEAD >/dev/null 2>&1 || true # END WARPLINE MANAGED BLOCK exit 0 """ diff --git a/src/warpline/install_support.py b/src/warpline/install_support.py index b898384..febe00a 100644 --- a/src/warpline/install_support.py +++ b/src/warpline/install_support.py @@ -305,13 +305,30 @@ def apply_gitignore(repo: Path) -> str: # --- git post-commit hook -------------------------------------------------------- +# Sentinel proving an installed hook carries the current managed body. Rung 1d +# added the reresolve-sei + capture-snapshot lines (install.py:hook_body); an +# older installed hook lacks them, so `doctor` flags it stale and `--fix` +# reinstalls (R5 — editing hook_body alone never rewrites installed hooks). 
+HOOK_CURRENCY_SENTINEL = "reresolve-sei" + + def check_git_hook(repo: Path) -> CheckResult: hook = repo.resolve() / ".git" / "hooks" / "post-commit" if not (repo.resolve() / ".git").exists(): return _result("git post-commit hook", False, "not a git repository", fixable=False) - if not hook.exists() or "BEGIN WARPLINE MANAGED BLOCK" not in hook.read_text(encoding="utf-8"): + if not hook.exists(): + return _result("git post-commit hook", False, "warpline ingest hook not installed") + body = hook.read_text(encoding="utf-8") + if "BEGIN WARPLINE MANAGED BLOCK" not in body: return _result("git post-commit hook", False, "warpline ingest hook not installed") - return _result("git post-commit hook", True, "ingest hook installed") + if HOOK_CURRENCY_SENTINEL not in body: + return _result( + "git post-commit hook", + False, + "post-commit hook out of date (missing reresolve/capture lines); run " + "`warpline install --hooks` or `warpline doctor --fix`", + ) + return _result("git post-commit hook", True, "ingest hook installed (current)") def apply_git_hook(repo: Path) -> str: diff --git a/tests/install/test_install_doctor.py b/tests/install/test_install_doctor.py index cb83c3f..e1e6f0a 100644 --- a/tests/install/test_install_doctor.py +++ b/tests/install/test_install_doctor.py @@ -88,6 +88,50 @@ def test_doctor_reports_missing_then_fix_repairs(tmp_path: Path) -> None: assert (repo / ".claude" / "skills" / "warpline-workflow" / "SKILL.md").exists() +def test_doctor_flags_stale_hook_then_fix_reinstalls(tmp_path: Path) -> None: + """Rung 1d: an installed-but-old hook (no reresolve/capture lines) is flagged + stale by doctor, and `--fix` regenerates it (R5 — editing hook_body alone + never rewrites already-installed hooks).""" + + repo = _git_repo(tmp_path) + install_support.run_install(repo) + hook = repo / ".git" / "hooks" / "post-commit" + + # Simulate a pre-Rung-1d managed hook: the ingest line, but no currency lines. 
+ hook.write_text( + "#!/bin/sh\n" + "# BEGIN WARPLINE MANAGED BLOCK\n" + "warpline ingest-commit HEAD >/dev/null 2>&1 || true\n" + "# END WARPLINE MANAGED BLOCK\nexit 0\n", + encoding="utf-8", + ) + + pre = install_support.run_doctor(repo) + assert not pre.ok + stale = next(r for r in pre.results if r.name == "git post-commit hook") + assert stale.ok is False + assert "out of date" in stale.detail + + fixed = install_support.run_doctor(repo, fix=True) + assert fixed.ok + assert any(name == "git post-commit hook" for name, _ in fixed.fixed) + body = hook.read_text(encoding="utf-8") + assert "reresolve-sei" in body + assert "capture-snapshot" in body + + +def test_doctor_passes_for_current_hook(tmp_path: Path) -> None: + """A freshly installed hook carries the currency sentinel and doctor is green + on the hook check.""" + + repo = _git_repo(tmp_path) + install_support.run_install(repo) + report = install_support.run_doctor(repo) + hook = next(r for r in report.results if r.name == "git post-commit hook") + assert hook.ok is True + assert "reresolve-sei" in (repo / ".git" / "hooks" / "post-commit").read_text() + + def test_doctor_flags_non_git_repo_as_unfixable(tmp_path: Path) -> None: repo = tmp_path / "plain" repo.mkdir() diff --git a/tests/test_hooks.py b/tests/test_hooks.py index c9462c6..dc2833e 100644 --- a/tests/test_hooks.py +++ b/tests/test_hooks.py @@ -13,6 +13,19 @@ def test_hook_body_exits_zero_and_invokes_ingest() -> None: assert "exit 0" in body +def test_hook_body_carries_reresolve_and_capture_lines() -> None: + """Rung 1d: the managed block runs a bounded reresolve sweep + a HEAD capture + after ingest, both fail-soft (`|| true`).""" + + body = hook_body("/usr/bin/warpline") + assert "/usr/bin/warpline reresolve-sei --limit 25 >/dev/null 2>&1 || true" in body + assert "/usr/bin/warpline capture-snapshot --commit HEAD >/dev/null 2>&1 || true" in body + # Ordering: ingest first, then heal SEIs, then capture (so capture sees them). 
+ assert body.index("ingest-commit") < body.index("reresolve-sei") < body.index( + "capture-snapshot" + ) + + def test_install_hook_writes_post_commit(tmp_path: Path) -> None: repo = tmp_path / "repo" hooks = repo / ".git" / "hooks" diff --git a/tests/test_lazy_capture.py b/tests/test_lazy_capture.py new file mode 100644 index 0000000..83bd84a --- /dev/null +++ b/tests/test_lazy_capture.py @@ -0,0 +1,186 @@ +"""Rung 1d — always-on lazy edge-snapshot capture (M6, Option A). + +The post-commit hook only ingests, so a cold repo has no edge snapshot and +``blast_radius`` honestly returns NO_SNAPSHOT. The lazy capture restores the +correctness floor on the *read* path (``impact_radius`` / ``reverify_worklist``) +whenever loomweave is reachable — always-on internally, with NO ``auto_capture`` +inputSchema field on the frozen tools. + +Doctrine asserted here: +* ``blast_radius`` stays PURE (R7) — its signature is unchanged. +* lazy capture is fail-soft: no loomweave -> unchanged NO_SNAPSHOT, never raises. +""" + +from __future__ import annotations + +import inspect +from pathlib import Path + +import pytest +from conftest import commit, init_repo + +from warpline import commands +from warpline.propagation import blast_radius +from warpline.store import WarplineStore, default_store_path + + +class _FakeNeighborhoodClient: + """Stands in for ``LoomweaveMcpClient``: a -> b call edge.""" + + def __init__(self, *_args: object, **_kwargs: object) -> None: + self.calls: list[str] = [] + + def neighborhood(self, entity: str) -> dict[str, object]: + self.calls.append(entity) + if entity == "python:function:a": + return { + "entity": {"id": "python:function:a"}, + "callees": [{"id": "python:function:b"}], + "truncated": {"callers": False, "callees": False}, + } + return { + "entity": {"id": entity}, + "truncated": {"callers": False, "callees": False}, + } + + +def _seed_two_entities(repo: Path) -> tuple[int, int]: + """Two warpline-local entity_keys (a, b) and NO snapshot. 
Returns (a_id, b_id).""" + + with WarplineStore.open(default_store_path(repo)) as store: + repo_id = store.ensure_repo(repo) + a = store.ensure_entity_key( + repo_id, locator="python:function:a", sei=None, commit_sha="c1" + ) + b = store.ensure_entity_key( + repo_id, locator="python:function:b", sei=None, commit_sha="c1" + ) + return a, b + + +def _force_loomweave_available(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + commands.LoomweaveProbe, + "probe", + lambda self: {"status": "available", "version": "fake-1"}, + ) + monkeypatch.setattr(commands, "LoomweaveMcpClient", _FakeNeighborhoodClient) + + +def test_impact_radius_lazily_captures_when_loomweave_available( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo = init_repo(tmp_path) + commit(repo, "f.py", "x = 1\n") # a real HEAD for capture's git rev-parse + a, _b = _seed_two_entities(repo) + _force_loomweave_available(monkeypatch) + + # Precondition: no snapshot, so a *pure* blast_radius is NO_SNAPSHOT. + with WarplineStore.open(default_store_path(repo)) as store: + cold = blast_radius(store, repo, [a], depth=2) + assert cold["completeness"] == "NO_SNAPSHOT" + assert cold["affected"] == [] + + # impact_radius triggers the lazy capture, then traverses the populated graph. + payload = commands.impact_radius(repo, [a], depth=2) + assert payload["data"]["completeness"] == "FULL" + affected_locators = { + item["entity"]["locator"] for item in payload["data"]["affected"] + } + assert "python:function:b" in affected_locators + + # The snapshot now persists: a second read needs no capture (probe is skipped). 
+ with WarplineStore.open(default_store_path(repo)) as store: + snap = store.latest_snapshot(repo) + assert snap is not None + assert snap["completeness"] == "FULL" + + +def test_reverify_worklist_lazily_captures_when_loomweave_available( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo = init_repo(tmp_path) + commit(repo, "f.py", "x = 1\n") + a, _b = _seed_two_entities(repo) + _force_loomweave_available(monkeypatch) + + payload = commands.reverify_worklist(repo, [a], depth=2) + assert payload["data"]["completeness"] == "FULL" + with WarplineStore.open(default_store_path(repo)) as store: + assert store.latest_snapshot(repo) is not None + + +def test_no_loomweave_falls_through_to_no_snapshot_unchanged( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo = init_repo(tmp_path) + commit(repo, "f.py", "x = 1\n") + a, _b = _seed_two_entities(repo) + # loomweave unreachable -> honest fall-through, no error, no gate. + monkeypatch.setattr( + commands.LoomweaveProbe, + "probe", + lambda self: {"status": "skipped", "reason": "command_unavailable"}, + ) + + payload = commands.impact_radius(repo, [a], depth=2) + assert payload["data"]["completeness"] == "NO_SNAPSHOT" + assert payload["data"]["affected"] == [] + # No snapshot was written (loomweave never consulted for a capture). + with WarplineStore.open(default_store_path(repo)) as store: + assert store.latest_snapshot(repo) is None + + +def test_lazy_capture_is_fail_soft_when_probe_raises( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo = init_repo(tmp_path) + commit(repo, "f.py", "x = 1\n") + a, _b = _seed_two_entities(repo) + + def _boom(self: object) -> dict[str, object]: + raise RuntimeError("loomweave exploded") + + monkeypatch.setattr(commands.LoomweaveProbe, "probe", _boom) + + # The read must still succeed honestly — never propagate the capture failure. 
+ payload = commands.impact_radius(repo, [a], depth=2) + assert payload["data"]["completeness"] == "NO_SNAPSHOT" + + +def test_lazy_capture_skips_when_snapshot_already_present( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo = init_repo(tmp_path) + commit(repo, "f.py", "x = 1\n") + a, b = _seed_two_entities(repo) + # An existing FULL snapshot must short-circuit the probe entirely. + with WarplineStore.open(default_store_path(repo)) as store: + repo_id = store.ensure_repo(repo) + sid = store.create_edge_snapshot(repo_id, "c1", "loomweave", "v0", "FULL") + store.append_snapshot_edge( + sid, + source_entity_key_id=a, + target_entity_key_id=b, + edge_kind="calls", + confidence="resolved", + ) + + probe_calls: list[int] = [] + + def _track(self: object) -> dict[str, object]: + probe_calls.append(1) + return {"status": "available", "version": "fake"} + + monkeypatch.setattr(commands.LoomweaveProbe, "probe", _track) + + commands.impact_radius(repo, [a], depth=2) + assert probe_calls == [] # snapshot present -> capture machinery never engaged + + +def test_blast_radius_signature_stays_pure() -> None: + """R7: no ``on_missing_snapshot`` / lazy-capture parameter leaked into the + pure traversal.""" + + params = set(inspect.signature(blast_radius).parameters) + assert params == {"store", "repo", "changed_entity_key_ids", "depth"} From 6831af7e15352a13ce6a00fad19bec2ec861365c Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 16 Jun 2026 02:16:13 +1000 Subject: [PATCH 08/10] feat(reverify): light up risk/governance enrichment (Rung 2 Track C) Co-Authored-By: Claude Opus 4.8 (1M context) --- src/warpline/commands.py | 74 ++++++++++++++ tests/test_enrichment_merge.py | 174 +++++++++++++++++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 tests/test_enrichment_merge.py diff --git a/src/warpline/commands.py b/src/warpline/commands.py index cdafd5f..6b5fd03 100644 --- 
a/src/warpline/commands.py +++ b/src/warpline/commands.py @@ -593,6 +593,17 @@ def reverify_worklist( if include_federation else None ) + # Track C — light up the inert per-item risk/governance enrichment. + # The federation consult resolves per-entity risk/governance facts but + # leaves item.enrichment.{risk,governance} at the empty scaffold from + # reverify._empty_enrichment(). Merge the federation facts back onto the + # matching items HERE — immediately after consult_federation and BEFORE + # apply_overflow/apply_page (M3) — so the merge runs over the FULL + # filtered+sorted list and a page-2 item is enriched just like a page-1 + # one. Additive and reversible (D2): this is the proven-need demonstration + # that earns freezing the wardline/legis inbound shape, not a pre-promised + # contract; it does not lock the RESERVED-SHAPE inbound. + risk_state, gov_state = _merge_federation_enrichment(items, federation) items, overflow_warnings, overflow = apply_overflow( items, repo=repo, tool="warpline_reverify_worklist_get", schema=SCHEMA_REVERIFY_WORKLIST ) @@ -635,6 +646,8 @@ def reverify_worklist( enrichment=enrichment_state( edges=edges_enrichment(completeness, staleness), work=work_state, + risk=risk_state, + governance=gov_state, ), next_actions={"filigree": filigree_candidates}, warnings=( @@ -666,6 +679,67 @@ def _federation_warnings(federation: dict[str, Any] | None) -> list[str]: return warnings +def _member_scalar(federation: dict[str, Any] | None, member: str) -> str: + """R6 scalar rule for a single federation member, mirroring ``work_state``. + + ``federation is None`` means the caller never asked (``include_federation`` + was False); a member whose ``weft_reason.reason_class`` is anything other than + ``clean`` (i.e. ``disabled``/``unreachable``) was asked but could not answer. + Both are honestly ``unavailable`` — never ``absent`` (which would falsely read + as "asked, peer present, found nothing"). 
A ``clean`` member that returned at + least one entity's facts is ``present``; a ``clean`` member with no facts is the + earned-empty ``absent``. + """ + + if federation is None: + return "unavailable" + block = federation.get("members", {}).get(member, {}) + klass = block.get("weft_reason", {}).get("reason_class") + if klass != "clean": + return "unavailable" + return "present" if int(block.get("entity_count", 0) or 0) > 0 else "absent" + + +def _merge_federation_enrichment( + items: list[dict[str, Any]], federation: dict[str, Any] | None +) -> tuple[str, str]: + """Merge per-entity ``risk``/``governance`` federation facts onto each item's + ``enrichment`` block and return the ``(risk_state, governance_state)`` scalars. + + Track C: ``consult_federation`` resolves the facts but leaves + ``item.enrichment.{risk,governance}`` at the empty scaffold. Copy each + federation entity's ``risk``/``governance`` lists onto the matching item + (keyed on locator). Called over the FULL filtered+sorted worklist before + paging, so a page-2 item is enriched identically to a page-1 one (M3). + + Returns the two envelope-level scalars per the R6 rule (see + :func:`_member_scalar`). Additive/advisory only; never gates (D2). 
+ """ + + risk_state = _member_scalar(federation, "wardline") + gov_state = _member_scalar(federation, "legis") + if federation is None: + return risk_state, gov_state + fed_by_locator: dict[str, dict[str, Any]] = {} + for fed_entity in federation.get("entities", []): + locator = fed_entity.get("locator") + if isinstance(locator, str) and locator: + fed_by_locator[locator] = fed_entity + for item in items: + locator = item.get("entity", {}).get("locator") + if not isinstance(locator, str) or not locator: + continue + fed_entity = fed_by_locator.get(locator) + if fed_entity is None: + continue + enrichment = item.get("enrichment") + if not isinstance(enrichment, dict): + continue + enrichment["risk"] = fed_entity.get("risk", []) + enrichment["governance"] = fed_entity.get("governance", []) + return risk_state, gov_state + + # --------------------------------------------------------------------------- # warpline_edge_snapshot_capture — warpline.edge_snapshot.v1 (only mutating tool) # --------------------------------------------------------------------------- diff --git a/tests/test_enrichment_merge.py b/tests/test_enrichment_merge.py new file mode 100644 index 0000000..fb74d5e --- /dev/null +++ b/tests/test_enrichment_merge.py @@ -0,0 +1,174 @@ +"""Track C — light up the inert risk/governance enrichment in reverify. + +The federation consult resolves per-entity risk/governance facts, but the +worklist items carry only the empty ``_empty_enrichment()`` scaffold. Track C +merges the federation facts back onto each item's ``enrichment`` block and lifts +the envelope-level ``risk``/``governance`` scalars off the perpetual default. + +These tests pin: + + 1. **R6 scalar rule** — ``include_federation=False`` (federation never asked) + or a member that is disabled/unreachable -> ``unavailable``; a reachable + member with no findings -> ``absent``; a reachable member with findings -> + ``present``. ``unavailable`` is NEVER conflated with ``absent``. + 2. 
**per-item merge** — a reachable wardline finding lands on the matching + ``item.enrichment.risk`` (no longer the empty scaffold). + 3. **M3 insertion point** — the merge runs over the FULL filtered+sorted list + BEFORE paging, so a page-2 item is enriched exactly like a page-1 one. + +Additive/advisory only (D2): this is the proven-need demonstration, not a +pre-promised contract; it never gates. +""" + +from __future__ import annotations + +import subprocess +from pathlib import Path +from typing import Any + +from warpline import commands +from warpline.store import WarplineStore, default_store_path + + +def _git(repo: Path, *args: str) -> str: + return subprocess.run( + ["git", *args], cwd=repo, check=True, text=True, stdout=subprocess.PIPE + ).stdout.strip() + + +def _seed_repo_with_entities( + tmp_path: Path, locators: list[str], *, sei_prefix: str = "loomweave:eid:" +) -> tuple[Path, list[int]]: + repo = tmp_path / "repo" + repo.mkdir() + _git(repo, "init") + _git(repo, "config", "user.email", "agent@example.test") + _git(repo, "config", "user.name", "Agent") + (repo / "a.py").write_text("a = 1\n", encoding="utf-8") + _git(repo, "add", "a.py") + _git(repo, "commit", "-m", "init") + head = _git(repo, "rev-parse", "HEAD") + keys: list[int] = [] + with WarplineStore.open(default_store_path(repo)) as store: + repo_id = store.ensure_repo(repo) + for i, locator in enumerate(locators): + keys.append( + store.ensure_entity_key( + repo_id, locator=locator, sei=f"{sei_prefix}{i}", commit_sha=head + ) + ) + return repo, keys + + +class _FindingsRisk: + """A reachable wardline RiskClient returning a finding for every locator.""" + + def findings_for_locator(self, locator: str) -> list[dict[str, Any]]: + return [{"fingerprint": f"f-{locator}", "rule": "taint", "severity": "ERROR"}] + + +class _EmptyRisk: + """A reachable wardline RiskClient that finds nothing (earned-empty).""" + + def findings_for_locator(self, locator: str) -> list[dict[str, Any]]: + return [] + + 
+class _BoomRisk: + """A wardline RiskClient whose transport raises mid-consult (unreachable).""" + + def findings_for_locator(self, locator: str) -> list[dict[str, Any]]: + raise RuntimeError("wardline dossier exploded") + + +def test_findings_populate_item_risk_and_envelope_present(tmp_path: Path) -> None: + repo, keys = _seed_repo_with_entities(tmp_path, ["python:function:a.py::a"]) + env = commands.reverify_worklist( + repo, keys, depth=2, include_federation=True, risk_client=_FindingsRisk() + ) + # envelope-level scalar: wardline reachable + findings -> present. + assert env["enrichment"]["risk"] == "present" + # legis has no transport -> unavailable (never absent). + assert env["enrichment"]["governance"] == "unavailable" + # the per-item enrichment.risk is no longer the empty scaffold. + item = next( + i for i in env["data"]["items"] + if i["entity"].get("locator") == "python:function:a.py::a" + ) + assert item["enrichment"]["risk"] + assert item["enrichment"]["risk"][0]["fingerprint"] == "f-python:function:a.py::a" + + +def test_include_federation_false_is_unavailable_not_absent(tmp_path: Path) -> None: + repo, keys = _seed_repo_with_entities(tmp_path, ["python:function:a.py::a"]) + env = commands.reverify_worklist(repo, keys, depth=2) # include_federation=False + # never asked -> unavailable, NOT the false-clean "absent". + assert env["enrichment"]["risk"] == "unavailable" + assert env["enrichment"]["governance"] == "unavailable" + # and the per-item enrichment stays the empty scaffold. + item = env["data"]["items"][0] + assert item["enrichment"]["risk"] == [] + assert item["enrichment"]["governance"] == [] + + +def test_reachable_but_empty_is_absent(tmp_path: Path) -> None: + repo, keys = _seed_repo_with_entities(tmp_path, ["python:function:a.py::a"]) + env = commands.reverify_worklist( + repo, keys, depth=2, include_federation=True, risk_client=_EmptyRisk() + ) + # reachable wardline, no findings -> earned-empty absent. 
+ assert env["enrichment"]["risk"] == "absent" + + +def test_unreachable_member_is_unavailable_not_absent(tmp_path: Path) -> None: + repo, keys = _seed_repo_with_entities(tmp_path, ["python:function:a.py::a"]) + env = commands.reverify_worklist( + repo, keys, depth=2, include_federation=True, risk_client=_BoomRisk() + ) + # a transport that raised is unreachable -> unavailable, never a false absent. + assert env["enrichment"]["risk"] == "unavailable" + + +def test_page_two_item_is_still_enriched(tmp_path: Path) -> None: + """M3: the merge runs over the FULL filtered+sorted list BEFORE paging, so an + item that only appears on page 2 is enriched identically to a page-1 item.""" + + locators = [ + "python:function:a.py::first", + "python:function:a.py::second", + ] + repo, keys = _seed_repo_with_entities(tmp_path, locators) + # limit=1 so each changed entity lands on its own page; sort by depth keeps + # the two changed (depth 0) items in a stable order across the page boundary. + page1 = commands.reverify_worklist( + repo, + keys, + depth=2, + include_federation=True, + risk_client=_FindingsRisk(), + limit=1, + sort_by="depth", + sort_order="asc", + ) + assert page1["data"]["page"]["has_more"] is True + cursor = page1["data"]["page"]["next_cursor"] + page2 = commands.reverify_worklist( + repo, + keys, + depth=2, + include_federation=True, + risk_client=_FindingsRisk(), + limit=1, + sort_by="depth", + sort_order="asc", + cursor=cursor, + ) + page2_item = page2["data"]["items"][0] + # the item that surfaced only on page 2 still carries its risk findings. + assert page2_item["enrichment"]["risk"] + assert page2_item["enrichment"]["risk"][0]["fingerprint"].startswith("f-") + # and the page-1/page-2 items are distinct entities (real paging, not a repeat). 
+ assert ( + page1["data"]["items"][0]["entity"]["locator"] + != page2_item["entity"]["locator"] + ) From 7b2d19ac6b6c129cbdb1645f88d08ce5ccf6f61f Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 16 Jun 2026 02:23:16 +1000 Subject: [PATCH 09/10] feat(coupling): temporal co-change graph (Rung 2 Track A, schema v3) Co-Authored-By: Claude Opus 4.8 (1M context) --- src/warpline/cli.py | 98 ++++++++++++ src/warpline/coupling.py | 77 +++++++++ src/warpline/git.py | 15 ++ src/warpline/store.py | 274 +++++++++++++++++++++++++++++++ tests/test_anchor_capture.py | 2 +- tests/test_coupling.py | 283 +++++++++++++++++++++++++++++++++ tests/test_store.py | 4 +- tests/test_store_migrations.py | 7 +- 8 files changed, 754 insertions(+), 6 deletions(-) create mode 100644 src/warpline/coupling.py create mode 100644 tests/test_coupling.py diff --git a/src/warpline/cli.py b/src/warpline/cli.py index 12f30ba..1b05b93 100644 --- a/src/warpline/cli.py +++ b/src/warpline/cli.py @@ -50,6 +50,67 @@ def _optional_sei_client( } +def _co_change_payload( + repo: Path, + *, + sei: str | None, + locator: str | None, + entity_key_id: int | None, + min_count: int, +) -> dict[str, object]: + """Compose the ``co-change`` read surface (NON-FROZEN/internal Track A). + + Resolves the requested entity to a warpline-local ``entity_key_id`` (by + explicit id, SEI, or locator), then lists its co-change partners. Each + partner carries an honest ``enrichment.sei`` state per the closed vocab: + ``present`` when the partner's SEI resolved, ``absent`` when it is still NULL + (``sei:null`` — minted before loomweave resolved it). The graph is keyed on + warpline-local ids; the SEI is joined, never minted. 
+ """ + + from warpline.coupling import classify_confidence + + with WarplineStore.open(default_store_path(repo)) as store: + key_id: int | None = entity_key_id + if key_id is None: + if sei is not None: + row = store.resolve_ref(repo, "sei", sei) + elif locator is not None: + row = store.resolve_ref(repo, "locator", locator) + else: + return { + "schema": "warpline.coupling.partners.v1", + "error": "one of --sei / --locator / --entity-key-id is required", + "partners": [], + } + if row is None: + return { + "schema": "warpline.coupling.partners.v1", + "error": "entity not found", + "partners": [], + } + key_id = int(str(row["id"])) + partners = store.co_change_partners(repo, key_id, min_count=min_count) + + enriched: list[dict[str, object]] = [] + for partner in partners: + partner_sei = partner.get("sei") + co_count = partner["co_change_count"] + assert isinstance(co_count, int) + enriched.append( + { + **partner, + "confidence": classify_confidence(co_count), + "enrichment": {"sei": "present" if partner_sei is not None else "absent"}, + } + ) + return { + "schema": "warpline.coupling.partners.v1", + "entity_key_id": key_id, + "partners": enriched, + } + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(prog="warpline") parser.add_argument("--version", action="store_true", help="print version and exit") @@ -119,6 +180,27 @@ def build_parser() -> argparse.ArgumentParser: reresolve_parser.add_argument("--loomweave-command", default="loomweave") reresolve_parser.add_argument("--json", action="store_true") + # NON-FROZEN/internal verbs (Rung 2 Track A). Neither is one of the six + # frozen v1 MCP tools; both are read-only advisory surfaces over the + # warpline-owned co-change graph. 
+ rebuild_coupling = sub.add_parser( + "rebuild-coupling", + help="Rebuild the co-change coupling graph from change_events (idempotent).", + ) + rebuild_coupling.add_argument("--repo", type=Path, default=Path(".")) + rebuild_coupling.add_argument("--json", action="store_true") + + co_change = sub.add_parser( + "co-change", + help="List temporal co-change partners of an entity (read-only advisory).", + ) + co_change.add_argument("--repo", type=Path, default=Path(".")) + co_change.add_argument("--sei") + co_change.add_argument("--locator") + co_change.add_argument("--entity-key-id", type=int) + co_change.add_argument("--min-count", type=int, default=2) + co_change.add_argument("--json", action="store_true") + loomweave_probe = sub.add_parser("loomweave-probe") loomweave_probe.add_argument("--repo", type=Path, default=Path(".")) loomweave_probe.add_argument("--command", dest="loomweave_command", default="loomweave") @@ -282,6 +364,22 @@ def main(argv: list[str] | None = None) -> int: payload = LoomweaveProbe(repo=args.repo, command=args.loomweave_command).probe() print(json.dumps(payload, sort_keys=True) if args.json else json.dumps(payload, indent=2)) return 0 + if args.command == "rebuild-coupling": + with WarplineStore.open(default_store_path(args.repo)) as store: + report = store.rebuild_co_change_pairs(args.repo) + out: dict[str, object] = {"schema": "warpline.coupling.rebuild.v1", **report} + print(json.dumps(out, sort_keys=True) if args.json else json.dumps(out, indent=2)) + return 0 + if args.command == "co-change": + payload = _co_change_payload( + args.repo, + sei=args.sei, + locator=args.locator, + entity_key_id=args.entity_key_id, + min_count=args.min_count, + ) + print(json.dumps(payload, sort_keys=True) if args.json else json.dumps(payload, indent=2)) + return 0 if args.command == "changed": payload = commands.change_list(args.repo, args.rev_range) print(json.dumps(payload, sort_keys=True) if args.json else json.dumps(payload, indent=2)) diff --git 
a/src/warpline/coupling.py b/src/warpline/coupling.py new file mode 100644 index 0000000..cdc03db --- /dev/null +++ b/src/warpline/coupling.py @@ -0,0 +1,77 @@ +"""Temporal co-change coupling derivation (Rung 2 Track A). + +Pure derivation helpers for the co-change graph. Two entities are "coupled" when +they keep changing together in the same commit; the count of such co-changes is a +co-occurrence fact warpline OWNS, derived entirely from its own ``change_events``. + +Doctrine: +- SEI-orthogonality / no-mirror — these helpers operate only on warpline-local + ``entity_key_id`` integers and the counts warpline derives from them. They mint + no identifier, parse no SEI, and read no sibling state. The SEI is joined at + read time in the store, never here. +- Honesty invariant — below the sample floor, ``coupling_rate`` returns ``None`` + (the rate is not yet meaningful) and ``classify_confidence`` returns ``low``; + a sparse pair is never dressed up as a high-confidence signal. + +This module is a pure leaf: it imports nothing from ``warpline.commands`` or +``warpline.store`` (the store imports IT), so there is no import cycle. +""" + +from __future__ import annotations + +# Sample-size floor below which a coupling rate is not yet meaningful. Mirrors +# the confidence threshold so the two honesty signals agree. +_RATE_SAMPLE_FLOOR = 5 + +# Confidence thresholds on raw co-change count. +_CONFIDENCE_MEDIUM_FLOOR = 5 +_CONFIDENCE_HIGH_FLOOR = 20 + + +def derive_pairs_from_commit(entity_key_ids: list[int]) -> list[tuple[int, int]]: + """All unordered pairs of a commit's changed entities, canonically ``a < b``. + + Duplicate ids collapse (a commit touches a given entity once for coupling + purposes); the output is deterministic (sorted) so a rebuild reproduces the + same set. An empty/singleton input yields no pairs. 
+ """ + + ids = sorted(set(entity_key_ids)) + pairs: list[tuple[int, int]] = [] + for i in range(len(ids)): + for j in range(i + 1, len(ids)): + pairs.append((ids[i], ids[j])) + return pairs + + +def classify_confidence(co_change_count: int) -> str: + """Map a raw co-change count to a closed confidence vocab. + + ``< 5`` → ``low`` · ``5–19`` → ``medium`` · ``>= 20`` → ``high``. + """ + + if co_change_count >= _CONFIDENCE_HIGH_FLOOR: + return "high" + if co_change_count >= _CONFIDENCE_MEDIUM_FLOOR: + return "medium" + return "low" + + +def coupling_rate(co_change_count: int, total: int) -> float | None: + """Fraction of an entity's changes that co-occurred with the partner. + + Returns ``None`` (suppressed) when the denominator is below the sample floor + — the rate would over-read a handful of co-changes as a strong coupling. The + rate is clamped to ``[0.0, 1.0]`` (co-change count can never exceed an + entity's own total change count in a consistent store, but a rebuild race or + divergent data must never emit a >1 rate). + """ + + if total < _RATE_SAMPLE_FLOOR or total <= 0: + return None + rate = co_change_count / total + if rate < 0.0: + return 0.0 + if rate > 1.0: + return 1.0 + return rate diff --git a/src/warpline/git.py b/src/warpline/git.py index c178f96..af0e272 100644 --- a/src/warpline/git.py +++ b/src/warpline/git.py @@ -175,6 +175,10 @@ def backfill( for sha in _commits(repo, since=since): meta = _commit_meta(repo, sha) store.upsert_commit(repo_id, meta) + # M7: accumulate the commit's changed entity ids across the FULL + # path+locator loop, then derive co-change pairs ONCE per commit so the + # >30 fan-out cap is per-commit, not per-locator. 
+ commit_key_ids: set[int] = set() for status, path in _name_status(repo, sha): for locator in _locators_for_path(repo, sha, path): sei = _sei_for_locator(sei_client, locator) @@ -189,6 +193,10 @@ def backfill( actor=meta["author"], changed_at=meta["authored_at"], ) + commit_key_ids.add(key_id) + store.update_co_change_pairs( + repo_id, sha, commit_key_ids, changed_at=meta["authored_at"] + ) count += 1 return {"commits": count, "sei": sei_stats} @@ -208,6 +216,9 @@ def ingest_commit( anchor = _detect_anchor(repo) changed = 0 sei_stats = {"resolved": 0, "absent": 0} + # M7: accumulate the commit's changed entity ids across the FULL path+locator + # loop; derive co-change pairs ONCE after the loop (per-commit fan-out cap). + commit_key_ids: set[int] = set() for status, path in _name_status(repo, resolved): for locator in _locators_for_path(repo, resolved, path): sei = _sei_for_locator(sei_client, locator) @@ -231,5 +242,9 @@ def ingest_commit( detected_at=anchor.detected_at, detected_context=anchor.context, ) + commit_key_ids.add(key_id) changed += 1 + store.update_co_change_pairs( + repo_id, resolved, commit_key_ids, changed_at=meta["authored_at"] + ) return {"commit": resolved, "changes": changed, "sei": sei_stats} diff --git a/src/warpline/store.py b/src/warpline/store.py index c306730..2d52424 100644 --- a/src/warpline/store.py +++ b/src/warpline/store.py @@ -2,6 +2,7 @@ import hashlib import logging +import os import sqlite3 from collections.abc import Callable from pathlib import Path @@ -15,6 +16,13 @@ # in this project drops a column, so the floor is RETURNING, not ALTER … DROP COLUMN. _MIN_SQLITE_VERSION = (3, 35, 0) +# R8/M7 co-change fan-out cap. A commit touching MORE than this many distinct +# entities skips pair generation entirely: when everything changes together the +# pairwise coupling signal is near zero and O(n^2) pair writes are pure noise. 
+# The cap is applied per-commit (the caller accumulates ids across the full +# path+locator loop and calls update_co_change_pairs once per commit, M7). +_CO_CHANGE_FANOUT_CAP = 30 + # After Rung 1a, ``SCHEMA`` DDL is FROZEN. All schema changes (added columns, # new tables) go through ``MIGRATIONS`` — never by editing ``SCHEMA``. ``SCHEMA`` # remains only the fresh-DB base-table definition (idempotent ``IF NOT EXISTS``). @@ -152,11 +160,50 @@ def _migrate_v2_anchor_columns(conn: sqlite3.Connection) -> None: conn.execute("ALTER TABLE change_events ADD COLUMN detected_context TEXT") +def _migrate_v3_co_change_pairs(conn: sqlite3.Connection) -> None: + """v3 (Rung 2 Track A): temporal co-change coupling graph. + + ``co_change_pairs`` records, for each unordered pair of warpline-local + ``entity_key_id``s, how many times they changed together in the same commit + — a co-occurrence fact warpline OWNS (derived from its own ``change_events``), + not a mirror of any sibling. Pairs are stored canonically (``a < b``) so each + unordered pair has exactly one row. + + Per-entity totals are NOT denormalized here: they come from ``change_events`` + aggregation at read time (``co_change_partners``). If read cost ever demands + it, denormalized ``total_a``/``total_b`` columns are an additive later + migration — the co-change read cost note in the plan. + + SEI-orthogonality: the table keys on ``entity_key_id`` integers only and mints + no identifier; the SEI is joined from ``entity_keys`` at read time, never + stored here. + """ + + conn.execute( + """ + CREATE TABLE IF NOT EXISTS co_change_pairs ( + repo_id TEXT NOT NULL, + entity_key_id_a INTEGER NOT NULL, + entity_key_id_b INTEGER NOT NULL, + co_change_count INTEGER NOT NULL, + last_co_change TEXT, + last_commit_sha TEXT, + PRIMARY KEY (repo_id, entity_key_id_a, entity_key_id_b) + ) + """ + ) + + # Ordered, forward-only migrations. Each step's ``version`` is strictly greater # than the previous. 
v2 (anchor columns) lands in Rung 1b; v3 (co_change_pairs) + in Rung 2 Track A. +# +# Migration-ordering gate (B5): v3 MUST NOT precede v2 on disk — a DB opened in +# the gap would land at user_version=3 and permanently skip v2. The ordered list +# is the enforcement: v2 always runs before v3 for any DB below 2. MIGRATIONS: list[Migration] = [ Migration(version=2, apply=_migrate_v2_anchor_columns), + Migration(version=3, apply=_migrate_v3_co_change_pairs), ] # Highest schema version this build knows how to produce. Equals the base @@ -770,6 +817,233 @@ def churn_for_entity( "last_actor": last_actor, } + def update_co_change_pairs( + self, + repo_id: str, + commit_sha: str, + entity_key_ids: set[int] | list[int], + changed_at: str | None = None, + ) -> dict[str, int | str]: + """Upsert co-change pairs for one commit's changed entity set (Track A). + + Returns a small report dict: ``{"pairs": N, ...}``. Three honest + early-return shapes carry their reason so the caller and tests can assert + them without inspecting health_log: + + - kill-switch (B4): ``WARPLINE_COCHANGE`` set to a falsy/zero value → + ``{"pairs": 0, "skipped": "kill_switch"}``; NO rows written, no health + event. A pathological repo opts out without a code change. + - high fan-out (R8/M7): more than ``_CO_CHANGE_FANOUT_CAP`` (30) changed + entities in one commit → ``{"pairs": 0, "skipped": "high_fanout", + "entities": N}``; records a ``coupling_skipped`` row in health_log + (message prefixed ``high_fanout``; the entity count is also returned). High-fanout commits carry + near-zero coupling signal, so generating O(n^2) pairs is pure noise. + - too few entities (no pair possible) → ``{"pairs": 0}``. + + All writes are fail-soft: a SQLite error records ``coupling_write_failed`` + to health_log and returns ``{"pairs": 0, "error": ...}`` rather than + propagating — co-change derivation NEVER blocks ingest. 
+ + The ``>30`` cap is applied to the WHOLE commit's entity set (M7): the + caller accumulates ids across the full path+locator loop and calls this + ONCE per commit, so the cap is per-commit, not per-locator. + """ + + # B4 kill-switch: WARPLINE_COCHANGE set (case-insensitively, after + # stripping) to "0"/"false"/"no"/"off"/"" → skip. + raw = os.environ.get("WARPLINE_COCHANGE") + if raw is not None and raw.strip().lower() in {"0", "false", "no", "off", ""}: + return {"pairs": 0, "skipped": "kill_switch"} + + ids = sorted(set(int(i) for i in entity_key_ids)) + n = len(ids) + if n > _CO_CHANGE_FANOUT_CAP: + # R8: record the skip both to health_log AND in the return dict. + self.conn.execute( + "INSERT INTO health_log(repo_id, code, message) VALUES (?, ?, ?)", + ( + repo_id, + "coupling_skipped", + f"high_fanout: {n} entities in commit {commit_sha} (cap " + f"{_CO_CHANGE_FANOUT_CAP})", + ), + ) + self.conn.commit() + return {"pairs": 0, "skipped": "high_fanout", "entities": n} + if n < 2: + return {"pairs": 0} + + # Import locally to keep coupling.py a pure leaf (it never imports store, + # avoiding an import cycle); store -> coupling is the one-way edge. + from warpline.coupling import derive_pairs_from_commit + + pairs = derive_pairs_from_commit(ids) + try: + for key_a, key_b in pairs: + self.conn.execute( + """ + INSERT INTO co_change_pairs( + repo_id, entity_key_id_a, entity_key_id_b, + co_change_count, last_co_change, last_commit_sha + ) VALUES (?, ?, ?, 1, ?, ?) 
+ ON CONFLICT(repo_id, entity_key_id_a, entity_key_id_b) DO UPDATE SET + co_change_count = co_change_count + 1, + last_co_change = excluded.last_co_change, + last_commit_sha = excluded.last_commit_sha + """, + (repo_id, key_a, key_b, changed_at, commit_sha), + ) + self.conn.commit() + except sqlite3.Error as exc: # fail-soft: never block ingest (R8) + self.conn.execute( + "INSERT INTO health_log(repo_id, code, message) VALUES (?, ?, ?)", + (repo_id, "coupling_write_failed", f"{commit_sha}: {exc}"), + ) + self.conn.commit() + return {"pairs": 0, "error": str(exc)} + return {"pairs": len(pairs)} + + def co_change_partners( + self, + repo: Path, + entity_key_id: int, + min_count: int = 2, + ) -> list[dict[str, object]]: + """Co-change partners of one entity, with SEI joined at read time. + + Returns rows ordered by descending ``co_change_count``. Each row carries + the partner's ``entity_key_id``, ``locator``, ``sei`` (NULL when the + partner key was minted before its SEI resolved — honest ``sei:null``), + ``co_change_count``, ``coupling_rate`` (suppressed to None below the + sample floor), ``sample_size`` (the partner's total churn), and + ``last_co_change``. + + Read-cost note: ``coupling_rate`` requires the partner's per-entity total + churn, computed here with one COUNT per partner via ``churn_for_entity``. + This is acceptable for the bounded partner lists this surface returns; if + a hot path ever needs it at scale, denormalized totals are an additive + later migration (no schema break). 
+ """ + + repo_id = self._repo_id(repo) + rows = self.conn.execute( + """ + SELECT CASE WHEN entity_key_id_a = :id THEN entity_key_id_b + ELSE entity_key_id_a END AS partner_id, + co_change_count, last_co_change + FROM co_change_pairs + WHERE repo_id = :repo + AND (entity_key_id_a = :id OR entity_key_id_b = :id) + AND co_change_count >= :min_count + ORDER BY co_change_count DESC, partner_id + """, + {"repo": repo_id, "id": int(entity_key_id), "min_count": int(min_count)}, + ).fetchall() + + from warpline.coupling import coupling_rate + + partners: list[dict[str, object]] = [] + for row in rows: + partner_id = int(row["partner_id"]) + key = self.conn.execute( + "SELECT locator, sei FROM entity_keys WHERE id = ? AND repo_id = ?", + (partner_id, repo_id), + ).fetchone() + if key is None: + continue + total = int(str(self.churn_for_entity(repo, partner_id)["churn_count"])) + co_count = int(row["co_change_count"]) + partners.append( + { + "entity_key_id": partner_id, + "locator": key["locator"], + "sei": key["sei"], + "co_change_count": co_count, + "coupling_rate": coupling_rate(co_count, total), + "sample_size": total, + "last_co_change": row["last_co_change"], + } + ) + return partners + + def co_change_commit_groups(self, repo: Path) -> list[dict[str, object]]: + """Group ``change_events`` by commit into ``(commit_sha, [entity_key_id])``. + + The rebuild input: one group per commit, deduplicated entity ids, ordered + by commit for deterministic, interruptible rebuilds. ``last_co_change`` is + the commit's max ``changed_at`` so a rebuilt row carries the same recency + marker as the live ingest path. + """ + + repo_id = self._repo_id(repo) + rows = self.conn.execute( + """ + SELECT commit_sha, entity_key_id, MAX(changed_at) AS changed_at + FROM change_events + WHERE repo_id = ? 
+ GROUP BY commit_sha, entity_key_id + ORDER BY commit_sha + """, + (repo_id,), + ).fetchall() + groups: dict[str, dict[str, object]] = {} + for row in rows: + sha = str(row["commit_sha"]) + group = groups.setdefault( + sha, {"commit_sha": sha, "entity_key_ids": [], "changed_at": None} + ) + ids = group["entity_key_ids"] + assert isinstance(ids, list) + ids.append(int(row["entity_key_id"])) + changed = row["changed_at"] + if changed is not None and ( + group["changed_at"] is None or str(changed) > str(group["changed_at"]) + ): + group["changed_at"] = changed + return list(groups.values()) + + def clear_co_change_pairs(self, repo: Path) -> None: + """Drop all co-change rows for a repo (rebuild precondition).""" + + repo_id = self._repo_id(repo) + self.conn.execute("DELETE FROM co_change_pairs WHERE repo_id = ?", (repo_id,)) + self.conn.commit() + + def rebuild_co_change_pairs(self, repo: Path) -> dict[str, int]: + """Rebuild the whole co-change graph from ``change_events`` (idempotent). + + Clears existing rows, then replays every commit group through the same + ``update_co_change_pairs`` path the live ingest uses — so a rebuild and an + incremental ingest converge to identical counts. Interruptible: each + commit group commits independently; a re-run is idempotent because the + clear precedes the replay. + + Returns ``{"commits": N_groups_replayed, "pairs": N_pair_upserts, + "skipped": N_groups_skipped}``. 
+ """ + + repo_id = self.ensure_repo(repo) + self.clear_co_change_pairs(repo) + commits = 0 + pairs = 0 + skipped = 0 + for group in self.co_change_commit_groups(repo): + ids = group["entity_key_ids"] + assert isinstance(ids, list) + report = self.update_co_change_pairs( + repo_id, + str(group["commit_sha"]), + ids, + changed_at=( + None if group["changed_at"] is None else str(group["changed_at"]) + ), + ) + commits += 1 + written = report.get("pairs", 0) + pairs += int(written) if isinstance(written, int) else 0 + if report.get("skipped"): + skipped += 1 + return {"commits": commits, "pairs": pairs, "skipped": skipped} + def log_health(self, repo: Path, code: str, message: str) -> None: repo_id = self.ensure_repo(repo) self.conn.execute( diff --git a/tests/test_anchor_capture.py b/tests/test_anchor_capture.py index 01e33dd..00c0066 100644 --- a/tests/test_anchor_capture.py +++ b/tests/test_anchor_capture.py @@ -163,7 +163,7 @@ def test_v1_db_opened_by_v2_client_migrates_and_old_rows_read_null( assert "detected_context" not in cols_before with WarplineStore.open(db) as store: - assert store.schema_version() == 2 + assert store.schema_version() == 3 row = store.conn.execute( "SELECT detected_branch, detected_head_sha, detected_at, detected_context " "FROM change_events WHERE commit_sha='deadbeef'" diff --git a/tests/test_coupling.py b/tests/test_coupling.py new file mode 100644 index 0000000..c299a05 --- /dev/null +++ b/tests/test_coupling.py @@ -0,0 +1,283 @@ +"""Rung 2 Track A: temporal co-change coupling graph (schema v3). + +Locks the behaviours the plan names: canonical pair derivation, the closed +confidence vocab and its thresholds, sample-floor rate suppression, the R8/M7 +per-commit >30 fan-out cap (recorded to health_log AND returned), the B4 +``WARPLINE_COCHANGE`` kill-switch, fail-soft writes, ``rebuild-coupling`` +idempotence, and the honest ``sei:null`` / ``enrichment.sei:absent`` surface for +SEI-sparse partners. 
The graph keys on warpline-local ``entity_key_id`` only and +joins the SEI at read time — never minting one. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from conftest import git as _git +from conftest import init_repo as _init_repo + +from warpline.coupling import ( + classify_confidence, + coupling_rate, + derive_pairs_from_commit, +) +from warpline.git import backfill, ingest_commit +from warpline.store import WarplineStore, default_store_path + +# --- pure derivation helpers ------------------------------------------------- + + +def test_derive_pairs_canonical_ordering_and_dedup() -> None: + # Unsorted, with a duplicate id — pairs come out sorted, canonical (a None: + assert derive_pairs_from_commit([]) == [] + assert derive_pairs_from_commit([7]) == [] + + +def test_classify_confidence_thresholds() -> None: + assert classify_confidence(0) == "low" + assert classify_confidence(4) == "low" + assert classify_confidence(5) == "medium" + assert classify_confidence(19) == "medium" + assert classify_confidence(20) == "high" + assert classify_confidence(99) == "high" + + +def test_coupling_rate_suppressed_below_sample_floor() -> None: + # total < 5 → None (the rate is not yet meaningful). + assert coupling_rate(2, 4) is None + assert coupling_rate(0, 0) is None + # total >= 5 → real fraction, clamped to [0, 1]. + assert coupling_rate(3, 6) == pytest.approx(0.5) + assert coupling_rate(10, 5) == 1.0 # clamped, never > 1 + + +# --- store: update_co_change_pairs ------------------------------------------- + + +def test_update_co_change_pairs_counts_and_upserts(tmp_path: Path) -> None: + db = tmp_path / "warpline.db" + with WarplineStore.open(db) as store: + repo_id = store.ensure_repo(tmp_path) + r1 = store.update_co_change_pairs(repo_id, "sha1", {1, 2, 3}, changed_at="2024-01-01") + assert r1 == {"pairs": 3} + # Re-changing the same pair bumps its count. 
+ r2 = store.update_co_change_pairs(repo_id, "sha2", {1, 2}, changed_at="2024-01-02") + assert r2 == {"pairs": 1} + row = store.conn.execute( + "SELECT co_change_count, last_commit_sha, last_co_change FROM co_change_pairs " + "WHERE entity_key_id_a=1 AND entity_key_id_b=2" + ).fetchone() + assert row["co_change_count"] == 2 + assert row["last_commit_sha"] == "sha2" + assert row["last_co_change"] == "2024-01-02" + + +def test_update_co_change_pairs_singleton_writes_nothing(tmp_path: Path) -> None: + db = tmp_path / "warpline.db" + with WarplineStore.open(db) as store: + repo_id = store.ensure_repo(tmp_path) + assert store.update_co_change_pairs(repo_id, "sha1", {7}) == {"pairs": 0} + n = store.conn.execute("SELECT COUNT(*) AS c FROM co_change_pairs").fetchone()["c"] + assert n == 0 + + +def test_high_fanout_commit_is_skipped_and_recorded(tmp_path: Path) -> None: + """R8/M7: >30 entities in one commit → skip, record to health_log AND return.""" + db = tmp_path / "warpline.db" + with WarplineStore.open(db) as store: + repo_id = store.ensure_repo(tmp_path) + big = set(range(1, 35)) # 34 entities > cap 30 + report = store.update_co_change_pairs(repo_id, "fanout", big) + assert report == {"pairs": 0, "skipped": "high_fanout", "entities": 34} + # Nothing written. + n = store.conn.execute("SELECT COUNT(*) AS c FROM co_change_pairs").fetchone()["c"] + assert n == 0 + # Health event recorded. 
+ codes = [ + r["code"] + for r in store.conn.execute("SELECT code FROM health_log ORDER BY id").fetchall() + ] + assert "coupling_skipped" in codes + + +def test_kill_switch_skips_all_writes(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """B4: WARPLINE_COCHANGE=0 → skip + return, zero co_change_pairs rows.""" + monkeypatch.setenv("WARPLINE_COCHANGE", "0") + db = tmp_path / "warpline.db" + with WarplineStore.open(db) as store: + repo_id = store.ensure_repo(tmp_path) + report = store.update_co_change_pairs(repo_id, "sha1", {1, 2, 3}) + assert report == {"pairs": 0, "skipped": "kill_switch"} + n = store.conn.execute("SELECT COUNT(*) AS c FROM co_change_pairs").fetchone()["c"] + assert n == 0 + + +def test_kill_switch_via_ingest_yields_zero_rows( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """B4 end-to-end: an ingest under WARPLINE_COCHANGE=0 records no pairs.""" + monkeypatch.setenv("WARPLINE_COCHANGE", "false") + repo = _init_repo(tmp_path) + (repo / "a.py").write_text("x = 1\n", encoding="utf-8") + (repo / "b.py").write_text("y = 2\n", encoding="utf-8") + _git(repo, "add", "a.py", "b.py") + _git(repo, "commit", "-m", "two files") + with WarplineStore.open(default_store_path(repo)) as store: + ingest_commit(store, repo, "HEAD") + n = store.conn.execute("SELECT COUNT(*) AS c FROM co_change_pairs").fetchone()["c"] + assert n == 0 + + +def test_co_change_write_is_fail_soft(tmp_path: Path) -> None: + """A SQLite write error is recorded and swallowed — never blocks ingest. + + Drop ``co_change_pairs`` out from under the writer so the INSERT raises + ``OperationalError`` (the fail-soft branch); ``health_log`` still exists so + the skip is recorded rather than propagated. 
+ """ + db = tmp_path / "warpline.db" + with WarplineStore.open(db) as store: + repo_id = store.ensure_repo(tmp_path) + store.conn.execute("DROP TABLE co_change_pairs") + store.conn.commit() + report = store.update_co_change_pairs(repo_id, "sha1", {1, 2}) + assert report["pairs"] == 0 + assert "error" in report + codes = [ + r["code"] + for r in store.conn.execute("SELECT code FROM health_log ORDER BY id").fetchall() + ] + assert "coupling_write_failed" in codes + + +# --- ingest + read surface --------------------------------------------------- + + +def _two_coupled_files(tmp_path: Path) -> Path: + repo = _init_repo(tmp_path) + (repo / "a.py").write_text("x = 1\n", encoding="utf-8") + (repo / "b.py").write_text("y = 1\n", encoding="utf-8") + _git(repo, "add", "a.py", "b.py") + _git(repo, "commit", "-m", "c1") + return repo + + +def test_ingest_records_co_change_partners(tmp_path: Path) -> None: + repo = _two_coupled_files(tmp_path) + with WarplineStore.open(default_store_path(repo)) as store: + ingest_commit(store, repo, "HEAD") + a = store.resolve_ref(repo, "locator", "file:a.py") + assert a is not None + partners = store.co_change_partners(repo, int(a["id"]), min_count=1) + locators = {str(p["locator"]) for p in partners} + assert "file:b.py" in locators + + +def test_sei_sparse_partner_reads_null_sei(tmp_path: Path) -> None: + """SEI-sparse pairs surface sei:null (no SEI minted; honest absence).""" + repo = _two_coupled_files(tmp_path) + with WarplineStore.open(default_store_path(repo)) as store: + ingest_commit(store, repo, "HEAD") # no sei_client → SEI stays NULL + a = store.resolve_ref(repo, "locator", "file:a.py") + assert a is not None + partners = store.co_change_partners(repo, int(a["id"]), min_count=1) + assert partners + assert all(p["sei"] is None for p in partners) + + +def test_co_change_cli_payload_marks_sei_absent(tmp_path: Path) -> None: + """The co-change read surface emits enrichment.sei == 'absent' for null SEI.""" + from warpline.cli import 
_co_change_payload + + repo = _two_coupled_files(tmp_path) + with WarplineStore.open(default_store_path(repo)) as store: + ingest_commit(store, repo, "HEAD") + payload = _co_change_payload( + repo, sei=None, locator="file:a.py", entity_key_id=None, min_count=1 + ) + partners = payload["partners"] + assert isinstance(partners, list) and partners + for partner in partners: + assert partner["sei"] is None + assert partner["enrichment"]["sei"] == "absent" + assert partner["confidence"] in {"low", "medium", "high"} + + +def test_co_change_cli_payload_requires_a_selector(tmp_path: Path) -> None: + from warpline.cli import _co_change_payload + + repo = _init_repo(tmp_path) + with WarplineStore.open(default_store_path(repo)): + pass + payload = _co_change_payload( + repo, sei=None, locator=None, entity_key_id=None, min_count=2 + ) + assert payload["partners"] == [] + assert "error" in payload + + +# --- rebuild ----------------------------------------------------------------- + + +def test_rebuild_coupling_is_idempotent(tmp_path: Path) -> None: + """rebuild-coupling run twice → identical counts (idempotent).""" + repo = _two_coupled_files(tmp_path) + (repo / "a.py").write_text("x = 2\n", encoding="utf-8") + (repo / "b.py").write_text("y = 2\n", encoding="utf-8") + _git(repo, "add", "a.py", "b.py") + _git(repo, "commit", "-m", "c2") + + with WarplineStore.open(default_store_path(repo)) as store: + backfill(store, repo) + + def _counts() -> list[tuple[int, int, int]]: + return [ + (int(r["entity_key_id_a"]), int(r["entity_key_id_b"]), int(r["co_change_count"])) + for r in store.conn.execute( + "SELECT entity_key_id_a, entity_key_id_b, co_change_count " + "FROM co_change_pairs ORDER BY entity_key_id_a, entity_key_id_b" + ).fetchall() + ] + + first = store.rebuild_co_change_pairs(repo) + counts_1 = _counts() + second = store.rebuild_co_change_pairs(repo) + counts_2 = _counts() + + assert first == second + assert counts_1 == counts_2 + # a.py and b.py changed together in both 
commits → count 2. + assert counts_1 + assert any(c == 2 for *_pair, c in counts_1) + + +def test_rebuild_matches_incremental_ingest(tmp_path: Path) -> None: + """A rebuild converges to the same counts the live ingest path produced.""" + repo = _two_coupled_files(tmp_path) + with WarplineStore.open(default_store_path(repo)) as store: + ingest_commit(store, repo, "HEAD") + live = [ + (int(r["entity_key_id_a"]), int(r["entity_key_id_b"]), int(r["co_change_count"])) + for r in store.conn.execute( + "SELECT entity_key_id_a, entity_key_id_b, co_change_count " + "FROM co_change_pairs ORDER BY entity_key_id_a, entity_key_id_b" + ).fetchall() + ] + store.rebuild_co_change_pairs(repo) + rebuilt = [ + (int(r["entity_key_id_a"]), int(r["entity_key_id_b"]), int(r["co_change_count"])) + for r in store.conn.execute( + "SELECT entity_key_id_a, entity_key_id_b, co_change_count " + "FROM co_change_pairs ORDER BY entity_key_id_a, entity_key_id_b" + ).fetchall() + ] + assert live == rebuilt diff --git a/tests/test_store.py b/tests/test_store.py index 1fba02e..65836c2 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -23,7 +23,7 @@ def test_default_store_path_honors_explicit_store_dir(tmp_path: Path) -> None: def test_store_initializes_schema(tmp_path: Path) -> None: db = tmp_path / "warpline.db" with WarplineStore.open(db) as store: - assert store.schema_version() == 2 + assert store.schema_version() == 3 def test_store_writes_nested_gitignore_that_ignores_runtime_db(tmp_path: Path) -> None: @@ -32,7 +32,7 @@ def test_store_writes_nested_gitignore_that_ignores_runtime_db(tmp_path: Path) - subprocess.run(["git", "init"], cwd=repo, check=True, text=True, capture_output=True) with WarplineStore.open(default_store_path(repo)) as store: - assert store.schema_version() == 2 + assert store.schema_version() == 3 gitignore = repo / ".weft" / "warpline" / ".gitignore" assert gitignore.exists() diff --git a/tests/test_store_migrations.py b/tests/test_store_migrations.py index 
f3bee8f..8d2e27a 100644 --- a/tests/test_store_migrations.py +++ b/tests/test_store_migrations.py @@ -42,9 +42,10 @@ def test_fresh_db_lands_at_highest_known_version(tmp_path: Path) -> None: db = tmp_path / "warpline.db" with WarplineStore.open(db) as store: assert store.schema_version() == store_mod.HIGHEST_KNOWN_VERSION - # As of Rung 1b the highest known version is 2 (anchor columns). + # As of Rung 2 Track A the highest known version is 3 (co_change_pairs; + # v2 anchor columns + v3 co-change graph). assert _user_version(db) == store_mod.HIGHEST_KNOWN_VERSION - assert store_mod.HIGHEST_KNOWN_VERSION == 2 + assert store_mod.HIGHEST_KNOWN_VERSION == 3 def test_connection_pragmas_are_hardened(tmp_path: Path) -> None: @@ -128,7 +129,7 @@ def test_user_version_zero_with_divergent_meta_adopts_and_warns(tmp_path: Path) raw.close() with WarplineStore.open(db) as store: - # Adopted 5 from meta; 5 > highest known (2), so it is also flagged ahead. + # Adopted 5 from meta; 5 > highest known (3), so it is also flagged ahead. 
assert store.schema_version() == 5 codes = _health_codes(db) assert "MIGRATION_META_RECONCILE" in codes From d5edbc36f33e2047914d10949262191591f149d4 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Tue, 16 Jun 2026 02:30:00 +1000 Subject: [PATCH 10/10] feat(cop): temporal COP internals + non-frozen demo CLI + squash-merge reconstruction demo (Rung 2 Track D) Co-Authored-By: Claude Opus 4.8 (1M context) --- src/warpline/cli.py | 95 +++++ src/warpline/cop.py | 395 ++++++++++++++++++ src/warpline/store.py | 23 +- tests/integration/test_reconstruction_demo.py | 162 +++++++ tests/test_cop.py | 187 +++++++++ 5 files changed, 858 insertions(+), 4 deletions(-) create mode 100644 src/warpline/cop.py create mode 100644 tests/integration/test_reconstruction_demo.py create mode 100644 tests/test_cop.py diff --git a/src/warpline/cli.py b/src/warpline/cli.py index 1b05b93..a4ada24 100644 --- a/src/warpline/cli.py +++ b/src/warpline/cli.py @@ -14,6 +14,63 @@ from warpline.reresolve import sweep_reresolve_sei from warpline.store import WarplineStore, default_store_path +# NON-FROZEN/internal COP frame kinds (Rung 2 Track D demo verb). Mirrors +# cop.FRAME_KINDS; kept local so the parser builds without importing cop at +# module import time (cop pulls in the federation consults). +_COP_FRAME_KINDS = ("rev_range", "time_window", "sei", "branch_sha", "edit") + + +def _cop_payload( + repo: Path, + *, + frame: str, + rev_range: str | None, + since: str | None, + until: str | None, + sei: str | None, + branch: str | None, + sha: str | None, + rev: str, +) -> dict[str, object]: + """Compose the NON-FROZEN/internal ``cop`` demo payload (Track D). + + Builds the frame spec from CLI args, resolves it through ``cop.resolve_frame``, + and composes the temporal COP with NO federation transports wired (members + are honestly ``disabled``/dark — the demo asserts the resolution + degradation + path, not live sibling reads). 
The PUBLIC COP tool surface is + interface-pending; this is a demo runner, not a frozen contract. + """ + + from warpline.cop import compose_temporal_cop, resolve_frame + + frame_spec: dict[str, object] = {"kind": frame} + if frame == "rev_range": + frame_spec["rev_range"] = rev_range + elif frame == "time_window": + frame_spec["since"] = since + frame_spec["until"] = until + elif frame == "sei": + frame_spec["sei"] = sei + elif frame == "branch_sha": + frame_spec["branch"] = branch + frame_spec["sha"] = sha + if rev_range is not None: + frame_spec["rev_range"] = rev_range + elif frame == "edit": + frame_spec["rev"] = rev + + with WarplineStore.open(default_store_path(repo)) as store: + items, frame_echo, warnings = resolve_frame(store, repo, frame_spec) + cop = compose_temporal_cop(items, frame_echo) + return { + "schema": "warpline.cop.demo.v1", + "non_frozen": True, + "items": items, + "warnings": warnings, + **cop, + } + + # install/doctor component flags -> component keys _INSTALL_FLAGS = { "claude_code": "claude-code", @@ -201,6 +258,30 @@ def build_parser() -> argparse.ArgumentParser: co_change.add_argument("--min-count", type=int, default=2) co_change.add_argument("--json", action="store_true") + # NON-FROZEN/internal verb (Rung 2 Track D). NOT one of the six frozen v1 + # MCP tools and NOT in the frozen tool set/glossary; the PUBLIC COP tool + # surface is interface-pending. This verb exists only so the temporal-COP + # internals (cop.py) run end-to-end for the reconstruction demo. 
+ cop_parser = sub.add_parser( + "cop", + help="(NON-FROZEN/internal) Temporal COP over a change frame — demo surface.", + ) + cop_parser.add_argument("--repo", type=Path, default=Path(".")) + cop_parser.add_argument( + "--frame", + choices=list(_COP_FRAME_KINDS), + default="rev_range", + help="frame kind: rev_range | time_window | sei | branch_sha | edit", + ) + cop_parser.add_argument("--rev-range") + cop_parser.add_argument("--since") + cop_parser.add_argument("--until") + cop_parser.add_argument("--sei") + cop_parser.add_argument("--branch") + cop_parser.add_argument("--sha") + cop_parser.add_argument("--rev", default="HEAD") + cop_parser.add_argument("--json", action="store_true") + loomweave_probe = sub.add_parser("loomweave-probe") loomweave_probe.add_argument("--repo", type=Path, default=Path(".")) loomweave_probe.add_argument("--command", dest="loomweave_command", default="loomweave") @@ -380,6 +461,20 @@ def main(argv: list[str] | None = None) -> int: ) print(json.dumps(payload, sort_keys=True) if args.json else json.dumps(payload, indent=2)) return 0 + if args.command == "cop": + payload = _cop_payload( + args.repo, + frame=args.frame, + rev_range=args.rev_range, + since=args.since, + until=args.until, + sei=args.sei, + branch=args.branch, + sha=args.sha, + rev=args.rev, + ) + print(json.dumps(payload, sort_keys=True) if args.json else json.dumps(payload, indent=2)) + return 0 if args.command == "changed": payload = commands.change_list(args.repo, args.rev_range) print(json.dumps(payload, sort_keys=True) if args.json else json.dumps(payload, indent=2)) diff --git a/src/warpline/cop.py b/src/warpline/cop.py new file mode 100644 index 0000000..f642162 --- /dev/null +++ b/src/warpline/cop.py @@ -0,0 +1,395 @@ +"""Temporal Common Operating Picture (COP) internals — Rung 2 Track D. + +The COP read surface answers "for THIS change frame, what is the cross-member +posture, and which members did we actually consult?".
It is composed of two +read-time, never-gating, no-mirror steps: + + * :func:`resolve_frame` turns a *frame spec* (``rev_range`` / ``time_window`` / + ``sei`` / ``branch_sha`` / ``edit``) into the warpline-local change items the + frame selects, plus an echo of how the frame resolved and any honest + degradation warning. It reads ONLY existing store methods + git; it mints no + identifier and writes nothing. + * :func:`compose_temporal_cop` consults the federation (reusing the three + ``_consult_*`` from :mod:`warpline.federation` VERBATIM) and wraps the result + in a coverage block. ``coverage.dark_sectors`` is the load-bearing honesty + surface: a member we could not consult is named as a dark sector with its own + reason class, NEVER silently dropped to look like an earned-clean empty. + +R9: this module imports the consults from :mod:`warpline.federation`; +``federation.py`` NEVER imports ``cop.py`` (the edge is one-way). + +The honesty invariant (PDR-0023) governs the degradation path: every COP/frame +output carries a ``weft_reason_class`` drawn from the canonical reason vocab +(:data:`warpline.listing.REASON_CLASSES`). ``clean`` means the frame resolved the +change set it was asked for; any other class means the frame honestly degraded +(e.g. a ``rev_range`` whose SHAs no longer exist after a squash-merge collapsed +and the feature branch was deleted) and the ``frame`` echo carries the cause/fix. + +This module is INTERNAL. The PUBLIC MCP/CLI COP tool surface is interface-pending +(it is NOT one of the six frozen v1 tools); the non-frozen internal ``warpline +cop`` CLI verb exists only to run the end-to-end reconstruction demo. 
+""" + +from __future__ import annotations + +import subprocess +from pathlib import Path +from typing import Any + +from warpline._blast import rev_range_commits +from warpline.errors import BadRevisionError +from warpline.federation import ( + FEDERATION_MEMBERS, + LegisClient, + RiskClient, + _consult_filigree, + _consult_legis, + _consult_wardline, +) +from warpline.listing import reason +from warpline.refs import entity_view +from warpline.siblings import WorkClient +from warpline.store import WarplineStore + +# Frame kinds this module resolves. ``branch_sha`` is the squash-merge / rewritten +# history case: until ``detected_branch`` (Rung 1b) is populated and an episode +# boundary is ratified, it falls back to a rev-range resolution WITH a warning. +FRAME_KINDS = ("rev_range", "time_window", "sei", "branch_sha", "edit") + + +def _item_from_event(event: dict[str, Any]) -> dict[str, Any]: + """Shape a stored change event as a COP item. + + The ``entity`` sub-dict matches the federation consult contract + (``entity.locator`` / ``entity.sei``) so the three ``_consult_*`` can read it + verbatim. The anchor columns (Rung 1b) ride along when present. + """ + + path = event.get("path") + view = entity_view(event, include_key_id=True, path=str(path) if path else None) + return { + "change_id": f"warpline:change:{event.get('change_event_id')}", + "entity": view, + "change_kind": event.get("change_kind"), + "actor": event.get("actor"), + "commit": event.get("commit_sha"), + "changed_at": event.get("changed_at"), + "detected_branch": event.get("detected_branch"), + "detected_head_sha": event.get("detected_head_sha"), + } + + +def _safe_rev_range_commits( + repo: Path, rev_range: str | None +) -> tuple[set[str] | None, str | None]: + """Resolve a rev-range fail-soft for COP. 
+ + A frame may name a rev-range whose commits were rewritten away (squash-merge + collapse, branch deletion, GC) — git then exits non-zero and + :func:`rev_range_commits` raises ``BadRevisionError``. For a READ-time COP + that is not an error; it is honest degradation. Returns ``(commit_shas, + bad_detail)``: on a bad range, ``(set(), detail)`` so the caller resolves to + an empty change set with an ``unresolved_input`` reason, never a crash. + """ + + try: + return rev_range_commits(repo, rev_range), None + except BadRevisionError as exc: + return set(), str(exc) + + +def _git_diff_paths(repo: Path, rev: str) -> list[str]: + """Tracked paths changed in the working tree relative to ``rev``. + + ``edit`` frame support (M4): ``git diff --name-only REV`` reports the + uncommitted edit set. Fail-soft — a git error yields an empty path list (the + caller degrades honestly), never an exception out of a read. + """ + + try: + out = subprocess.run( + ["git", "diff", "--name-only", rev], + cwd=repo, + check=True, + text=True, + capture_output=True, + ).stdout + except (subprocess.CalledProcessError, OSError): + return [] + return [line for line in out.splitlines() if line] + + +def resolve_frame( + store: WarplineStore, repo: Path, frame_spec: dict[str, Any] +) -> tuple[list[dict[str, Any]], dict[str, Any], list[str]]: + """Resolve a COP frame spec to its change items, a frame echo, and warnings. + + ``frame_spec`` is ``{"kind": KIND, ...kind-specific keys}``: + + * ``rev_range`` — ``{"rev_range": "A..B"}``; events whose commit is in the + range. Unknown / vanished SHAs resolve to an empty commit set → honest + ``unresolved_input`` (the squash-merge reversal clause's failure path). + * ``time_window`` — ``{"since": iso, "until": iso}``; events by author-time. + * ``sei`` — ``{"sei": "..."}`` (or ``{"value": ...}``); the entity timeline. + * ``edit`` — ``{"rev": "HEAD"}``; events on paths in ``git diff REV``.
+ * ``branch_sha`` — ``{"branch": ..., "sha": ...}``; FALLS BACK to a + rev-range resolution WITH a warning (``detected_branch`` not yet wired + into a frozen read path; episode boundary unratified). This is the honest + squash-merge degradation: branch + boundary, never a false-precise set. + + The frame echo always carries ``weft_reason_class``: + * ``clean`` — the frame resolved a non-degraded change set; + * ``unresolved_input``— the inputs named no resolvable change events; + * ``partial`` — a fallback resolution (branch_sha) stood in. + Returns ``(items, frame_echo, warnings)``. Reads only; mints nothing. + """ + + kind = frame_spec.get("kind") + warnings: list[str] = [] + + if kind == "rev_range": + rev_range = frame_spec.get("rev_range") + commit_shas, bad_detail = _safe_rev_range_commits( + repo, rev_range if isinstance(rev_range, str) else None + ) + events = store.list_change_events(repo, commit_shas=commit_shas) + items = [_item_from_event(e) for e in events] + if not items: + detail = f" (git: {bad_detail})" if bad_detail else "" + why = reason( + "unresolved_input", + cause=( + f"rev_range {rev_range!r} resolved to no recorded change events " + "(the range may be empty, or its commits were rewritten away — " + f"e.g. 
collapsed by a squash-merge and the source branch deleted){detail}" + ), + fix=( + "re-run against a branch name + episode boundary (a branch_sha frame), " + "or backfill/ingest the new mainline commit so warpline records its events" + ), + ) + else: + why = reason("clean") + echo = { + "kind": "rev_range", + "rev_range": rev_range, + "weft_reason_class": why["reason_class"], + "weft_reason": why, + } + return items, echo, warnings + + if kind == "time_window": + since = frame_spec.get("since") + until = frame_spec.get("until") + events = store.list_change_events( + repo, + since=since if isinstance(since, str) else None, + until=until if isinstance(until, str) else None, + ) + items = [_item_from_event(e) for e in events] + why = reason("clean") if items else reason( + "unresolved_input", + cause=f"time_window [{since!r}, {until!r}] matched no recorded change events", + fix="widen the window, or ingest/backfill commits whose author-time falls in it", + ) + echo = { + "kind": "time_window", + "since": since, + "until": until, + "weft_reason_class": why["reason_class"], + "weft_reason": why, + } + return items, echo, warnings + + if kind == "sei": + value = frame_spec.get("sei", frame_spec.get("value")) + rows = store.timeline(repo, str(value)) if value is not None else [] + items = [_item_from_event(r) for r in rows] + why = reason("clean") if items else reason( + "unresolved_input", + cause=f"sei {value!r} matched no recorded change events", + fix="confirm the SEI/locator is one warpline has ingested, or backfill the repo", + ) + echo = { + "kind": "sei", + "sei": value, + "weft_reason_class": why["reason_class"], + "weft_reason": why, + } + return items, echo, warnings + + if kind == "edit": + rev = frame_spec.get("rev", "HEAD") + paths = _git_diff_paths(repo, str(rev)) + events = store.list_change_events(repo) + path_set = set(paths) + items = [_item_from_event(e) for e in events if e.get("path") in path_set] + if not paths: + why = reason( + "unresolved_input", 
+ cause=f"git diff {rev!r} reported no changed tracked paths (clean work tree)", + fix="make (or stage) an edit, then re-run the edit frame against HEAD", + ) + elif not items: + why = reason( + "partial", + cause=( + f"git diff {rev!r} changed {len(paths)} path(s), but warpline has no " + "recorded change events for them yet" + ), + fix="ingest/backfill the repo so the edited paths' entities are recorded", + ) + else: + why = reason("clean") + echo = { + "kind": "edit", + "rev": rev, + "diff_paths": paths, + "weft_reason_class": why["reason_class"], + "weft_reason": why, + } + return items, echo, warnings + + if kind == "branch_sha": + # Squash-merge / rewritten-history fallback (M4): until detected_branch is + # surfaced on a frozen read path and the work-session episode boundary is + # ratified, a branch_sha frame resolves through the rev-range built from + # its sha (or branch ref) WITH an honest fallback warning — branch + + # episode-boundary, never a false-precise commit set. + branch = frame_spec.get("branch") + sha = frame_spec.get("sha") + fallback_range = frame_spec.get("rev_range") + if not isinstance(fallback_range, str): + fallback_range = f"{sha}~1..{sha}" if isinstance(sha, str) and sha else None + warnings.append( + "branch_sha frame fell back to a rev-range resolution: detected_branch is not yet " + "surfaced on a frozen read path and the work-session episode boundary is unratified" + ) + commit_shas, bad_detail = _safe_rev_range_commits(repo, fallback_range) + events = store.list_change_events(repo, commit_shas=commit_shas) + items = [_item_from_event(e) for e in events] + git_note = f"; git: {bad_detail}" if bad_detail else "" + why = reason( + "partial", + cause=( + f"branch_sha frame (branch={branch!r}, sha={sha!r}) resolved via a " + f"rev-range fallback ({fallback_range!r}); episode-boundary keying is " + f"pending{git_note}" + ), + fix=( + "ratify the work-session episode boundary and wire detected_branch onto a " + "frozen read path so 
branch_sha can resolve to a precise episode" + ), + ) + echo = { + "kind": "branch_sha", + "branch": branch, + "sha": sha, + "fallback_rev_range": fallback_range, + "weft_reason_class": why["reason_class"], + "weft_reason": why, + } + return items, echo, warnings + + # Unknown frame kind — honest rejection, never a silent empty. + why = reason( + "rejected", + cause=f"unknown COP frame kind {kind!r}", + fix=f"use one of {', '.join(FRAME_KINDS)}", + ) + echo = { + "kind": kind, + "weft_reason_class": why["reason_class"], + "weft_reason": why, + } + return [], echo, warnings + + +def compose_temporal_cop( + items: list[dict[str, Any]], + frame: dict[str, Any], + *, + work_client: WorkClient | None = None, + risk_client: RiskClient | None = None, + legis_client: LegisClient | None = None, +) -> dict[str, Any]: + """Compose the temporal COP for a resolved frame's ``items``. + + Reuses the three federation consults VERBATIM (R9) and wraps them in a + coverage block. Returns + ``{"members", "entities", "coverage", "frame"}`` where: + + * ``members`` — every member in :data:`FEDERATION_MEMBERS` with its own + weft-reason and entity count (NEVER omitted); + * ``entities`` — per-locator facts a member actually returned; + * ``coverage`` — ``{members_consulted, members_total, dark_sectors}``; a + ``dark_sector`` is any member whose reason class is NOT ``clean`` + (``disabled`` = no transport, ``unreachable`` = transport raised). This is + the load-bearing coverage-honesty surface: an unmonitored domain is named, + never read as a clean empty. + * ``frame`` — the frame echo from :func:`resolve_frame` (carries + ``weft_reason_class``). + + ``consult_federation`` is NOT modified; this composes the same three consults + and adds coverage. Read-time, never-gating, no-mirror — writes nothing. 
+ """ + + work_by, work_reason = _consult_filigree(items, work_client) + risk_by, risk_reason = _consult_wardline(items, risk_client) + gov_by, gov_reason = _consult_legis(items, legis_client) + + member_reasons = { + "filigree": (work_reason, work_by), + "wardline": (risk_reason, risk_by), + "legis": (gov_reason, gov_by), + } + members: dict[str, Any] = {} + dark_sectors: list[dict[str, Any]] = [] + consulted = 0 + for name in FEDERATION_MEMBERS: + member_reason, by_locator = member_reasons[name] + members[name] = {"weft_reason": member_reason, "entity_count": len(by_locator)} + if member_reason.get("reason_class") == "clean": + consulted += 1 + else: + dark_sectors.append( + { + "member": name, + "reason_class": member_reason.get("reason_class"), + "cause": member_reason.get("cause"), + "fix": member_reason.get("fix"), + } + ) + + entities: list[dict[str, Any]] = [] + for item in items: + entity = item.get("entity", {}) + locator = entity.get("locator") + if not isinstance(locator, str) or not locator: + continue + work = work_by.get(locator, []) + risk = risk_by.get(locator, []) + gov = gov_by.get(locator, []) + if not (work or risk or gov): + continue + entities.append( + { + "locator": locator, + "sei": entity.get("sei"), + "work": work, + "risk": risk, + "governance": gov, + } + ) + + coverage = { + "members_consulted": consulted, + "members_total": len(FEDERATION_MEMBERS), + "dark_sectors": dark_sectors, + } + return { + "members": members, + "entities": entities, + "coverage": coverage, + "frame": frame, + } diff --git a/src/warpline/store.py b/src/warpline/store.py index 2d52424..776ce71 100644 --- a/src/warpline/store.py +++ b/src/warpline/store.py @@ -662,17 +662,32 @@ def append_change_event( self.conn.commit() def list_change_events( - self, repo: Path, commit_shas: set[str] | None = None + self, + repo: Path, + commit_shas: set[str] | None = None, + *, + since: str | None = None, + until: str | None = None, ) -> list[dict[str, object]]: + # 
``since``/``until`` (inclusive, on ``changed_at``) are the time_window + # frame's store support (M4): the COP ``time_window`` frame kind resolves + # change events by author-time bounds. Additive optional kwargs — existing + # callers (commit-sha or unfiltered reads) are unaffected. repo_id = self._repo_id(repo) params: list[object] = [repo_id] - commit_filter = "" + clauses = "" if commit_shas is not None: if not commit_shas: return [] placeholders = ",".join("?" for _ in commit_shas) - commit_filter = f" AND ce.commit_sha IN ({placeholders})" + clauses += f" AND ce.commit_sha IN ({placeholders})" params.extend(sorted(commit_shas)) + if since is not None: + clauses += " AND ce.changed_at >= ?" + params.append(since) + if until is not None: + clauses += " AND ce.changed_at <= ?" + params.append(until) rows = self.conn.execute( f""" SELECT ce.id AS change_event_id, ce.commit_sha, ce.path, ce.change_kind, @@ -683,7 +698,7 @@ def list_change_events( FROM change_events ce JOIN entity_keys ek ON ek.id = ce.entity_key_id WHERE ce.repo_id = ? - {commit_filter} + {clauses} ORDER BY ce.changed_at, ce.id """, params,
diff --git a/tests/integration/test_reconstruction_demo.py b/tests/integration/test_reconstruction_demo.py
new file mode 100644
index 0000000..3690372
--- /dev/null
+++ b/tests/integration/test_reconstruction_demo.py
@@ -0,0 +1,173 @@
+"""PDR-0025 cond 1 / E1 — the squash-merge reconstruction demo (Rung 2 Track D).
+
+This is the LOAD-BEARING acceptance fixture for the capability ladder: a
+clean-history fixture does NOT satisfy PDR-0025. We build a real squash-merge:
+
+  * N feature-branch commits, each ingested into warpline's store (so their
+    change events are on record under the ORIGINAL feature SHAs);
+  * ``git merge --squash`` collapses those N commits into ONE new mainline commit
+    with a brand-new SHA (the original feature SHAs are NOT mainline ancestors);
+  * the feature branch is deleted (its commits become unreferenced).
+
+The demo runs end-to-end through the NON-FROZEN internal ``warpline cop`` CLI
+verb (the PUBLIC COP MCP tool is interface-pending) and asserts the bundle
+EITHER reconstructs the change set OR honestly degrades with a populated
+``weft_reason_class`` — never a confident-empty that reads as "nothing changed".
+
+Three paths are exercised:
+  1. A ``sei`` frame keyed on an entity warpline ingested from the feature branch
+     STILL reconstructs the timeline (the entity identity survives the squash —
+     reconstruction via recorded events, not via the rewritten SHAs).
+  2. A ``rev_range`` frame keyed on the NEW squashed mainline SHA finds no
+     recorded events (warpline recorded the originals) and degrades to an honest
+     ``unresolved_input`` reason class — the bundle is useful (it names why it is
+     empty and how to recover), never a false clean.
+  3. A ``branch_sha`` frame naming the deleted feature branch and the NEW squashed
+     SHA reports a ``partial`` reason class with a "fell back" warning, and its
+     coverage still names all three federation members as dark sectors.
+"""
+
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+from conftest import git as _git
+from conftest import init_repo as _init_repo
+
+from warpline.git import ingest_commit
+from warpline.store import WarplineStore, default_store_path
+
+
+def _commit_file(repo: Path, name: str, body: str) -> str:
+    """Write ``body`` to ``name`` in ``repo``, commit it, and return ``git rev-parse HEAD``."""
+    (repo / name).write_text(body, encoding="utf-8")
+    _git(repo, "add", name)
+    _git(repo, "commit", "-m", f"feat: {name}")
+    return _git(repo, "rev-parse", "HEAD")
+
+
+def _run_cop(repo: Path, *args: str) -> dict[str, object]:
+    """Run the internal ``warpline cop`` CLI verb and return its parsed JSON payload.
+
+    ``check=True`` turns a non-zero exit into ``CalledProcessError``; stdout must
+    decode to a JSON object (asserted below).
+    """
+    proc = subprocess.run(
+        [sys.executable, "-m", "warpline.cli", "cop", "--repo", str(repo), "--json", *args],
+        check=True,
+        text=True,
+        capture_output=True,
+    )
+    payload = json.loads(proc.stdout)
+    assert isinstance(payload, dict)
+    return payload
+
+
+def _build_squash_merge_fixture(tmp_path: Path) -> tuple[Path, str, str, str]:
+    """Return (repo, feature_locator, squashed_sha, original_feature_tip).
+
+    Mainline gets one base commit; a feature branch gets N commits (each
+    ingested); the branch is squash-merged into mainline (new SHA) and deleted.
+    """
+
+    repo = _init_repo(tmp_path)
+    # Mainline base commit.
+    _commit_file(repo, "base.py", "BASE = 0\n")
+    main_branch = _git(repo, "rev-parse", "--abbrev-ref", "HEAD")
+
+    # Feature branch: N commits, each ingested under its ORIGINAL feature SHA.
+    _git(repo, "checkout", "-b", "feature")
+    feature_shas: list[str] = []
+    feature_shas.append(_commit_file(repo, "feature.py", "def widget():\n return 1\n"))
+    feature_shas.append(_commit_file(repo, "feature.py", "def widget():\n return 2\n"))
+    feature_shas.append(_commit_file(repo, "feature.py", "def widget():\n return 3\n"))
+    original_feature_tip = feature_shas[-1]
+
+    feature_locator = ""
+    with WarplineStore.open(default_store_path(repo)) as store:
+        for sha in feature_shas:
+            ingest_commit(store, repo, sha)
+        events = store.list_change_events(repo)
+    for event in events:
+        if event.get("path") == "feature.py":
+            feature_locator = str(event["locator"])
+            break
+    assert feature_locator, "feature.py entity should have been ingested"
+
+    # Squash-merge: collapse the N feature commits into ONE new mainline SHA.
+    _git(repo, "checkout", main_branch)
+    _git(repo, "merge", "--squash", "feature")
+    _git(repo, "commit", "-m", "feat: squashed feature")
+    squashed_sha = _git(repo, "rev-parse", "HEAD")
+    # Delete the feature branch — its original SHAs are now unreferenced.
+    _git(repo, "branch", "-D", "feature")
+
+    assert squashed_sha != original_feature_tip
+    return repo, feature_locator, squashed_sha, original_feature_tip
+
+
+def test_squash_merge_sei_frame_still_reconstructs(tmp_path: Path) -> None:
+    repo, feature_locator, _squashed, _tip = _build_squash_merge_fixture(tmp_path)
+    # The entity identity survives the squash: a sei/locator frame reconstructs
+    # the recorded timeline even though the feature SHAs are gone from mainline.
+    payload = _run_cop(repo, "--frame", "sei", "--sei", feature_locator)
+    assert payload["non_frozen"] is True
+    frame = payload["frame"]
+    assert isinstance(frame, dict)
+    assert frame["weft_reason_class"] == "clean"
+    items = payload["items"]
+    assert isinstance(items, list)
+    assert items, "the squashed feature entity must still reconstruct from recorded events"
+    assert all(item["entity"]["path"] == "feature.py" for item in items)
+
+
+def test_squash_merge_rev_range_on_new_sha_degrades_honestly(tmp_path: Path) -> None:
+    repo, _locator, squashed_sha, _tip = _build_squash_merge_fixture(tmp_path)
+    # A rev-range keyed on the NEW squashed SHA finds no recorded events (warpline
+    # recorded the ORIGINAL feature SHAs). The bundle must degrade HONESTLY — a
+    # populated weft_reason_class with cause+fix — never a confident-empty.
+    payload = _run_cop(
+        repo, "--frame", "rev_range", "--rev-range", f"{squashed_sha}~1..{squashed_sha}"
+    )
+    items = payload["items"]
+    assert isinstance(items, list)
+    assert items == [], "the squashed SHA has no recorded warpline events"
+    frame = payload["frame"]
+    assert isinstance(frame, dict)
+    assert frame["weft_reason_class"] == "unresolved_input"
+    weft_reason = frame["weft_reason"]
+    assert isinstance(weft_reason, dict)
+    assert weft_reason["cause"], "honest degradation must name the cause"
+    assert weft_reason["fix"], "honest degradation must recruit a fix"
+
+
+def test_squash_merge_branch_sha_fallback_is_useful(tmp_path: Path) -> None:
+    repo, _locator, squashed_sha, _tip = _build_squash_merge_fixture(tmp_path)
+    # The branch_sha frame is the ratified squash-merge fallback: branch +
+    # episode-boundary via a rev-range, with an honest 'partial' reason class and
+    # a fallback warning. The COP must still name every member in coverage.
+    # NOTE(review): the fixture has already deleted ``feature``, so this presumably
+    # exercises only the SHA-derived fallback, not a live branch ref — confirm.
+    payload = _run_cop(
+        repo,
+        "--frame",
+        "branch_sha",
+        "--branch",
+        "feature",
+        "--sha",
+        squashed_sha,
+    )
+    frame = payload["frame"]
+    assert isinstance(frame, dict)
+    assert frame["weft_reason_class"] == "partial"
+    warnings = payload["warnings"]
+    assert isinstance(warnings, list)
+    assert any("fell back" in str(w) for w in warnings)
+    coverage = payload["coverage"]
+    assert isinstance(coverage, dict)
+    assert coverage["members_total"] == 3
+    dark = {d["member"] for d in coverage["dark_sectors"]}
+    assert dark == {"filigree", "wardline", "legis"}
diff --git a/tests/test_cop.py b/tests/test_cop.py
new file mode 100644
index 0000000..e816498
--- /dev/null
+++ b/tests/test_cop.py
@@ -0,0 +1,189 @@
+"""Rung 2 Track D — temporal COP internals (cop.py).
+
+Locks ``resolve_frame`` per frame kind (rev_range / time_window / sei /
+branch_sha / edit) and ``compose_temporal_cop``'s coverage / dark_sectors
+honesty. The PUBLIC COP MCP/CLI tool is interface-pending and NOT wired here;
+these test the internals only.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from conftest import commit as _commit
+from conftest import init_repo as _init_repo
+
+from warpline.cop import compose_temporal_cop, resolve_frame
+from warpline.git import ingest_commit
+from warpline.store import WarplineStore, default_store_path
+
+
+def _seed(repo: Path, name: str, body: str) -> str:
+    """Commit ``body`` as ``name`` via the conftest helper, ingest that SHA, and return it."""
+    sha = _commit(repo, name, body)
+    with WarplineStore.open(default_store_path(repo)) as store:
+        ingest_commit(store, repo, sha)
+    return sha
+
+
+def test_resolve_frame_rev_range_resolves_items(tmp_path: Path) -> None:
+    repo = _init_repo(tmp_path)
+    sha1 = _seed(repo, "a.py", "def f():\n return 1\n")
+    sha2 = _seed(repo, "b.py", "def g():\n return 2\n")
+    with WarplineStore.open(default_store_path(repo)) as store:
+        items, echo, warnings = resolve_frame(
+            store, repo, {"kind": "rev_range", "rev_range": f"{sha1}..{sha2}"}
+        )
+    assert echo["kind"] == "rev_range"
+    assert echo["weft_reason_class"] == "clean"
+    assert warnings == []
+    paths = {item["entity"]["path"] for item in items}
+    assert "b.py" in paths
+
+
+def test_resolve_frame_rev_range_vanished_shas_degrades_honestly(tmp_path: Path) -> None:
+    repo = _init_repo(tmp_path)
+    _seed(repo, "a.py", "x = 1\n")
+    with WarplineStore.open(default_store_path(repo)) as store:
+        # A rev-range naming SHAs that resolve to no recorded events (the
+        # squash-merge collapse failure path) degrades to unresolved_input.
+        items, echo, _warnings = resolve_frame(
+            store,
+            repo,
+            {"kind": "rev_range", "rev_range": "0000000000000000000000000000000000000000..HEAD~99"},
+        )
+    assert items == []
+    assert echo["weft_reason_class"] == "unresolved_input"
+    assert echo["weft_reason"]["cause"]
+    assert echo["weft_reason"]["fix"]
+
+
+def test_resolve_frame_time_window(tmp_path: Path) -> None:
+    repo = _init_repo(tmp_path)
+    _seed(repo, "a.py", "x = 1\n")
+    with WarplineStore.open(default_store_path(repo)) as store:
+        all_events = store.list_change_events(repo)
+        when = str(all_events[0]["changed_at"])
+        # since == until only matches because the store bounds are inclusive on ``changed_at``.
+        items, echo, _ = resolve_frame(
+            store, repo, {"kind": "time_window", "since": when, "until": when}
+        )
+        empty_items, empty_echo, _ = resolve_frame(
+            store,
+            repo,
+            {"kind": "time_window", "since": "2099-01-01T00:00:00+00:00", "until": None},
+        )
+    assert echo["kind"] == "time_window"
+    assert echo["weft_reason_class"] == "clean"
+    assert items
+    assert empty_items == []
+    assert empty_echo["weft_reason_class"] == "unresolved_input"
+
+
+def test_resolve_frame_sei_uses_timeline(tmp_path: Path) -> None:
+    repo = _init_repo(tmp_path)
+    _seed(repo, "a.py", "def f():\n return 1\n")
+    with WarplineStore.open(default_store_path(repo)) as store:
+        events = store.list_change_events(repo)
+        locator = str(events[0]["locator"])
+        items, echo, _ = resolve_frame(store, repo, {"kind": "sei", "sei": locator})
+        miss_items, miss_echo, _ = resolve_frame(
+            store, repo, {"kind": "sei", "sei": "warpline:does-not-exist"}
+        )
+    assert echo["kind"] == "sei"
+    assert echo["weft_reason_class"] == "clean"
+    assert items
+    assert miss_items == []
+    assert miss_echo["weft_reason_class"] == "unresolved_input"
+
+
+def test_resolve_frame_edit_uses_git_diff(tmp_path: Path) -> None:
+    repo = _init_repo(tmp_path)
+    _seed(repo, "a.py", "x = 1\n")
+    # Uncommitted edit to a tracked file → git diff HEAD reports it.
+    (repo / "a.py").write_text("x = 2\n", encoding="utf-8")
+    with WarplineStore.open(default_store_path(repo)) as store:
+        items, echo, _ = resolve_frame(store, repo, {"kind": "edit", "rev": "HEAD"})
+    assert echo["kind"] == "edit"
+    assert "a.py" in echo["diff_paths"]
+    # The edited path matches a recorded change event → clean resolution.
+    assert echo["weft_reason_class"] == "clean"
+    assert any(item["entity"]["path"] == "a.py" for item in items)
+
+
+def test_resolve_frame_edit_clean_tree_is_unresolved_input(tmp_path: Path) -> None:
+    repo = _init_repo(tmp_path)
+    _seed(repo, "a.py", "x = 1\n")
+    with WarplineStore.open(default_store_path(repo)) as store:
+        items, echo, _ = resolve_frame(store, repo, {"kind": "edit", "rev": "HEAD"})
+    assert items == []
+    assert echo["weft_reason_class"] == "unresolved_input"
+
+
+def test_resolve_frame_branch_sha_emits_fallback_warning(tmp_path: Path) -> None:
+    repo = _init_repo(tmp_path)
+    _seed(repo, "base.py", "BASE = 0\n")
+    sha = _seed(repo, "a.py", "x = 1\n")  # has a parent → sha~1..sha is valid
+    with WarplineStore.open(default_store_path(repo)) as store:
+        items, echo, warnings = resolve_frame(
+            store, repo, {"kind": "branch_sha", "branch": "main", "sha": sha}
+        )
+    assert echo["kind"] == "branch_sha"
+    assert echo["weft_reason_class"] == "partial"
+    assert any("fell back" in w for w in warnings)
+    assert echo["fallback_rev_range"] == f"{sha}~1..{sha}"
+    assert items
+
+
+def test_resolve_frame_unknown_kind_is_rejected(tmp_path: Path) -> None:
+    repo = _init_repo(tmp_path)
+    with WarplineStore.open(default_store_path(repo)) as store:
+        items, echo, _ = resolve_frame(store, repo, {"kind": "nonsense"})
+    assert items == []
+    assert echo["weft_reason_class"] == "rejected"
+
+
+def test_compose_temporal_cop_lists_every_member_as_dark_sector(tmp_path: Path) -> None:
+    repo = _init_repo(tmp_path)
+    _seed(repo, "a.py", "def f():\n return 1\n")
+    with WarplineStore.open(default_store_path(repo)) as store:
+        items, echo, _ = resolve_frame(store, repo, {"kind": "rev_range", "rev_range": None})
+    # No transports wired → all three members are dark sectors (disabled),
+    # NEVER silently dropped to look like a clean empty.
+    cop = compose_temporal_cop(items, echo)
+    assert set(cop["members"]) == {"filigree", "wardline", "legis"}
+    coverage = cop["coverage"]
+    assert coverage["members_total"] == 3
+    assert coverage["members_consulted"] == 0
+    dark = {d["member"] for d in coverage["dark_sectors"]}
+    assert dark == {"filigree", "wardline", "legis"}
+    for sector in coverage["dark_sectors"]:
+        assert sector["reason_class"] == "disabled"
+        assert sector["cause"] and sector["fix"]
+    assert cop["frame"] is echo
+
+
+class _StubWork:
+    """Reachable filigree transport that returns associations for any SEI."""
+
+    def associations(self, sei: str) -> list[dict[str, Any]]:
+        return [{"issue_id": "WL-1", "sei": sei}]
+
+
+def test_compose_temporal_cop_consulted_member_is_not_dark(tmp_path: Path) -> None:
+    repo = _init_repo(tmp_path)
+    _seed(repo, "a.py", "def f():\n return 1\n")
+    with WarplineStore.open(default_store_path(repo)) as store:
+        events = store.list_change_events(repo)
+        # Force a SEI onto the item so the filigree consult has something to read.
+        items, echo, _ = resolve_frame(store, repo, {"kind": "rev_range", "rev_range": None})
+        for item in items:
+            item["entity"]["sei"] = "loomweave:eid:demo"
+    cop = compose_temporal_cop(items, echo, work_client=_StubWork())
+    assert cop["members"]["filigree"]["weft_reason"]["reason_class"] == "clean"
+    dark = {d["member"] for d in cop["coverage"]["dark_sectors"]}
+    assert "filigree" not in dark
+    assert dark == {"wardline", "legis"}
+    assert cop["coverage"]["members_consulted"] == 1
+    assert events  # sanity: events were ingested