From c8f3d4ef78430a06212ff65e51d85a44a1543116 Mon Sep 17 00:00:00 2001
From: Junghwan <70629228+shaun0927@users.noreply.github.com>
Date: Wed, 20 May 2026 12:02:48 +0900
Subject: [PATCH 1/5] =?UTF-8?q?docs+test(agentos):=20IR=20=E2=86=94=20proj?=
 =?UTF-8?q?ection=20mapping=20contract?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refs #1142, #956, #946. Locks the boundary contract documented in
workflow-ir-v1.md.
---
 .../agentos/workflow-ir-projection-mapping.md | 151 +++++++
 .../test_ir_projection_consistency.py         | 410 ++++++++++++++++++
 2 files changed, 561 insertions(+)
 create mode 100644 docs/agentos/workflow-ir-projection-mapping.md
 create mode 100644 tests/integration/test_ir_projection_consistency.py

diff --git a/docs/agentos/workflow-ir-projection-mapping.md b/docs/agentos/workflow-ir-projection-mapping.md
new file mode 100644
index 000000000..2d0ee56c8
--- /dev/null
+++ b/docs/agentos/workflow-ir-projection-mapping.md
@@ -0,0 +1,151 @@
+# Workflow IR ↔ Projection Mapping Contract
+
+This document pins the **read-only mapping** between the #956 Workflow IR and
+the #946 projection vocabulary. It is a contract that downstream Wave 3+ work
+(PR F dispatch wiring, S4 StepSnapshot, conformance harness extensions, etc.)
+can rely on without re-deriving the boundary each time.
+
+## Purpose & scope
+
+This file is a **mapping reference**, not a schema. It does not introduce new
+types, new flags, new event families, or new persistence. It restates how
+identifiers and lifecycle vocabulary on either side already line up under the
+boundaries fixed by:
+
+- [`workflow-ir-v1.md`](./workflow-ir-v1.md) — what the Workflow IR owns
+  (planning graph, validation, lifecycle events) and what it must not embed
+  (projection records, dispatch, persistence).
+- [`projection-v1-scope.md`](./projection-v1-scope.md) — what the projection
+  vocabulary owns (`RunRecord` / `StageRecord` / `StepRecord` /
+  `ArtifactRecord` / `VerdictRecord`) as a rebuildable read model over the
+  EventStore.
+
+The locked boundary paragraph in `workflow-ir-v1.md` is treated as
+authoritative: *"The default boundary fixture must stay local and
+deterministic: it may pair a validated `WorkflowSpec` with synthetic
+`EventStore` rows to prove source-event linkage, but it must not add
+dispatch, cache, persistence, or projection-record embedding to the IR."*
+
+Every consistency test that ships with this mapping doc must obey that
+rule — see `tests/integration/test_ir_projection_consistency.py` for the
+canonical fixture pattern.
+
+## Identifier mapping
+
+The Workflow IR plans work as a graph of `WorkflowNode` instances connected
+by `WorkflowEdge` instances. The #946 projection observes the work that was
+emitted to the journal afterwards. The two sides share identifiers in the
+following way; they do **not** share storage.
+
+### `WorkflowNode.node_id` → projection step/verdict identity
+
+| Node owner / kind | Projection target | How identifiers line up |
+| --- | --- | --- |
+| `NodeOwner.AGENT`, `NodeOwner.PLUGIN`, `NodeOwner.HARNESS` with tool/LLM work | `StepRecord` | Runtime callers set `event.data["call_id"] == WorkflowNode.node_id` on the paired `tool.call.started` / `tool.call.returned` (or `llm.call.requested` / `llm.call.returned`) rows. `ProjectionBuilder._stable_step_id(source_key, family, call_id)` then yields a deterministic `StepRecord.step_id` keyed off the node id. The IR side never stores the step id; it stays purely derivable from journal rows. |
+| `NodeOwner.AGENT`, `NodeOwner.PLUGIN` producing acceptance evidence | `StepRecord.ac_id` | When the node is the acceptance-criterion anchor for the work, `event.data["ac_id"]` carries the same identifier the IR plan uses for that AC (typically `WorkflowNode.node_id` or a metadata-attached AC label). `ProjectionBuilder._extract_ac_id` lifts it onto the projected `StepRecord` without invention. |
+| `NodeOwner.VERIFIER` | `VerdictRecord` | The verifier's `harness.verdict.recorded` / `evaluation.verdict.recorded` event sets `event.data["scope"] = "ac"` and `event.data["ac_id"] == WorkflowNode.node_id` for the AC the verifier judged. `_verdict_from_event` projects that into `VerdictRecord.ac_id`. Run-scope verdicts (`scope == "run"`) project against the run, not a node. |
+| `NodeOwner.HUMAN_GATE` | _(not projected in v1)_ | HITL WAIT/RESUME authority lives under #960 and is explicitly deferred by `projection-v1-scope.md`. The mapping leaves these node ids dangling on purpose; the projection has no record kind for them today. |
+| `NodeKind.TERMINAL` | `RunRecord` end | Reaching a terminal node corresponds to a terminal `WorkflowLifecycleEventType.RUN_COMPLETED` / `RUN_FAILED` / `RUN_CANCELLED` event, which the run-level `VerdictRecord` (scope `"run"`) projects. The terminal node id itself is not projected as a separate record. |
+
+### `WorkflowEdge.edge_id` → projection event-pair linkage
+
+`WorkflowEdge` instances are **not** projected as a dedicated projection
+record kind in v1. Their observability surface is the source-event pair on
+either side of the transition:
+
+- `WorkflowLifecycleEventType.EDGE_TRAVERSED` rows carry the `edge_id` and
+  the attempt number. They are stored as journal events, not as projection
+  rows.
+- The predecessor step's `*.returned` event id and the successor step's
+  `*.started` event id, each held in that step's `StepRecord.source_event_ids`
+  tuple, are the read-model evidence that the edge was traversed.
+- A consumer that wants edge-grained read state can join the journal
+  (`edge_id` field on lifecycle events) against the projection's
+  `source_event_ids` without the projection needing a new `EdgeRecord`
+  kind. **This is intentional.** v1 does not add one.
+
+### Run / stage anchors
+
+- `WorkflowSpec.spec_id` is the lifecycle `workflow_id`. It is **not** a
+  projection identifier; the projection keys runs off `seed_id` plus an
+  execution / session anchor (see `_derive_projection_source_key`).
+- A single `WorkflowSpec` execution maps to exactly one `RunRecord` and at
+  least one `StageRecord` (default kind `StageKind.EXECUTE`). Richer stage
+  detection is additive follow-up work explicitly deferred by
+  `projection-v1-scope.md`.
+
+## Lifecycle event → projection mapping
+
+The Workflow IR's lifecycle vocabulary is bounded
+(`WorkflowLifecycleEventType` in
+`src/ouroboros/orchestrator/workflow_lifecycle.py`). Each lifecycle event
+type is observable through the existing projection vocabulary as follows.
+
+| `WorkflowLifecycleEventType` | Projection effect | Linked via |
+| --- | --- | --- |
+| `workflow.run.created` | Opens a `RunRecord`. `started_at` is anchored to the earliest projected event timestamp. | `RunRecord.metadata` is the only place a consumer may attach a `workflow_id` provenance label; the record is not extended in v1. |
+| `workflow.node.scheduled` | Reserves a future `StepRecord` slot. No `StepRecord` is emitted until a paired `tool.call.*` or `llm.call.*` event exists in the journal. | `StepRecord.source_event_ids` will reference the `*.started` and `*.returned` rows; the scheduled lifecycle row is *not* embedded. |
+| `workflow.node.started` | Emits the `*.started` half of the projected `StepRecord`. The dangling step has `ended_at=None` until the matching returned event arrives. | `StepRecord.source_event_ids = (started_event.id,)` until pairing completes. |
+| `workflow.node.completed` | Pairs the `StepRecord` with `ended_at` and `ok` derived from the returned event. | `StepRecord.source_event_ids = (started_event.id, returned_event.id)`. |
+| `workflow.node.failed` | Same as `completed`, with `StepRecord.ok = False`. The node's `reason_code` lives on the lifecycle event and is **not** copied into the projection. | `StepRecord.source_event_ids`. |
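+| `workflow.node.retried` | Re-opens the node slot. The previous `StepRecord` retains its `step_id`. Because `step_id` is derived only from `(source_key, family, call_id)`, the next attempt produces a distinct `StepRecord` only when the runtime emits a distinct `call_id` for it; reusing the node id as `call_id` resolves to the same step slot and `step_id`. The `attempt` number is carried by the lifecycle event, not by the derived step id. | `StepRecord.source_event_ids` for each attempt. |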
+| `workflow.edge.traversed` | _Not projected as a record._ Observable via `EDGE_TRAVERSED` lifecycle rows in the journal. | Predecessor `StepRecord` `source_event_ids` cover the read-model evidence. |
+| `workflow.checkpoint.saved` | _Not projected in v1._ Checkpoint refs are `RunSnapshotRecord` material in a later projection slice; the mapping doc lists this row deliberately so future PRs know where it lands. | Deferred per `projection-v1-scope.md`. |
+| `workflow.run.completed` | Closes the `RunRecord` (`ended_at` is the terminal lifecycle row timestamp). If the runtime also emitted a run-scope verdict event, `RunRecord.verdict_id` points at the projected `VerdictRecord`. | `VerdictRecord.evidence_event_ids` for the run verdict. |
+| `workflow.run.failed` | Same as `completed`; the projected run-scope verdict (if any) has `outcome=FAIL`. | `VerdictRecord.evidence_event_ids`. |
+| `workflow.run.cancelled` | Same as `completed`; the projected run-scope verdict (if any) has `outcome=CANCELLED`. | `VerdictRecord.evidence_event_ids`. |
+
+The mapping table is **not exhaustive in either direction**. Projection
+event families that have no lifecycle equivalent (for example,
+`harness.artifact.recorded`) are governed by `projection-v1-scope.md`
+alone; lifecycle events that have no projection equivalent
+(`workflow.checkpoint.saved`, `workflow.edge.traversed`) are governed by
+`workflow-ir-v1.md` alone. This document only locks the **intersection**.
+
+## Anti-actions
+
+This mapping doc is explicitly bound by the following **anti-actions**:
+
+1. **No schema change** to either `src/ouroboros/orchestrator/workflow_ir.py`
+   or `src/ouroboros/harness/projection.py`. Both surfaces stay at their
+   currently published `*_SCHEMA_VERSION`.
+2. **No new field or flag** on any projection record. `legacy_inferred`,
+   `source_event_ids`, `ac_id`, and `metadata` are the only surfaces a
+   consistency test may rely on. New record-level fields or flags
+   (`workflow_node_id`, `edge_id`, etc.) are out of scope.
+3. **No live dispatch.** Workflow IR fixtures used to prove this mapping
+   stay local and deterministic per the locked boundary paragraph in
+   `workflow-ir-v1.md`. No `parallel_executor` call, no agent spawn, no
+   plugin command execution.
+4. **No projection-record embedding inside the IR.** `WorkflowNode` and
+   `WorkflowEdge` continue to carry only their planning vocabulary; they
+   do not reference `step_id`, `run_id`, or `verdict_id`.
+5. **No IR embedding inside projection records.** `StepRecord.metadata`
+   may carry `workflow_node_id` only when the journal event already
+   carries it; the projection does not invent or backfill IR identifiers.
+6. **No persistence write.** This contract is observed through the
+   existing EventStore + projection builder. The mapping doc and its
+   tests must not create migrations, caches, or new tables.
+7. **No new event family.** The lifecycle event vocabulary
+   (`WorkflowLifecycleEventType`) and the projection event-family set
+   (`_TOOL_STARTED`, `_TOOL_RETURNED`, `_LLM_REQUESTED`, `_LLM_RETURNED`,
+   `_ARTIFACT_RECORDED_TYPES`, `_VERDICT_RECORDED_TYPES`) are both
+   closed sets at v1. Adding to either is governed by its own canonical
+   issue, not this mapping doc.
+8. **No HITL / plugin / evidence schema authority.** The boundary tables
+   in `workflow-ir-v1.md` and `projection-v1-scope.md` allocate those to
+   #960, #939, and #830/#978 respectively. This document defers to them.
+
+## Verification
+
+The mapping is exercised by
+`tests/integration/test_ir_projection_consistency.py`, which builds a
+small validated `WorkflowSpec` (fan-out + terminal), emits synthetic
+`EventStore` rows that obey the rules above, and asserts that the
+projection's identifiers line up with the IR's planned identifiers
+exactly. A negative test pins the documented behavior when synthetic
+lifecycle events reference a node id that is not in the spec: the
+projection still builds without error (because the projection builder is
+spec-agnostic by design), and the mismatch is surfaced by the IR side's
+existing `validate_workflow_lifecycle_conformance` helper, which emits an
+`unknown_node_id` conformance issue. No new flag is added to either side.
diff --git a/tests/integration/test_ir_projection_consistency.py b/tests/integration/test_ir_projection_consistency.py
new file mode 100644
index 000000000..98daca80e
--- /dev/null
+++ b/tests/integration/test_ir_projection_consistency.py
@@ -0,0 +1,410 @@
+"""Integration tests for the IR ↔ projection mapping contract.
+
+These tests pin the read-only mapping documented in
+``docs/agentos/workflow-ir-projection-mapping.md`` between the #956
+Workflow IR and the #946 projection vocabulary. They are deterministic,
+offline, and never dispatch work, persist state, or open the network.
+
+Per the locked boundary paragraph in ``docs/agentos/workflow-ir-v1.md``:
+the default boundary fixture pairs a validated ``WorkflowSpec`` with
+synthetic ``EventStore`` rows to prove source-event linkage, and it must
+not add dispatch, cache, persistence, or projection-record embedding to
+the IR. No production source file under ``src/`` is modified to make
+these tests pass.
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime, timedelta
+
+from ouroboros.events.base import BaseEvent
+from ouroboros.harness.projection import StepKind, VerdictOutcome
+from ouroboros.harness.projection_builder import (
+    ProjectionBuilder,
+    _stable_step_id,
+)
+from ouroboros.orchestrator.workflow_ir import (
+    EdgeKind,
+    NodeKind,
+    NodeOwner,
+    SourceKind,
+    WorkflowEdge,
+    WorkflowNode,
+    WorkflowSpec,
+    validate_workflow,
+)
+from ouroboros.orchestrator.workflow_lifecycle import (
+    WorkflowLifecycleEvent,
+    WorkflowLifecycleEventType,
+    validate_workflow_lifecycle_conformance,
+)
+
+# ---------------------------------------------------------------------------
+# Deterministic fixture helpers (local, offline, no persistence).
+# ---------------------------------------------------------------------------
+
+
+def _fixture_spec() -> WorkflowSpec:
+    """Build a small validated WorkflowSpec: fan-out -> two tasks -> terminal.
+
+    The graph deliberately exercises the four identifier roles the
+    mapping doc covers:
+
+    * ``plan_node``      — harness fan-out (no projection record by itself,
+                            but its lifecycle events anchor the run).
+    * ``run_tool_node``  — agent task that projects into ``StepRecord``.
+    * ``judge_ac_node``  — verifier task that projects into
+                            ``VerdictRecord`` with ``ac_id``.
+    * ``done_node``      — terminal that closes the run.
+    """
+
+    plan_node = WorkflowNode(
+        node_id="plan_node",
+        kind=NodeKind.FAN_OUT,
+        owner=NodeOwner.HARNESS,
+        name="plan",
+    )
+    run_tool_node = WorkflowNode(
+        node_id="run_tool_node",
+        kind=NodeKind.TASK,
+        owner=NodeOwner.AGENT,
+        name="run_tool",
+        input_schema_ref="agent.input.v1",
+        evidence_schema_ref="agent.evidence.v1",
+    )
+    judge_ac_node = WorkflowNode(
+        node_id="judge_ac_node",
+        kind=NodeKind.TASK,
+        owner=NodeOwner.VERIFIER,
+        name="judge_ac",
+        evidence_schema_ref="verifier.verdict.v1",
+    )
+    done_node = WorkflowNode(
+        node_id="done_node",
+        kind=NodeKind.TERMINAL,
+        owner=NodeOwner.HARNESS,
+        name="done",
+    )
+
+    edges = (
+        WorkflowEdge(
+            edge_id="edge_plan_to_tool",
+            source="plan_node",
+            target="run_tool_node",
+            kind=EdgeKind.FAN_OUT,
+        ),
+        WorkflowEdge(
+            edge_id="edge_plan_to_judge",
+            source="plan_node",
+            target="judge_ac_node",
+            kind=EdgeKind.FAN_OUT,
+        ),
+        WorkflowEdge(
+            edge_id="edge_tool_to_done",
+            source="run_tool_node",
+            target="done_node",
+        ),
+        WorkflowEdge(
+            edge_id="edge_judge_to_done",
+            source="judge_ac_node",
+            target="done_node",
+        ),
+    )
+
+    spec = WorkflowSpec(
+        spec_id="wfspec_ir_proj_fixture",
+        source=SourceKind.SYNTHETIC,
+        nodes=(plan_node, run_tool_node, judge_ac_node, done_node),
+        edges=edges,
+    )
+
+    # Sanity: the fixture must be a valid spec so the mapping is exercised
+    # against a graph the IR side would actually accept.
+    validation = validate_workflow(spec)
+    assert validation.ok, validation.errors
+    return spec
+
+
+def _at(seconds: int) -> datetime:
+    """Deterministic UTC timestamp anchored to a fixed instant."""
+    return datetime(2026, 5, 19, 12, 0, 0, tzinfo=UTC) + timedelta(seconds=seconds)
+
+
+def _tool_started(*, call_id: str, tool_name: str, when: datetime) -> BaseEvent:
+    return BaseEvent(
+        id=f"evt_{call_id}_started",
+        type="tool.call.started",
+        timestamp=when,
+        aggregate_type="execution",
+        aggregate_id="exec_ir_proj",
+        data={"call_id": call_id, "tool_name": tool_name, "ac_id": call_id},
+    )
+
+
+def _tool_returned(
+    *, call_id: str, tool_name: str, when: datetime, is_error: bool = False
+) -> BaseEvent:
+    return BaseEvent(
+        id=f"evt_{call_id}_returned",
+        type="tool.call.returned",
+        timestamp=when,
+        aggregate_type="execution",
+        aggregate_id="exec_ir_proj",
+        data={
+            "call_id": call_id,
+            "tool_name": tool_name,
+            "is_error": is_error,
+            "duration_ms": 7,
+            "ac_id": call_id,
+        },
+    )
+
+
+def _verdict_event(*, ac_id: str, when: datetime) -> BaseEvent:
+    return BaseEvent(
+        id=f"evt_{ac_id}_verdict",
+        type="harness.verdict.recorded",
+        timestamp=when,
+        aggregate_type="execution",
+        aggregate_id="exec_ir_proj",
+        data={
+            "scope": "ac",
+            "ac_id": ac_id,
+            "outcome": "pass",
+            "rationale": "fixture verdict",
+        },
+    )
+
+
+def _lifecycle(
+    spec_id: str,
+    event_type: WorkflowLifecycleEventType,
+    *,
+    when: datetime,
+    node_id: str | None = None,
+    edge_id: str | None = None,
+    reason_code: str | None = None,
+    refs: tuple[str, ...] = (),
+    attempt: int | None = None,
+) -> WorkflowLifecycleEvent:
+    return WorkflowLifecycleEvent(
+        event_type=event_type,
+        workflow_id=spec_id,
+        node_id=node_id,
+        edge_id=edge_id,
+        reason_code=reason_code,
+        refs=refs,
+        attempt=attempt,
+        timestamp=when,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Test 1: identifier mapping holds for a fan-out + terminal fixture.
+# ---------------------------------------------------------------------------
+
+
+def test_projection_identifiers_match_workflow_ir_plan() -> None:
+    """Synthetic events keyed by node_id project into matching records.
+
+    Locks the rules in
+    ``docs/agentos/workflow-ir-projection-mapping.md`` § "Identifier
+    mapping":
+
+    * ``WorkflowNode.node_id`` for a task/agent node maps to the projected
+      ``StepRecord.step_id`` via the existing ``call_id``-keyed stable id
+      derivation.
+    * The same node id appears as ``StepRecord.ac_id`` when the event
+      carries it.
+    * ``WorkflowNode.node_id`` for a verifier node maps to
+      ``VerdictRecord.ac_id``.
+    * ``WorkflowSpec.spec_id`` is **not** a projection identifier; the
+      projection's ``run_id`` is derived from the execution anchor.
+
+    The fixture also confirms that lifecycle conformance (the IR's side
+    of the same boundary) accepts the synthetic history.
+    """
+
+    spec = _fixture_spec()
+
+    # 1. Synthetic projection events keyed on the IR node ids.
+    events: list[BaseEvent] = [
+        _tool_started(call_id="run_tool_node", tool_name="Bash", when=_at(10)),
+        _tool_returned(call_id="run_tool_node", tool_name="Bash", when=_at(11)),
+        _verdict_event(ac_id="judge_ac_node", when=_at(12)),
+    ]
+
+    builder = ProjectionBuilder(
+        seed_id="seed_ir_proj",
+        goal="Verify IR ↔ projection mapping contract",
+    )
+    result = builder.add_events(events).build()
+
+    # 2. The projection produces exactly one run and one default stage.
+    assert result.run.seed_id == "seed_ir_proj"
+    assert len(result.stages) == 1
+    stage = result.stages[0]
+    assert stage.run_id == result.run.run_id
+    assert result.run.stage_ids == (stage.stage_id,)
+
+    # 3. One StepRecord whose step_id matches the IR-derived stable id.
+    assert len(result.steps) == 1, [step.name for step in result.steps]
+    step = result.steps[0]
+    expected_step_id = _stable_step_id("execution:exec_ir_proj", "tool", "run_tool_node")
+    assert step.step_id == expected_step_id, (
+        "WorkflowNode.node_id must derive a deterministic StepRecord.step_id "
+        "via the documented call_id mapping"
+    )
+
+    # 4. The step preserves the IR node id as ac_id (per the mapping doc
+    #    row for AGENT/PLUGIN nodes producing acceptance evidence).
+    assert step.ac_id == "run_tool_node"
+    assert step.kind is StepKind.SHELL_COMMAND
+    # Source-event linkage is the only read-model evidence of node
+    # lifecycle; legacy_inferred must stay False on a properly linked
+    # projection.
+    assert step.legacy_inferred is False
+    assert step.source_event_ids == (
+        "evt_run_tool_node_started",
+        "evt_run_tool_node_returned",
+    )
+
+    # 5. One verdict whose ac_id matches the verifier node id.
+    assert len(result.verdicts) == 1
+    verdict = result.verdicts[0]
+    assert verdict.scope == "ac"
+    assert verdict.ac_id == "judge_ac_node"
+    assert verdict.outcome is VerdictOutcome.PASS
+    assert verdict.run_id == result.run.run_id
+
+    # 6. spec_id MUST NOT leak into the projection identity space.
+    assert spec.spec_id not in {result.run.run_id, stage.stage_id, step.step_id}
+    assert spec.spec_id not in {verdict.verdict_id}
+
+    # 7. The IR side accepts a lifecycle history that mirrors these
+    #    projection events, proving the boundary holds in both
+    #    directions. The lifecycle records are not embedded into the
+    #    projection.
+    lifecycle = [
+        _lifecycle(
+            spec.spec_id,
+            WorkflowLifecycleEventType.RUN_CREATED,
+            when=_at(9),
+        ),
+        _lifecycle(
+            spec.spec_id,
+            WorkflowLifecycleEventType.NODE_STARTED,
+            when=_at(10),
+            node_id="run_tool_node",
+            attempt=1,
+        ),
+        _lifecycle(
+            spec.spec_id,
+            WorkflowLifecycleEventType.NODE_COMPLETED,
+            when=_at(11),
+            node_id="run_tool_node",
+            attempt=1,
+        ),
+        _lifecycle(
+            spec.spec_id,
+            WorkflowLifecycleEventType.NODE_STARTED,
+            when=_at(11),
+            node_id="judge_ac_node",
+            attempt=1,
+        ),
+        _lifecycle(
+            spec.spec_id,
+            WorkflowLifecycleEventType.NODE_COMPLETED,
+            when=_at(12),
+            node_id="judge_ac_node",
+            attempt=1,
+        ),
+        _lifecycle(
+            spec.spec_id,
+            WorkflowLifecycleEventType.RUN_COMPLETED,
+            when=_at(13),
+        ),
+    ]
+    report = validate_workflow_lifecycle_conformance(spec, lifecycle)
+    assert report.ok, report.issues
+
+
+# ---------------------------------------------------------------------------
+# Test 2: negative case — events that reference an unknown node id still
+# produce a valid projection, and the mismatch is surfaced by the IR's
+# existing conformance helper without introducing a new flag.
+# ---------------------------------------------------------------------------
+
+
+def test_projection_builds_when_events_reference_unknown_node_id() -> None:
+    """Mis-linked synthetic events do not crash the projection.
+
+    Locks the negative-path behavior described in
+    ``docs/agentos/workflow-ir-projection-mapping.md`` § "Verification":
+
+    * The projection builder is spec-agnostic by design, so an event
+      whose ``call_id`` is not a known ``WorkflowNode.node_id`` still
+      produces a well-formed ``StepRecord``. No new flag is added to
+      either side to represent the mismatch.
+    * The mismatch is surfaced via the existing IR-side helper
+      ``validate_workflow_lifecycle_conformance``, which emits an
+      ``unknown_node_id`` conformance issue when a lifecycle event names
+      a node that the spec does not declare.
+    * The projected step's ``legacy_inferred`` flag is **not** flipped
+      on by this builder for unknown ids — the mapping contract relies on
+      the IR-side conformance helper, not on a new projection flag.
+    """
+
+    spec = _fixture_spec()
+
+    # Event references a node that is NOT in the spec — the mis-link.
+    events: list[BaseEvent] = [
+        _tool_started(
+            call_id="ghost_node",
+            tool_name="Bash",
+            when=_at(20),
+        ),
+        _tool_returned(
+            call_id="ghost_node",
+            tool_name="Bash",
+            when=_at(21),
+        ),
+    ]
+
+    result = ProjectionBuilder(seed_id="seed_ir_proj_negative").add_events(events).build()
+
+    # The projection still builds.
+    assert len(result.steps) == 1
+    ghost_step = result.steps[0]
+    assert ghost_step.ac_id == "ghost_node"
+    assert ghost_step.legacy_inferred is False  # no new flag introduced
+    assert ghost_step.source_event_ids == (
+        "evt_ghost_node_started",
+        "evt_ghost_node_returned",
+    )
+    # The projection's step_id is derivable, but the IR has no node it
+    # corresponds to.
+    expected_step_id = _stable_step_id("execution:exec_ir_proj", "tool", "ghost_node")
+    assert ghost_step.step_id == expected_step_id
+    assert "ghost_node" not in {node.node_id for node in spec.nodes}
+
+    # The mismatch is detected by the IR's existing conformance helper
+    # rather than by adding a new projection flag.
+    lifecycle = [
+        _lifecycle(
+            spec.spec_id,
+            WorkflowLifecycleEventType.RUN_CREATED,
+            when=_at(19),
+        ),
+        _lifecycle(
+            spec.spec_id,
+            WorkflowLifecycleEventType.NODE_STARTED,
+            when=_at(20),
+            node_id="ghost_node",
+            attempt=1,
+        ),
+    ]
+    report = validate_workflow_lifecycle_conformance(spec, lifecycle)
+    assert not report.ok
+    unknown_codes = {issue.code for issue in report.errors}
+    assert "unknown_node_id" in unknown_codes, report.issues

From b88324d6e14525d51c363634606c61a167cffd3f Mon Sep 17 00:00:00 2001
From: Junghwan <70629228+shaun0927@users.noreply.github.com>
Date: Wed, 20 May 2026 12:11:06 +0900
Subject: [PATCH 2/5] fix(harness): promote stable_step_id to public API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses PR #1150 review findings:

HIGH: Rename `_stable_step_id` to `stable_step_id` and expose it via
`__all__` so the IR ↔ projection mapping test can import the helper
without reaching into a private name. The test file documents the
mapping contract, so the helper it relies on must be a public API.

MEDIUM: Add an inline comment in Test 2 explaining that the
`source_key` is derived from `aggregate_id="exec_ir_proj"` in the
`_tool_started` / `_tool_returned` helpers, so the expected step id
construction is self-documenting.

No projection schema or behavior change; doc updated to reference the
new public name.

Refs #1142, #946, #956
---
 docs/agentos/workflow-ir-projection-mapping.md      |  2 +-
 src/ouroboros/harness/projection_builder.py         | 13 +++++++------
 tests/integration/test_ir_projection_consistency.py |  7 ++++---
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/docs/agentos/workflow-ir-projection-mapping.md b/docs/agentos/workflow-ir-projection-mapping.md
index 2d0ee56c8..5c63a8f57 100644
--- a/docs/agentos/workflow-ir-projection-mapping.md
+++ b/docs/agentos/workflow-ir-projection-mapping.md
@@ -41,7 +41,7 @@ following way; they do **not** share storage.
 
 | Node owner / kind | Projection target | How identifiers line up |
 | --- | --- | --- |
-| `NodeOwner.AGENT`, `NodeOwner.PLUGIN`, `NodeOwner.HARNESS` with tool/LLM work | `StepRecord` | Runtime callers set `event.data["call_id"] == WorkflowNode.node_id` on the paired `tool.call.started` / `tool.call.returned` (or `llm.call.requested` / `llm.call.returned`) rows. `ProjectionBuilder._stable_step_id(source_key, family, call_id)` then yields a deterministic `StepRecord.step_id` keyed off the node id. The IR side never stores the step id; it stays purely derivable from journal rows. |
+| `NodeOwner.AGENT`, `NodeOwner.PLUGIN`, `NodeOwner.HARNESS` with tool/LLM work | `StepRecord` | Runtime callers set `event.data["call_id"] == WorkflowNode.node_id` on the paired `tool.call.started` / `tool.call.returned` (or `llm.call.requested` / `llm.call.returned`) rows. The module-level helper `ouroboros.harness.projection_builder.stable_step_id(source_key, family, call_id)` then yields a deterministic `StepRecord.step_id` keyed off the node id. The IR side never stores the step id; it stays purely derivable from journal rows. |
 | `NodeOwner.AGENT`, `NodeOwner.PLUGIN` producing acceptance evidence | `StepRecord.ac_id` | When the node is the acceptance-criterion anchor for the work, `event.data["ac_id"]` carries the same identifier the IR plan uses for that AC (typically `WorkflowNode.node_id` or a metadata-attached AC label). `ProjectionBuilder._extract_ac_id` lifts it onto the projected `StepRecord` without invention. |
 | `NodeOwner.VERIFIER` | `VerdictRecord` | The verifier's `harness.verdict.recorded` / `evaluation.verdict.recorded` event sets `event.data["scope"] = "ac"` and `event.data["ac_id"] == WorkflowNode.node_id` for the AC the verifier judged. `_verdict_from_event` projects that into `VerdictRecord.ac_id`. Run-scope verdicts (`scope == "run"`) project against the run, not a node. |
 | `NodeOwner.HUMAN_GATE` | _(not projected in v1)_ | HITL WAIT/RESUME authority lives under #960 and is explicitly deferred by `projection-v1-scope.md`. The mapping leaves these node ids dangling on purpose; the projection has no record kind for them today. |
diff --git a/src/ouroboros/harness/projection_builder.py b/src/ouroboros/harness/projection_builder.py
index 58205edf8..cc0d84cc5 100644
--- a/src/ouroboros/harness/projection_builder.py
+++ b/src/ouroboros/harness/projection_builder.py
@@ -204,7 +204,7 @@ def build(self) -> ProjectionBuildResult:
         ended_at = self._last_event_at
 
         step_ids_by_slot_key = {
-            slot_key: _stable_step_id(source_key, *_slot_parts(slot_key))
+            slot_key: stable_step_id(source_key, *_slot_parts(slot_key))
             for slot_key in self._steps
         }
         valid_step_ids = frozenset(step_ids_by_slot_key.values())
@@ -310,7 +310,7 @@ def _handle_tool_returned(self, returned_event: BaseEvent) -> None:
             schema_version=PROJECTION_SCHEMA_VERSION,
             step_id=previous.step_id
             if previous is not None
-            else _stable_step_id("pending", "tool", call_id),
+            else stable_step_id("pending", "tool", call_id),
             run_id="run_placeholder",  # rewritten in build()
             stage_id="stage_placeholder",
             kind=kind,
@@ -348,7 +348,7 @@ def _handle_llm_returned(self, returned_event: BaseEvent) -> None:
             schema_version=PROJECTION_SCHEMA_VERSION,
             step_id=previous.step_id
             if previous is not None
-            else _stable_step_id("pending", "llm", call_id),
+            else stable_step_id("pending", "llm", call_id),
             run_id="run_placeholder",  # rewritten in build()
             stage_id="stage_placeholder",
             kind=StepKind.MODEL_CALL,
@@ -541,7 +541,7 @@ def _slot_key(family: str, call_id: str) -> str:
     return f"{family}:{call_id}"
 
 
-def _stable_step_id(source_key: str, family: str, call_id: str) -> str:
+def stable_step_id(source_key: str, family: str, call_id: str) -> str:
     digest = uuid5(
         NAMESPACE_URL,
         f"ouroboros:harness:step:{source_key}:{family}:{call_id}",
@@ -591,7 +591,7 @@ def _artifact_from_event(
     if call_id is None:
         return None
     family = _optional_str(event.data.get("step_family")) or "tool"
-    step_id = _stable_step_id(source_key, family, call_id)
+    step_id = stable_step_id(source_key, family, call_id)
     artifact_id = _optional_str(event.data.get("artifact_id")) or _stable_artifact_id(
         source_key, event.id
     )
@@ -767,7 +767,7 @@ def _step_from_start_only(
             metadata["args_preview"] = preview
     return StepRecord(
         schema_version=PROJECTION_SCHEMA_VERSION,
-        step_id=_stable_step_id("pending", family, call_id),
+        step_id=stable_step_id("pending", family, call_id),
         run_id=run_id,
         stage_id=stage_id,
         kind=kind,
@@ -785,4 +785,5 @@ def _step_from_start_only(
     "ProjectionBuildResult",
     "ProjectionBuilder",
     "build_projection",
+    "stable_step_id",
 ]
diff --git a/tests/integration/test_ir_projection_consistency.py b/tests/integration/test_ir_projection_consistency.py
index 98daca80e..25268eee5 100644
--- a/tests/integration/test_ir_projection_consistency.py
+++ b/tests/integration/test_ir_projection_consistency.py
@@ -21,7 +21,7 @@
 from ouroboros.harness.projection import StepKind, VerdictOutcome
 from ouroboros.harness.projection_builder import (
     ProjectionBuilder,
-    _stable_step_id,
+    stable_step_id,
 )
 from ouroboros.orchestrator.workflow_ir import (
     EdgeKind,
@@ -250,7 +250,8 @@ def test_projection_identifiers_match_workflow_ir_plan() -> None:
     # 3. One StepRecord whose step_id matches the IR-derived stable id.
     assert len(result.steps) == 1, [step.name for step in result.steps]
     step = result.steps[0]
-    expected_step_id = _stable_step_id("execution:exec_ir_proj", "tool", "run_tool_node")
+    # Source key derived from aggregate_id="exec_ir_proj" in _tool_started/_tool_returned helpers.
+    expected_step_id = stable_step_id("execution:exec_ir_proj", "tool", "run_tool_node")
     assert step.step_id == expected_step_id, (
         "WorkflowNode.node_id must derive a deterministic StepRecord.step_id "
         "via the documented call_id mapping"
@@ -384,7 +385,7 @@ def test_projection_builds_when_events_reference_unknown_node_id() -> None:
     )
     # The projection's step_id is derivable, but the IR has no node it
     # corresponds to.
-    expected_step_id = _stable_step_id("execution:exec_ir_proj", "tool", "ghost_node")
+    expected_step_id = stable_step_id("execution:exec_ir_proj", "tool", "ghost_node")
     assert ghost_step.step_id == expected_step_id
     assert "ghost_node" not in {node.node_id for node in spec.nodes}
 

From cf8dea538f5935cf92cf76c1684ee0fa4c822589 Mon Sep 17 00:00:00 2001
From: Junghwan <70629228+shaun0927@users.noreply.github.com>
Date: Wed, 20 May 2026 12:13:29 +0900
Subject: [PATCH 3/5] chore(ruff): fix lint violations

Refs #1142.
---
 src/ouroboros/harness/projection_builder.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/ouroboros/harness/projection_builder.py b/src/ouroboros/harness/projection_builder.py
index cc0d84cc5..02d58f202 100644
--- a/src/ouroboros/harness/projection_builder.py
+++ b/src/ouroboros/harness/projection_builder.py
@@ -204,8 +204,7 @@ def build(self) -> ProjectionBuildResult:
         ended_at = self._last_event_at
 
         step_ids_by_slot_key = {
-            slot_key: stable_step_id(source_key, *_slot_parts(slot_key))
-            for slot_key in self._steps
+            slot_key: stable_step_id(source_key, *_slot_parts(slot_key)) for slot_key in self._steps
         }
         valid_step_ids = frozenset(step_ids_by_slot_key.values())
         artifacts = tuple(

From f2e4576a2841c5c1710cd178ae6e5ec36d22c1e6 Mon Sep 17 00:00:00 2001
From: shaun0927 <shaun0927@users.noreply.github.com>
Date: Wed, 20 May 2026 03:24:25 +0000
Subject: [PATCH 4/5] docs(agentos): align IR projection test scope wording

---
 tests/integration/test_ir_projection_consistency.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_ir_projection_consistency.py b/tests/integration/test_ir_projection_consistency.py
index 25268eee5..76a6e838c 100644
--- a/tests/integration/test_ir_projection_consistency.py
+++ b/tests/integration/test_ir_projection_consistency.py
@@ -9,8 +9,9 @@
 the default boundary fixture pairs a validated ``WorkflowSpec`` with
 synthetic ``EventStore`` rows to prove source-event linkage, and it must
 not add dispatch, cache, persistence, or projection-record embedding to
-the IR. No production source file under ``src/`` is modified to make
-these tests pass.
+the IR. The only production-source dependency is the projection builder's
+public ``stable_step_id`` helper, used to derive stable step identifiers;
+the tests do not add runtime behavior.
 """
 
 from __future__ import annotations

From 3a04a2094bc55ca13954c48306344fbab56cb13b Mon Sep 17 00:00:00 2001
From: shaun0927 <shaun0927@users.noreply.github.com>
Date: Wed, 20 May 2026 03:29:50 +0000
Subject: [PATCH 5/5] chore: trigger PR review refresh