From 12cc733356e0134599b98d04beb434fc7806a580 Mon Sep 17 00:00:00 2001 From: Sahil Ahuja Date: Sat, 13 Jun 2026 20:17:42 +0530 Subject: [PATCH 1/5] intake: resolve-agent --alias flag (Claude-Code model alias adapter) --- .../.history.jsonl | 4 + .../.status.yaml | 35 ++++ .../intake.md | 192 ++++++++++++++++++ 3 files changed, 231 insertions(+) create mode 100644 fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl create mode 100644 fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml create mode 100644 fab/changes/260613-yky7-resolve-agent-alias-flag/intake.md diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl b/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl new file mode 100644 index 00000000..a0c7cc0d --- /dev/null +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl @@ -0,0 +1,4 @@ +{"action":"enter","driver":"fab-new","event":"stage-transition","stage":"intake","ts":"2026-06-13T12:44:37Z"} +{"args":"Add --alias flag to fab resolve-agent that emits Claude-Code short model alias (opus/sonnet/haiku/fable) instead of full model ID, fixing Agent-tool dispatch enum rejection","cmd":"fab-new","event":"command","ts":"2026-06-13T12:44:37Z"} +{"delta":"+3.8","event":"confidence","score":3.8,"trigger":"calc-score","ts":"2026-06-13T12:45:53Z"} +{"delta":"+0.0","event":"confidence","score":3.8,"trigger":"calc-score","ts":"2026-06-13T12:45:58Z"} diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml b/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml new file mode 100644 index 00000000..c485c505 --- /dev/null +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml @@ -0,0 +1,35 @@ +id: yky7 +name: 260613-yky7-resolve-agent-alias-flag +created: 2026-06-13T12:44:37Z +created_by: sahil-noon +change_type: fix +issues: [] +progress: + intake: ready + apply: pending + review: pending + hydrate: pending + ship: pending + review-pr: pending +plan: + generated: false + task_count: 0 + acceptance_count: 0 + acceptance_completed: 0 +confidence: + certain: 3 + confident: 4 + tentative: 0 + unresolved: 0 + score: 3.8 + fuzzy: true + dimensions: + signal: 82.1 + reversibility: 81.4 + competence: 87.9 + disambiguation: 85.0 +stage_metrics: + intake: {started_at: "2026-06-13T12:44:37Z", driver: fab-new, iterations: 1} +prs: [] +# true_impact: lazily created on first apply-finish (no placeholder here). +last_updated: 2026-06-13T12:46:03Z diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/intake.md b/fab/changes/260613-yky7-resolve-agent-alias-flag/intake.md new file mode 100644 index 00000000..4cde62f8 --- /dev/null +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/intake.md @@ -0,0 +1,192 @@ +# Intake: resolve-agent --alias flag (Claude-Code model alias adapter) + +**Change**: 260613-yky7-resolve-agent-alias-flag +**Created**: 2026-06-13 + +## Origin + +Surfaced during a `/fab-discuss` session investigating a live pipeline failure. A previous +agent, dispatching the **apply** stage sub-agent, resolved `fab resolve-agent apply` → +`model=claude-opus-4-8` and passed that full model ID verbatim into the **Agent tool's `model` +parameter**. The dispatch failed with **"Invalid tool parameters."** The agent recovered by +hand-mapping `claude-opus-4-8` → `opus`. + +The user initially hypothesized the `agent.tiers` config structure was wrong. Investigation +disproved that: the config, the `defaultTiers` Go map, and `fab resolve-agent`'s verbatim output +are all **correct per `docs/specs/stage-models.md`** (provider-neutral, full-ID, drift-guarded). +The real fault is a **vocabulary mismatch between two distinct Claude-Code surfaces**: + +| Surface | Accepts full ID `claude-opus-4-8`? | Accepts alias `opus`? | +|---|---|---| +| `claude` **CLI** `--model` flag (operator launcher, `spawn_command`) | Yes | Yes | +| **Agent tool** `model` param (orchestrator sub-agent dispatch) | **No — hard enum** | Yes | + +The Agent tool's `model` parameter is a hard JSON-schema enum: `["sonnet","opus","haiku","fable"]`. +It rejects full IDs. The CLI `--model` flag accepts both (`claude --help`: "Provide an alias ... +or a model's full name"). + +Interaction mode: conversational. The user chose the fix shape explicitly: +- **Fix via a `--alias` flag** (encode the mapping deterministically in Go), rejecting the + prompt-only hand-mapping option — *because the live failure was precisely an agent fumbling the + hand-map*. +- The user also raised "put aliases in the tiers instead of full IDs"; this was analyzed and + **rejected** (it would break provider-neutrality, weaken the Fable version-pin discipline, and + require a coordinated edit across the Go map + two drift-guarded spec tables + config comments + + migration — pushing a harness quirk into the provider-neutral core). See Design Decisions. + +## Why + +**Problem.** The orchestrated pipeline (`/fab-ff`, `/fab-fff`, `/fab-continue`) is *broken at the +Agent-tool dispatch seam* whenever a stage resolves to a full model ID — which is every stage +under the shipped defaults. Per-stage model selection (feature #406/#407) cannot actually dispatch +a sub-agent with its resolved model without a manual workaround. + +**Consequence if unfixed.** Every orchestrated run either fails at first sub-agent dispatch or +silently depends on the agent improvising a full-ID→alias mapping by hand — exactly the +error-prone step that failed in the live run. Per-stage model selection is effectively unusable in +its primary (orchestrated) mode. + +**Why this approach.** The mismatch is genuinely **specific to the Claude-Code Agent-tool +surface**, not to fab's resolution logic. `stage-models.md` already names "injecting the resolved +model into the Agent dispatch" as the **harness-adapter boundary** — the one Claude-Code-specific +layer. The fix belongs *exactly there*: a `--alias` flag that emits the Claude-Code alias on +demand, leaving the provider-neutral default (full ID) untouched for the CLI/operator path. +Encoding the mapping in Go (vs. prompt-side) makes it deterministic — the resolver, not a +per-dispatch agent guess, owns the translation. + +## What Changes + +### 1. New `--alias` flag on `fab resolve-agent ` + +A boolean `--alias` flag on the cobra command in `src/go/fab/cmd/fab/resolve_agent.go`. Default +(absent) = today's behavior exactly (emit full model ID). When set, the `model=` line emits the +Claude-Code short alias instead. The `effort=` line is **unaffected** by `--alias`. + +``` +$ fab resolve-agent apply +model=claude-opus-4-8 +effort=high + +$ fab resolve-agent apply --alias +model=opus +effort=high +``` + +### 2. Alias mapping in `internal/agent` + +A new exported function in `src/go/fab/internal/agent/agent.go` (the natural home — alongside the +tier tables and `Resolve`). Signature shape (final name decided in plan): + +```go +// ModelAlias maps a full Claude model ID to its Claude-Code short alias +// (the Agent tool's `model` enum: opus/sonnet/haiku/fable). Returns the input +// VERBATIM when no mapping applies (empty string, or an unrecognized/non-Claude +// ID) — preserving provider-neutrality: --alias is a Claude-Code adapter, not a +// validator. Prefix-matched so claude-haiku-4-5-20251001 → haiku. +func ModelAlias(model string) string +``` + +Mapping (prefix-based to absorb dated variants like `claude-haiku-4-5-20251001`): + +| Full ID prefix | Alias | +|---|---| +| `claude-opus-` | `opus` | +| `claude-sonnet-` | `sonnet` | +| `claude-haiku-` | `haiku` | +| `claude-fable-` | `fable` | + +- **Empty model** → empty (no-op): preserves the "inherit the session model" signal. The `model=` + line stays empty under `--alias`. +- **Unmapped / non-Claude ID** (e.g. `gpt-5`) → **returned verbatim** (pass-through). This keeps + `--alias` from becoming a Claude-only validator: a project that overrode a tier to another + provider's model still gets its string through unchanged. (Decision point — see Open Questions + / Assumptions; leaning verbatim pass-through.) + +The flag wiring formats output via the existing `formatAgentProfile` path — `resolveAgentCmd` +applies `ModelAlias` to `profile.Model` before formatting when `--alias` is set (alternatively, +pass a flag into a small format variant; plan decides the cleanest seam). The `model=` / +`effort=` line contract and byte-stability are otherwise unchanged. + +### 3. Skill wiring — orchestrator/dispatch use `--alias` for the Agent-tool path + +The canonical instruction lives in **`src/kit/skills/_preamble.md` § Subagent Dispatch → +Per-Stage Model Resolution**, specifically the **"Harness-adapter boundary (Claude Code)"** +paragraph (~line 346). Today it says the resolved model goes into "the Agent tool's `model` +parameter" but does not account for the enum mismatch. Update it to instruct: **for the +Claude-Code Agent-tool dispatch, resolve with `fab resolve-agent --alias`** so the emitted +`model=` is already an Agent-tool-valid alias; pass it straight into the Agent tool `model` param +(empty ⇒ omit/inherit, unchanged). + +The three consuming skills echo the preamble and must be updated to pass `--alias` (or to defer to +the preamble's updated instruction — keep the existing defer-to-preamble pattern, just ensure the +`--alias` requirement is unambiguous): +- `src/kit/skills/fab-ff.md` (line ~37 note) +- `src/kit/skills/fab-fff.md` (lines ~37, ~47, ~57 notes) +- `src/kit/skills/fab-continue.md` (line ~154 review-dispatch note) + +**Operator launcher path is NOT changed.** `fab operator` / the `_cli-fab.md:554` path appends +`--model ` to a `claude` **CLI** invocation, which accepts full IDs. It must keep +resolving **without** `--alias`. The CLI and Agent-tool paths deliberately diverge. + +### 4. CLI reference — `_cli-fab.md` + +Constitution constraint (CLI changes MUST update `_cli-fab.md`): update the `fab resolve-agent` +entry (§ around line 227) to document the `--alias` flag — what it emits, that it's a Claude-Code +Agent-tool adapter, that default behavior is unchanged (full ID), and that empty/non-Claude models +pass through verbatim. + +### 5. Spec — `docs/specs/stage-models.md` + +Update **§ Harness-adapter boundary (the only Claude-Code-specific layer)** to document the +two-surface vocabulary split and the `--alias` flag as the adapter mechanism for the Agent-tool +path. This is a spec-of-design update (the spec already frames this boundary as the Claude-Code +adapter — we're making the alias mechanism concrete). The two **drift-guarded tables** (default +tier profiles, stage→tier mapping) are **NOT touched** — full IDs stay canonical, so +`TestDocTablesMatchAgentMaps` is unaffected. + +### 6. Go test coverage + +Add tests (test-alongside, `**/*_test.go`): +- `ModelAlias` unit tests: each of opus/sonnet/haiku/fable full IDs → alias; dated haiku variant → + `haiku`; empty → empty; unmapped (`gpt-5`) → verbatim. +- `resolve-agent --alias` command-level test: `apply --alias` emits `model=opus` + `effort=high`; + without `--alias` still emits `model=claude-opus-4-8` (regression guard for the unchanged + default); a tier with empty model under `--alias` still emits an empty `model=` line. + +## Affected Memory + +- `pipeline/stage-models`: (modify) — the resolve-agent `--alias` flag and the two-surface + (CLI vs Agent-tool) adapter split. *(Exact memory file path confirmed at hydrate — the pipeline + domain owns stage-models / dispatch wiring; this may be the stage-models memory file or the + dispatch-wiring file. No new spec-level behavior beyond the documented adapter mechanism.)* + +## Impact + +- `src/go/fab/internal/agent/agent.go` — new `ModelAlias` function (+ its test file). +- `src/go/fab/cmd/fab/resolve_agent.go` — new `--alias` bool flag; apply mapping pre-format (+ test). +- `src/kit/skills/_preamble.md` — Harness-adapter boundary paragraph (canonical instruction). +- `src/kit/skills/fab-ff.md`, `fab-fff.md`, `fab-continue.md` — per-stage-model dispatch notes. +- `src/kit/skills/_cli-fab.md` — resolve-agent signature gains `--alias`. +- `docs/specs/stage-models.md` — § Harness-adapter boundary (no drift-guarded table touched). +- **No change**: `defaultTiers` / `stageTiers` maps, the operator launcher path, `agent.tiers` + config schema, the `model=`/`effort=` default output contract. + +## Open Questions + +- Unmapped / non-Claude model ID under `--alias`: pass through verbatim, or error? (Leaning + verbatim pass-through to preserve provider-neutrality — `--alias` is a best-effort Claude-Code + adapter, not a validator. Recorded as a Confident assumption below.) + +## Assumptions + +| # | Grade | Decision | Rationale | Scores | +|---|-------|----------|-----------|--------| +| 1 | Certain | Fix via a Go-side `--alias` flag on `fab resolve-agent`, not prompt-side hand-mapping | User chose this explicitly in discussion; the live failure was an agent fumbling the hand-map, so determinism in Go is the durable fix | S:95 R:80 A:90 D:95 | +| 2 | Certain | Do NOT switch tier defaults to aliases; full IDs stay canonical in `defaultTiers` + drift-guarded spec tables | User raised this alternative; analyzed and rejected — breaks provider-neutrality, weakens the Fable version-pin, and forces a coordinated multi-file edit pushing a harness quirk into the provider-neutral core | S:90 R:75 A:90 D:90 | +| 3 | Certain | Default behavior (no `--alias`) is byte-identical to today (full model ID); CLI/operator path unchanged | The `claude` CLI `--model` flag accepts full IDs (`claude --help` confirms); only the Agent-tool enum rejects them. Two surfaces must diverge | S:95 R:85 A:95 D:95 | +| 4 | Confident | Mapping is prefix-based (`claude-haiku-` → `haiku`) so dated variants (`claude-haiku-4-5-20251001`) resolve | The Agent enum is family-level (opus/sonnet/haiku/fable); full IDs carry version/date suffixes. Prefix match is the robust mapping | S:75 R:80 A:85 D:80 | +| 5 | Confident | Unmapped / non-Claude model under `--alias` passes through verbatim (not an error) | Preserves provider-neutrality — `--alias` is a Claude-Code adapter, not a validator; a non-Claude override still flows. Open question, but the leaning is clear and low-risk (reversible) | S:65 R:80 A:75 D:70 | +| 6 | Confident | `ModelAlias` lives in `internal/agent` (alongside the tier tables + `Resolve`) | That package already owns the model vocabulary and is the drift-guard's subject; cohesive home | S:75 R:85 A:90 D:80 | +| 7 | Confident | Skills keep the defer-to-preamble pattern; canonical `--alias` instruction lives in `_preamble.md` Harness-adapter boundary, echoed by ff/fff/continue | Matches the existing single-source convention (the three skills already defer to the preamble for per-stage model resolution) | S:80 R:85 A:90 D:85 | + +7 assumptions (3 certain, 4 confident, 0 tentative, 0 unresolved). From 13283bfe3bc03a31620b8779d9c45c50b46a0a38 Mon Sep 17 00:00:00 2001 From: Sahil Ahuja Date: Sat, 13 Jun 2026 20:42:52 +0530 Subject: [PATCH 2/5] fix: add --alias flag to fab resolve-agent for Agent-tool dispatch --- docs/memory/_shared/context-loading.md | 11 +- docs/memory/_shared/index.md | 2 +- docs/memory/memory-docs/index.md | 2 +- docs/memory/pipeline/execution-skills.md | 3 +- docs/memory/pipeline/index.md | 2 +- docs/specs/skills/SPEC-_preamble.md | 7 +- docs/specs/stage-models.md | 25 +- .../.history.jsonl | 8 + .../.status.yaml | 52 ++-- .../intake.md | 160 ++++++------ .../plan.md | 229 ++++++++++++++++++ src/go/fab/cmd/fab/resolve_agent.go | 15 +- src/go/fab/cmd/fab/resolve_agent_test.go | 45 ++++ src/go/fab/internal/agent/agent.go | 27 +++ src/go/fab/internal/agent/agent_test.go | 22 ++ src/kit/skills/_cli-fab.md | 14 +- src/kit/skills/_pipeline.md | 12 +- src/kit/skills/_preamble.md | 4 +- src/kit/skills/fab-continue.md | 14 +- src/kit/skills/fab-ff.md | 2 +- src/kit/skills/fab-fff.md | 6 +- 21 files changed, 535 insertions(+), 127 deletions(-) create mode 100644 fab/changes/260613-yky7-resolve-agent-alias-flag/plan.md diff --git a/docs/memory/_shared/context-loading.md b/docs/memory/_shared/context-loading.md index 81389ca2..3f7c74b7 100644 --- a/docs/memory/_shared/context-loading.md +++ b/docs/memory/_shared/context-loading.md @@ -1,5 +1,5 @@ --- -description: "Smart context loading convention — descriptive 7-file always-load layer (skill file wins; exception set rule-derived from skill files, never enumerated in the preamble — d9rs), opt-in skill helpers (7-value allowlist incl. `_srad`/`_pipeline`/`_intake`) + stage-conditional loading, standard subagent context (orchestrators incl. `/fab-proceed`), per-stage model resolution at the dispatch seam (`fab resolve-agent `, provider-neutral, Claude-Code adapter named, review-resolves-once — l3ja; applies on every post-intake stage since the single-dispatch collapse, advisory only for a genuinely no-dispatch run — fgxx; the two halves dispatch through two seams — model via the Agent `model` param (short alias opus/sonnet/haiku/fable, id→alias mapped at the seam), effort via an imperative subagent-prompt instruction (no Agent effort param; omit when empty), plus a compliance-visibility expectation that each site surface the resolved `model=/effort=` — m3d4; residual = a per-sub-agent effort param on the Agent tool, a harness ask), selective domain loading, SRAD protocol pointer, scoped Next Steps Convention, generic fab-command failure rule (unconditional non-zero exit → STOP; `fab log command` exits 0 by contract)" +description: "Smart context loading convention — descriptive 7-file always-load layer (skill file wins; exception set rule-derived from skill files, never enumerated in the preamble — d9rs), opt-in skill helpers (7-value allowlist incl. `_srad`/`_pipeline`/`_intake`) + stage-conditional loading, standard subagent context (orchestrators incl. `/fab-proceed`), per-stage model resolution at the dispatch seam (`fab resolve-agent `, provider-neutral, Claude-Code adapter named, review-resolves-once — l3ja; applies on every post-intake stage since the single-dispatch collapse, advisory only for a genuinely no-dispatch run — fgxx; the two halves dispatch through two seams — model via the Agent `model` param (short-alias enum opus/sonnet/haiku/fable, resolved with `fab resolve-agent --alias` so the alias is emitted directly — the deterministic adapter that superseded m3d4's prompt-side id→alias hand-map; yky7), effort via an imperative subagent-prompt instruction (no Agent effort param; omit when empty), plus a compliance-visibility expectation that each site surface the resolved `model=/effort=` — m3d4; residual = a per-sub-agent effort param on the Agent tool, a harness ask), selective domain loading, SRAD protocol pointer, scoped Next Steps Convention, generic fab-command failure rule (unconditional non-zero exit → STOP; `fab log command` exits 0 by contract)" --- # Context Loading @@ -125,7 +125,7 @@ Per-stage model selection is wired into the **sub-agent dispatch seam** (`_pream **Compliance visibility (m3d4).** Each dispatch site MUST **surface the resolved `model=/effort=` lines** — carry them into the dispatch prompt (the effort line is already there per the seam above; co-locate the model line) and/or echo them in the orchestrator's own step output — so a *skipped* `fab resolve-agent` call (the sub-agent silently inherits the session profile) or a mis-resolved tier is **visible rather than silent**. There is no code-level guard fab can install (dispatch is harness-internal), so visibility is the available seam; the canonical contract also notes an all-empty resolution (both `model=` and `effort=` empty) is itself worth surfacing/asserting rather than dispatching blind. -**Harness-adapter boundary (Claude Code).** The resolution (stage→tier→`{model, effort}`) is **provider-neutral**; injecting it into the actual dispatch is **harness-specific**. For Claude Code the model adapter is the **Agent tool's `model` parameter** and the effort adapter is the subagent-prompt instruction above. One concrete harness detail: the Agent tool's `model` param takes a **short alias** (`opus`/`sonnet`/`haiku`/`fable`), **not** the full versioned id (`claude-opus-4-8`) that `fab resolve-agent` emits — the orchestrator maps the resolved id → alias at the dispatch seam. Named explicitly as the Claude-Code adapter, not as universal truth; the coupling is not new (fab's entire subagent-dispatch design is already Claude-Code-shaped), so per-stage selection is exactly as portable as fab's existing dispatch. +**Harness-adapter boundary (Claude Code).** The resolution (stage→tier→`{model, effort}`) is **provider-neutral**; injecting it into the actual dispatch is **harness-specific**. For Claude Code the model adapter is the **Agent tool's `model` parameter** and the effort adapter is the subagent-prompt instruction above. One concrete harness detail: the Agent tool's `model` param is a **hard enum of short aliases** (`opus`/`sonnet`/`haiku`/`fable`), **not** the full versioned id (`claude-opus-4-8`) that the plain `fab resolve-agent` emits — so for Agent-tool dispatch the model half is resolved with **`fab resolve-agent --alias`** (yky7), which emits the Agent-tool-valid short alias directly on the `model=` line (a deterministic Go-side translation: prefix-matched so dated variants resolve, empty ⇒ empty inherit-signal, a non-Claude override passes through verbatim). This **supersedes the earlier prompt-side hand-mapping** (where the orchestrator was told to map the resolved id → alias by hand at the dispatch seam — m3d4's instruction, which was brittle and easy to fumble); no agent ever hand-maps now. Named explicitly as the Claude-Code adapter, not as universal truth; the coupling is not new (fab's entire subagent-dispatch design is already Claude-Code-shaped), so per-stage selection is exactly as portable as fab's existing dispatch. *(The operator launcher path is the deliberate exception — it appends `--model ` to a `claude` CLI invocation, which accepts full IDs, so it resolves WITHOUT `--alias`.)* **Review resolves once.** The `review` stage spawns two reviewer sub-agents (inward + outward) plus a merge. The dispatcher resolves `fab resolve-agent review` **once** and applies the same model AND the same effort-prompt instruction to all three — the mechanical merge runs at the reviewer's tier, an accepted stage-granularity tradeoff. @@ -176,6 +176,12 @@ The exception set is **declared by the skill files themselves** (the preamble ne ## Design Decisions +### `--alias` Flag — Deterministic Go-Side id→alias Translation at the Agent-Tool Seam +**Decision**: The Agent-tool model half is resolved with `fab resolve-agent --alias`, a boolean flag that emits the Claude-Code short alias (`opus`/`sonnet`/`haiku`/`fable`) directly on the `model=` line. The two Claude-Code surfaces deliberately diverge: the `claude` **CLI** `--model` flag (operator launcher / `spawn_command`) accepts full IDs **and** aliases, so it keeps resolving WITHOUT `--alias` (full ID); the **Agent tool's `model` param** is a hard JSON-schema enum (`["sonnet","opus","haiku","fable"]`) that rejects full IDs, so orchestrator/sequencer sub-agent dispatch resolves WITH `--alias`. The mapping lives in `agent.ModelAlias` (`internal/agent`, alongside the tier tables + `Resolve`): prefix-matched (`claude-opus-` → `opus`, etc.) so dated variants (`claude-haiku-4-5-20251001` → `haiku`) resolve; empty → empty (preserves the inherit signal); an unmapped/non-Claude ID (`gpt-5`) → verbatim pass-through (adapter, not validator — preserves provider-neutrality). Applied in `resolveAgentCmd`'s RunE as a one-line pre-format mutation of `profile.Model`; `formatAgentProfile` stays a pure, byte-stable formatter. Default (no flag) is byte-identical to today (full ID); `--alias` touches only the `model=` line, never `effort=`. +**Why**: This **supersedes PR #413's (m3d4) prompt-side hand-mapping** — the prose instruction "the orchestrator maps the resolved id → alias at the dispatch seam," which told every dispatching agent to translate the id by hand on each dispatch. The live failure this fixes *was* an agent fumbling exactly that hand-map (it passed `claude-opus-4-8` verbatim into the Agent tool's `model` param, hitting "Invalid tool parameters"). Encoding the map in Go at the harness-adapter boundary `stage-models.md` already names makes the failure mode impossible to reproduce. Tier defaults were deliberately NOT switched to aliases (rejected — see below), so the provider-neutral full-ID default and the drift-guarded spec tables stay untouched. +**Rejected**: (a) Keeping the #413 prompt-only hand-mapping (brittle; the failure mode that prompted this change). (b) Switching `defaultTiers` / the two drift-guarded `stage-models.md` tables to aliases (breaks provider-neutrality, weakens the Fable version-pin discipline, and forces a coordinated edit across the Go map + spec tables + config comments + migration — pushing a harness quirk into the provider-neutral core; `TestDocTablesMatchAgentMaps` stays unaffected by keeping full IDs canonical). (c) Threading a bool into `formatAgentProfile` (couples the formatter to a flag it doesn't need — the empty-model branch already does the right thing because `ModelAlias("")` → `""`). (d) Making `--alias` a Claude-only validator that errors on non-Claude IDs (would break the provider-neutral pass-through). +*Introduced by*: 260613-yky7-resolve-agent-alias-flag + ### Preamble Context Diet — Consumer-Specific Content Moves to Opt-In Homes **Decision**: Content in the always-loaded `_preamble.md` that serves only a subset of skills (or no live skill) is relocated to opt-in homes, with short pointers left behind: the SRAD framework → new `_srad.md` helper (declared by the 6 planning skills); confidence-scoring schema/formula/template → `_cli-fab.md` § fab score (extended) (preamble keeps Gate Threshold + Invocation); Bulk Confirm → one-sentence pointer (`fab-clarify.md` Step 2 is sole authority); the dormant `[AUTO-MODE]` Skill Invocation Protocol → `fab-clarify.md` (its sole referencer; Auto Mode retained — user decision: move, not delete); Operator Spawning Rules → `_cli-external.md` wt section (one repo-targeting rule, duplicate dropped). The §1 always-load contract and the Next:-line MUST become **descriptive with a skill-file-wins override**, and the helper model gains **stage-conditional in-body loading** (used by `/fab-continue` for `_generation`/`_review`). Preamble: 32,790 → 22,313 B (−32.0%); every non-planning skill saves the full ~10.5KB per invocation; relocated content is paid only by its consumers. **Why**: The preamble was 2–26x the body of the skill being run and roughly a third of it served a small subset of skills. Duplicated copies (bulk-confirm trigger, spawning rules, restated context lists in fab-proceed/fab-discuss) had already drifted once. The existing `helpers:` mechanism plus fab-kit's `listSkills` auto-deploy (`internal/skills.go`; lived in `sync.go` until the 260612-tb6f split) meant the reduction needed zero Go changes and zero semantic loss — content moves, it doesn't disappear. @@ -229,6 +235,7 @@ The exception set is **declared by the skill files themselves** (the preamble ne | Change | Date | Summary | |--------|------|---------| +| 260613-yky7-resolve-agent-alias-flag | 2026-06-13 | **`fab resolve-agent --alias` is the deterministic Agent-tool model-half adapter** — replacing m3d4's prompt-side id→alias hand-map. New boolean `--alias` flag on `fab resolve-agent`: when set, the resolved model is mapped to its Claude-Code short alias (`opus`/`sonnet`/`haiku`/`fable`) on the `model=` line via the new exported `agent.ModelAlias` (`internal/agent`); default (absent) is byte-identical to today (full ID), the `effort=` line is unaffected, and empty/non-Claude models pass through verbatim. The mapping is prefix-based (`claude-opus-` → `opus`, so `claude-haiku-4-5-20251001` → `haiku`). The **two Claude-Code surfaces deliberately diverge**: the `claude` CLI `--model` flag (operator launcher) accepts full IDs and keeps resolving WITHOUT `--alias`; the Agent tool's `model` param is a hard enum that rejects full IDs, so all orchestrator/sequencer sub-agent dispatch (`_preamble.md` § Harness-adapter boundary + the `_pipeline.md`/`fab-ff`/`fab-fff`/`fab-continue` dispatch sites + `SPEC-_preamble.md`) resolves WITH `--alias`. The earlier m3d4 changelog row below is preserved as historical record of #413; only the LIVE Harness-adapter-boundary prose (here and in [pipeline/execution-skills.md](../pipeline/execution-skills.md)) is repointed. Tier defaults and the two drift-guarded `stage-models.md` tables are NOT touched (`TestDocTablesMatchAgentMaps` unaffected). New Design Decision "`--alias` Flag — Deterministic Go-Side id→alias Translation at the Agent-Tool Seam". | | 260613-3xaj-extract-intake-helper | 2026-06-13 | **Helper allowlist grows to seven values**: `_intake` added to the `_preamble.md` § Skill Helper Declaration allowed values (`_generation`, `_review`, `_cli-fab`, `_cli-external`, `_srad`, `_pipeline`, `_intake`). The new `_intake.md` internal helper holds the shared pre-boundary Create-Intake Procedure (fab-new Steps 0–9, parameterized by `{questioning-mode}`); `fab-new`/`fab-draft` declare `helpers: [_generation, _srad, _intake]` (keep declaring `_generation`/`_srad` directly per the `_pipeline` precedent), `_intake` itself carries no `helpers:`, and `/fab-proceed` keeps none (dispatches `_intake` as a subagent prompt). Helper mapping table updated (fab-new/fab-draft row; "all others" 17→16). Added the **one-shared-helper-per-pipeline-phase symmetry table** (`_generation`/`_review`/`_intake`/`_pipeline`) — `_intake` is the pre-boundary counterpart to the post-boundary `_pipeline`, both mirroring the shared-body + knob + call-site-tail shape. Standard Subagent Context prefix-step list updated (`/fab-proceed` dispatches `_intake`, not full `/fab-new`). See [pipeline/planning-skills.md](../pipeline/planning-skills.md) § The `_intake` Shared Create-Intake Procedure. | | 260613-m3d4-uniform-stage-model-tier | 2026-06-13 | **Per-Stage Model Resolution gains the two-seam dispatch contract + compliance visibility** (Change C — skills/docs prose only, no Go; gated on `260613-fgxx`). The resolved tier's **two halves now dispatch through two seams**: the **model** rides the Agent tool's `model` param (unchanged), the **effort** is injected as an imperative instruction in the subagent prompt (``Operate at `` reasoning effort for this task.``, omitted when empty) — closing Gap 2's effort half, which fgxx had left as a deferred follow-up (the Agent tool has no `effort` param). Added a **compliance-visibility** expectation (Gap 1b): every dispatch site surfaces the resolved `model=/effort=` (into the dispatch prompt and/or step output) so a skipped/mis-resolved `fab resolve-agent` call is visible rather than silent, plus a prose all-empty-resolution surface/assert note (no Go guard — dispatch is harness-internal). Harness-adapter boundary now records the **id→alias** detail (the `model` param takes `opus`/`sonnet`/`haiku`/`fable`, not the full `claude-*` id the resolver emits — the orchestrator maps at the seam) and names the prompt as the effort adapter. The lone residual is the **harness ask** — a first-class per-sub-agent `effort` param on the Agent tool, out of fab's control, not built. Mirrors `_preamble.md` § Per-Stage Model Resolution; the `_pipeline.md`/`fab-ff`/`fab-fff`/`fab-continue` dispatch sites and their SPEC mirrors carry the same two-seam phrasing (see [pipeline/execution-skills.md](../pipeline/execution-skills.md)); `docs/findings/per-stage-model-tier-application.md` and `docs/specs/stage-models.md` reconciled to this reality. | | 260613-fgxx-collapse-post-intake-dual-mode | 2026-06-13 | **Per-Stage Model Resolution reconciled to the single post-intake dispatch mode** (minimal reconcile — full uniform-tier closure deferred to a follow-up). The "Foreground is advisory-only" paragraph (and its `description:` frontmatter mention) is rewritten: since fgxx collapsed the post-intake dual execution mode, **every** post-intake stage dispatches a sub-agent — including plain `/fab-continue`, now a one-stage sequencer that resolves `fab resolve-agent ` and dispatches its stage's block — so per-stage selection applies uniformly across apply/review/hydrate regardless of caller, and the residual advisory case narrows to a stage skill genuinely run with no dispatch at all. Intake stays pre-boundary (not tiered). Does NOT write the full uniform-tiering / per-sub-agent-effort-knob story. Mirrors `_preamble.md` § Per-Stage Model Resolution (see [pipeline/execution-skills.md](../pipeline/execution-skills.md) for the fab-continue rewrite). | diff --git a/docs/memory/_shared/index.md b/docs/memory/_shared/index.md index b8d8b454..f757f8a3 100644 --- a/docs/memory/_shared/index.md +++ b/docs/memory/_shared/index.md @@ -8,4 +8,4 @@ description: "Cross-cutting concerns spanning all domains — config.yaml/consti | File | Description | Last Updated | |------|-------------|-------------| | [configuration](configuration.md) | `config.yaml` schema (incl. `fab_version`, `review_tools`, `true_impact_exclude`, `test_paths`, `stage_hooks`, `agent.tiers` per-stage-model override — sole surface, per-field merge over fab-kit defaults, NO validation/provider-neutral, fixed non-overridable stage→tier mapping, l3ja; `stage_directives` + `model_tiers` removed in c5tr via migration 2.1.6-to-2.2.0), single fab-module parser `internal/config` + nil-safe accessors incl. the coupled-Unmarshal fallback caveat (ye8r), companion files (`context.md`, `code-quality.md`, `code-review.md` incl. `## Parsimony Pass` toggle and the wired `## Rework Budget` Max-cycles knob), `constitution.md` governance, 5 Cs of Quality, lifecycle management | 2026-06-13 | -| [context-loading](context-loading.md) | Smart context loading convention — descriptive 7-file always-load layer (skill file wins; exception set rule-derived from skill files, never enumerated in the preamble — d9rs), opt-in skill helpers (7-value allowlist incl. `_srad`/`_pipeline`/`_intake`) + stage-conditional loading, standard subagent context (orchestrators incl. `/fab-proceed`), per-stage model resolution at the dispatch seam (`fab resolve-agent `, provider-neutral, Claude-Code adapter named, review-resolves-once — l3ja; applies on every post-intake stage since the single-dispatch collapse, advisory only for a genuinely no-dispatch run — fgxx; the two halves dispatch through two seams — model via the Agent `model` param (short alias opus/sonnet/haiku/fable, id→alias mapped at the seam), effort via an imperative subagent-prompt instruction (no Agent effort param; omit when empty), plus a compliance-visibility expectation that each site surface the resolved `model=/effort=` — m3d4; residual = a per-sub-agent effort param on the Agent tool, a harness ask), selective domain loading, SRAD protocol pointer, scoped Next Steps Convention, generic fab-command failure rule (unconditional non-zero exit → STOP; `fab log command` exits 0 by contract) | 2026-06-13 | +| [context-loading](context-loading.md) | Smart context loading convention — descriptive 7-file always-load layer (skill file wins; exception set rule-derived from skill files, never enumerated in the preamble — d9rs), opt-in skill helpers (7-value allowlist incl. `_srad`/`_pipeline`/`_intake`) + stage-conditional loading, standard subagent context (orchestrators incl. `/fab-proceed`), per-stage model resolution at the dispatch seam (`fab resolve-agent `, provider-neutral, Claude-Code adapter named, review-resolves-once — l3ja; applies on every post-intake stage since the single-dispatch collapse, advisory only for a genuinely no-dispatch run — fgxx; the two halves dispatch through two seams — model via the Agent `model` param (short-alias enum opus/sonnet/haiku/fable, resolved with `fab resolve-agent --alias` so the alias is emitted directly — the deterministic adapter that superseded m3d4's prompt-side id→alias hand-map; yky7), effort via an imperative subagent-prompt instruction (no Agent effort param; omit when empty), plus a compliance-visibility expectation that each site surface the resolved `model=/effort=` — m3d4; residual = a per-sub-agent effort param on the Agent tool, a harness ask), selective domain loading, SRAD protocol pointer, scoped Next Steps Convention, generic fab-command failure rule (unconditional non-zero exit → STOP; `fab log command` exits 0 by contract) | 2026-06-13 | diff --git a/docs/memory/memory-docs/index.md b/docs/memory/memory-docs/index.md index 616bb066..0093da98 100644 --- a/docs/memory/memory-docs/index.md +++ b/docs/memory/memory-docs/index.md @@ -11,4 +11,4 @@ description: "Authoring docs/memory & docs/specs — the hydrate skills (ingest | [hydrate-generate](hydrate-generate.md) | `/docs-hydrate-memory` generate mode — codebase scanning, gap detection, interactive scoping, memory file generation + placement rules (target path, domain/sub-domain index stubs, shape bounds — d9rs) | 2026-06-12 | | [hydrate-specs](hydrate-specs.md) | `/docs-hydrate-specs` skill — structural gap detection between memory and specs, interactive propose-then-apply (incl. the no-target new-spec-file branch and aligned prompt/handler tokens — d9rs) | 2026-06-12 | | [specs-index](specs-index.md) | `docs/specs/` directory — pre-implementation specs, distinction from memory, bootstrap and context integration, per-skill SPEC mirror coverage + naming policy (`SPEC-{source-filename}.md`; `_cli-fab`/`_cli-external` excluded — uliv); mirrors are reserved paths for `docs-reorg-specs` (d9rs) | 2026-06-12 | -| [templates](templates.md) | Artifact templates (intake, plan), skill frontmatter (incl. `helpers:` + the one-shared-helper-per-phase decomposition `_generation`/`_review`/`_intake`/`_pipeline`, symmetry completed by 3xaj), and memory file format (incl. `description:` frontmatter + generated domains-only index, tciy). `plan.md` (`## Requirements` + `## Tasks` + `## Acceptance` + optional review-owned `## Deletion Candidates`) absorbs the former `spec.md` (j6cs) and the prior `tasks.md` + `checklist.md` pair; acceptance R# mandate scoped to requirement-derived categories, plan Assumptions = three grades, no Status header lines (uliv) | 2026-06-12 | +| [templates](templates.md) | Artifact templates (intake, plan), skill frontmatter (incl. `helpers:` + the one-shared-helper-per-phase decomposition `_generation`/`_review`/`_intake`/`_pipeline`, symmetry completed by 3xaj), and memory file format (incl. `description:` frontmatter + generated domains-only index, tciy). `plan.md` (`## Requirements` + `## Tasks` + `## Acceptance` + optional review-owned `## Deletion Candidates`) absorbs the former `spec.md` (j6cs) and the prior `tasks.md` + `checklist.md` pair; acceptance R# mandate scoped to requirement-derived categories, plan Assumptions = three grades, no Status header lines (uliv) | 2026-06-13 | diff --git a/docs/memory/pipeline/execution-skills.md b/docs/memory/pipeline/execution-skills.md index 76f621b6..12e316b6 100644 --- a/docs/memory/pipeline/execution-skills.md +++ b/docs/memory/pipeline/execution-skills.md @@ -17,7 +17,7 @@ Execution behavior (apply, review, hydrate) is accessed via `/fab-continue`, whi **Status-transition ownership (orchestrator/sequencer owns — single post-intake dispatch mode, fgxx)**: Intake is the **sole context boundary** — it is the only stage `/fab-continue` runs in the main session. **Every post-intake stage (apply / review / hydrate) is ALWAYS dispatched as a sub-agent**, the SAME dispatch the orchestrators (`_pipeline.md`) perform; there is no foreground execution path for those stages. The dual execution mode was collapsed in fgxx: the three former caller-aware "When invoked as a subagent: do NOT run `fab status`" conditionals (one each in Apply/Review/Hydrate Behavior) are removed, because with one execution mode the conditional has no second branch to express. The dispatch is identical whether the caller is **manual `/fab-continue`** (itself a **one-stage sequencer**) or an **orchestrator** (`/fab-ff`/`/fab-fff`). -The owning sequencer is a **pure sequencer**: *dispatch block → read returned status/findings → decide proceed / loop / stop*. It runs `fab resolve-agent ` immediately before each post-intake dispatch, **surfaces the resolved `model=/effort=`** (carried into the dispatch prompt and/or echoed in step output — Gap 1b compliance visibility, so a skipped/mis-resolved tier is visible rather than silent), and applies the tier through **two seams** (m3d4): the **model** half via the Agent tool's `model` param (empty ⇒ omit/inherit; the param takes a short alias `opus`/`sonnet`/`haiku`/`fable`, not the full `claude-*` id the resolver emits — the orchestrator maps id→alias at the seam) and the **effort** half via an imperative instruction in the subagent prompt (``Operate at `` reasoning effort for this task.``; omitted when empty — the Agent tool has no `effort` param). This same two-seam + visibility contract governs the whole `fab-ff`/`fab-fff` orchestrator family — the shared `_pipeline.md` bracket's per-stage dispatch note + Steps 1–3 + Auto-Rework re-apply/re-review, and `fab-fff.md` Steps 4–5 (ship/review-pr) — with `review` resolving **once** for both reviewers + merge (same model + same effort instruction). The canonical contract is `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution (mirrored in [_shared/context-loading.md](../_shared/context-loading.md) § Per-Stage Model Resolution); the lone residual is a first-class per-sub-agent `effort` param on the Agent tool, a harness ask outside fab's control. It owns the `fab status` transitions (`finish`/`fail`/`reset`) itself; it never reaches into block internals. Every `/fab-continue`-behavior subagent prompt includes "do NOT run `fab status` commands; return results only" — this is now the **universal block contract** carried in the dispatch prompt (stated once in `_pipeline.md`'s Behavior note), not a per-caller skip rule baked into the block. The block returns results/findings only; it never branches on caller and takes no §Verdict-style decision itself. (For hydrate the orchestrator runs the finish, e.g. `fab status finish hydrate fab-ff` for the auto path or `... fab-continue` for the manual sequencer.) The **failure-policy fork is invocation-level**, in the orchestrator, not the block: Path A (manual `/fab-continue`) runs the interactive § Verdict rework menu on the dispatched review block's returned findings; Paths B/C/D (`/fab-ff`/`/fab-fff`) run `_pipeline.md`'s autonomous Auto-Rework Loop. The block always returns findings; **who** acts on a fail verdict is the orchestrator's concern — no "skip §Verdict when subagent" flag re-encodes the removed conditional. +The owning sequencer is a **pure sequencer**: *dispatch block → read returned status/findings → decide proceed / loop / stop*. It runs `fab resolve-agent --alias` immediately before each post-intake dispatch, **surfaces the resolved `model=/effort=`** (carried into the dispatch prompt and/or echoed in step output — Gap 1b compliance visibility, so a skipped/mis-resolved tier is visible rather than silent), and applies the tier through **two seams** (m3d4): the **model** half via the Agent tool's `model` param (empty ⇒ omit/inherit; the param is a hard short-alias enum `opus`/`sonnet`/`haiku`/`fable`, not the full `claude-*` id the plain command emits — so the model half is resolved with `fab resolve-agent --alias`, which emits the alias directly on the `model=` line, a deterministic Go-side translation that superseded m3d4's prompt-side id→alias hand-map; yky7) and the **effort** half via an imperative instruction in the subagent prompt (``Operate at `` reasoning effort for this task.``; omitted when empty — the Agent tool has no `effort` param). This same two-seam + visibility contract governs the whole `fab-ff`/`fab-fff` orchestrator family — the shared `_pipeline.md` bracket's per-stage dispatch note + Steps 1–3 + Auto-Rework re-apply/re-review, and `fab-fff.md` Steps 4–5 (ship/review-pr) — with `review` resolving **once** for both reviewers + merge (same model + same effort instruction). The canonical contract is `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution (mirrored in [_shared/context-loading.md](../_shared/context-loading.md) § Per-Stage Model Resolution); the lone residual is a first-class per-sub-agent `effort` param on the Agent tool, a harness ask outside fab's control. It owns the `fab status` transitions (`finish`/`fail`/`reset`) itself; it never reaches into block internals. Every `/fab-continue`-behavior subagent prompt includes "do NOT run `fab status` commands; return results only" — this is now the **universal block contract** carried in the dispatch prompt (stated once in `_pipeline.md`'s Behavior note), not a per-caller skip rule baked into the block. The block returns results/findings only; it never branches on caller and takes no §Verdict-style decision itself. (For hydrate the orchestrator runs the finish, e.g. `fab status finish hydrate fab-ff` for the auto path or `... fab-continue` for the manual sequencer.) The **failure-policy fork is invocation-level**, in the orchestrator, not the block: Path A (manual `/fab-continue`) runs the interactive § Verdict rework menu on the dispatched review block's returned findings; Paths B/C/D (`/fab-ff`/`/fab-fff`) run `_pipeline.md`'s autonomous Auto-Rework Loop. The block always returns findings; **who** acts on a fail verdict is the orchestrator's concern — no "skip §Verdict when subagent" flag re-encodes the removed conditional. Ship and review-pr are the exception: `/git-pr` and `/git-pr-review` manage their own stage transitions internally, so `/fab-continue`'s ship and review-pr dispatch rows — which since w7dp pass the resolved change explicitly (`/git-pr {name}` / `/git-pr-review {name}`, the transient-override + branch-guard contract) and cite `active` only (`ready` is unreachable: ship's AllowedStates are `{pending, active, done, skipped}`, review-pr's add `failed`) — run `finish` only if the stage is still `active` after the behavior returns. The review-pr **Fail** branch carries the same only-if-still-active guard (uliv) — git-pr-review's Step 6 normally runs `fail` itself, so fab-continue runs `fail review-pr` only when the stage is still `active` after the behavior returns, never as a second (CLI-rejected) `fail`. And the review-pr **timeout** outcome deliberately leaves the stage `active` — report and stop, no re-finish. This matches `_review.md`'s "verdict transitions remain in each orchestrator's own file" and eliminates textually-mandated double-finish CLI errors. @@ -439,6 +439,7 @@ Steps execute 1→3 for safety. If interrupted, re-run detects folder already in | Change | Date | Summary | |--------|------|---------| +| 260613-yky7-resolve-agent-alias-flag | 2026-06-13 | **Every Agent-tool-dispatch `fab resolve-agent` call repointed to `--alias`** — the deterministic Go-side id→alias adapter that supersedes m3d4's prompt-side hand-map. The orchestrator/sequencer's per-stage model half is now resolved with `fab resolve-agent --alias` (the flag emits the Agent-tool-valid short alias directly on the `model=` line) at every dispatch site: `_pipeline.md`'s per-stage-model Behavior note + Steps 1/2/3 (apply/review/hydrate) + Auto-Rework re-apply/re-review items 3/4, `fab-fff.md` Steps 4–5 (ship/review-pr) + its per-stage-model note, `fab-ff.md`'s sibling note, and `fab-continue.md`'s one-stage-sequencer header note + post-intake dispatch contract + nested-reviewers note + dispatch-table rows (apply/review/hydrate). The Status-transition-ownership prose here (the pure-sequencer paragraph) is repointed: the model half resolves with `--alias`, no agent hand-maps the id. The effort seam (imperative subagent-prompt instruction) is unchanged from m3d4. The operator path deliberately stays on the plain `fab resolve-agent apply` (CLI `--model` accepts full IDs) — see [runtime/operator.md](../runtime/operator.md). Go: new `--alias` flag on `resolveAgentCmd` + exported `agent.ModelAlias` (prefix-matched, empty/non-Claude verbatim). The m3d4 row below is preserved as historical record of #413. Canonical contract + Design Decision: [_shared/context-loading.md](../_shared/context-loading.md) § Per-Stage Model Resolution + § `--alias` Flag. | | 260613-3xaj-extract-intake-helper | 2026-06-13 | **`/fab-proceed` create-intake dispatch rerouted to `_intake(promptless-defer)`** (Change B of the skill-architecture refactor trio — markdown/skill only, no Go changes). The `/fab-proceed` dispatch table's two **create-new rows** now read `_intake → /fab-switch → /git-branch → /fab-fff` (was `/fab-new → /fab-fff`): the create-an-intake sub-operation dispatches the shared `_intake` Create-Intake Procedure (`{questioning-mode} = promptless-defer`) instead of the full `/fab-new` skill, and because `_intake` stops at intake `ready` without activating or branching, the `/fab-switch` (activate) + `/git-branch` (branch) chain is now **required** for parity — **inverting** the w7dp claim that the create-new rows could drop `/git-branch` as a no-op (that held only while they invoked full `/fab-new`, whose Step 11 branched inline). `/fab-proceed`'s state-detection, relevance assessment, asymmetric-bias rule, bypass notes, Conversation Context Synthesis, and terminal `/fab-fff` delegation are all unchanged — only the create-an-intake leaf is rerouted. `/fab-draft` reconciled to a thin `_intake(interactive)` call-site (superseding the szxd "thin delta over fab-new" description; momentum warning retired). The `/fab-proceed.md` § heading renamed `fab-new Dispatch` → `Create-Intake Dispatch`. Stale prose swept (bypass-notes, prefix-step list, change-creation inventory, dispatch table + rationale). Full `_intake` extraction recorded in [planning-skills.md](planning-skills.md) § The `_intake` Shared Create-Intake Procedure. | | 260613-m3d4-uniform-stage-model-tier | 2026-06-13 | **Per-stage dispatch sites surface the resolved tier + inject effort via the subagent prompt (two-seam dispatch contract)** (Change C — skills/docs prose only, no Go; gated on `260613-fgxx`). Every per-stage dispatch site in the `fab-ff`/`fab-fff` orchestrator family — the shared `_pipeline.md` bracket (Behavior "Per-stage model resolution" note + Steps 1 apply / 2 review / 3 hydrate + Auto-Rework re-apply/re-review items 3/4) and `fab-fff.md` Steps 4–5 (ship/review-pr) — now (1) **surfaces** the resolved `model=/effort=` after `fab resolve-agent ` (carried into the dispatch prompt and/or echoed in step output, so a skipped/mis-resolved tier is **visible rather than silent** — Gap 1b compliance visibility), and (2) applies the tier through **two seams**: the **model** half via the Agent tool's `model` param (unchanged) and the **effort** half via an imperative instruction in the subagent prompt (``Operate at `` reasoning effort for this task.``, omitted when empty — Gap 2's effort half, previously left as fgxx's deferred follow-up; the Agent tool has no `effort` param). `review` resolves **once** for both reviewers + merge (same model + same effort instruction). `fab-continue.md`'s one-stage-sequencer header note + post-intake dispatch contract + nested-reviewers note carry the same two-seam phrasing; `fab-ff.md`'s sibling note (structural twin of fab-fff's) reconciled to match. The harness-adapter boundary records the **id→alias** detail (the `model` param takes `opus`/`sonnet`/`haiku`/`fable`, not the full `claude-*` id the resolver emits). The lone residual is the **harness ask** — a first-class per-sub-agent `effort` param on the Agent tool, out of fab's control, not built. SPEC mirrors updated same-PR: `SPEC-_pipeline.md`, `SPEC-_preamble.md`, `SPEC-fab-fff.md`, `SPEC-fab-ff.md`, `SPEC-fab-continue.md`. `docs/findings/per-stage-model-tier-application.md` (Gap 1a closed by fgxx, Gap 1b + Gap 2-effort addressed here, item 4 = residual) and `docs/specs/stage-models.md` reconciled. Canonical contract: [_shared/context-loading.md](../_shared/context-loading.md) § Per-Stage Model Resolution. | | 260613-fgxx-collapse-post-intake-dual-mode | 2026-06-13 | **Collapsed the post-intake dual execution mode — one execution mode, orchestrators as pure sequencers** (skills+specs prose refactor + one Go test; no production Go change, no new CLI command). Guiding principle (newly named): **intake is the sole context boundary** — the main session runs up to and including intake; after intake the intake artifact IS the context and every post-intake stage (apply/review/hydrate) runs as a dispatched sub-agent fed by `intake.md` + `plan.md` + `docs/memory/`. `fab-continue.md`: the three caller-aware "When invoked as a subagent: do NOT run `fab status`" blockquote conditionals (Apply/Review/Hydrate Behavior) are **removed** — with one execution mode there is no second branch; plain `/fab-continue` is now a **one-stage sequencer** that runs `fab resolve-agent ` then dispatches the stage's block exactly as the orchestrators do, and owns the `finish`/`fail`/`reset` transition after it returns. The "do NOT run `fab status`; return results only" prompt instruction is now the **universal block contract** (carried in the dispatch prompt), not a per-caller skip rule in the block; it is NOT re-encoded as any "skip §Verdict when subagent" flag. The interactive § Verdict rework menu (Path A) and `_pipeline.md`'s autonomous Auto-Rework Loop (Paths B/C/D) **remain** as the invocation-level failure-policy fork. Header note reconciled from "foreground = advisory-only" to the one-stage-sequencer framing. `_pipeline.md` Behavior note affirmed as **pure sequencer** (dispatch → read → proceed/loop/stop) with the universal-contract framing; per-cycle rework choreography (fail+reset pair) and the history-shape invariant **unchanged in semantics**. `_preamble.md` § Per-Stage Model Resolution "Foreground is advisory-only" minimally reconciled (residual advisory only for a genuinely no-dispatch run; closes Gap 1a — uniform tiering across apply/review/hydrate; full uniform-tier closure deferred to a follow-up). New Go test `TestHistoryShape_IdenticalRegardlessOfDriver` in `internal/status/mutators_test.go` pins that the manual (`driver=""`) and dispatched (`driver="fab-fff"`) paths leave `.history.jsonl` entries of identical shape — agreeing on every caller-blind field (stage/action/from/reason), equal modulo the per-run `ts` timestamp and the optional `driver` field (see [schemas.md](schemas.md)). SPEC mirrors updated same-PR: `SPEC-fab-continue.md` (f006 → universal block contract), `SPEC-_pipeline.md`, `SPEC-_preamble.md`, `docs/specs/stage-models.md` § Foreground limitation. `runtime/operator.md` verified — no foreground-post-intake assumption (no-op). | diff --git a/docs/memory/pipeline/index.md b/docs/memory/pipeline/index.md index 7f572351..2a74eeaa 100644 --- a/docs/memory/pipeline/index.md +++ b/docs/memory/pipeline/index.md @@ -10,6 +10,6 @@ description: "The change pipeline — stage lifecycle & state machine, planning/ | [change-lifecycle](change-lifecycle.md) | Change naming, folder structure, `.status.yaml` (6-stage pipeline + `plan:` block, flock-serialized writes + fsync durability + sparse-key insertion), `.fab-status.yaml` symlink (atomic temp+rename swap, dangling-target validation), `stage_hooks` pre/post commands around start/finish (documented in c5tr), git integration (bookkeeping decoupled; ship-time branch↔change hard guard since w7dp), `/fab-status` (six-glyph progress legend incl. `⏭` skipped), `/fab-switch` (six-state `{state}` qualifier), backlog scanning | 2026-06-12 | | [clarify](clarify.md) | `/fab-clarify` skill — intake-only (j6cs), dual modes (suggest/auto), intake taxonomy scan, structured questions, coverage reports, audit trail, grade reclassification, always-recompute intake score; hosts the [AUTO-MODE] Skill Invocation Protocol and is sole bulk-confirm authority (zc9m); bulk confirm evaluated before the zero-gaps exit, confirmations graded by recomputed composite (no fiat Certain), unified audit-trail placement rules (c5tr) | 2026-06-12 | | [execution-skills](execution-skills.md) | Apply (unified `plan.md` `## Requirements`+`## Tasks`+`## Acceptance` generated at entry), review, hydrate, archive, and orchestrator behavior — `/fab-continue` for pipeline stages, `/fab-archive` for housekeeping, `/fab-proceed` for context-aware pipeline orchestration; `/git-pr` ship behavior with the mechanically-rendered `## Meta` block via `fab pr-meta`, a unified Step 0 resolution (szxd), and git-state hardening — detached-HEAD STOP, expected-area staging guard, PR-state branching, resolved default-branch guard (g8st); `/git-pr-review` split commit/push failure semantics + unpushed-commit re-run gate (g8st); fab-continue loads `_generation`/`_review` stage-conditionally at point of use (zc9m); the shared ff/fff bracket lives in `_pipeline.md` with once-stated rework choreography (cycle cap = the `{max_cycles}` knob from code-review.md § Rework Budget, default 3 — c5tr), exhaustion terminal state review:failed, and fab-continue's review-failed rework-menu dispatch row (szxd); re-archiving a genuinely archived change soft-skips (exit 0) and restore `--switch` surfaces `pointer: failed` (k4ge); archive/restore index writes are atomic with honest `index: failed` reporting on both paths (hv7t); `/git-pr`/`/git-pr-review` accept an optional explicit `` argument and guard branch↔change correspondence before any mutation, fab-continue gained the review-pr/failed dispatch row + idempotent reset, and recovery guidance is override-aware (w7dp). Operator coordination moved to runtime/operator.md. | 2026-06-13 | -| [planning-skills](planning-skills.md) | `/fab-new`, `/fab-continue`, `/fab-ff`, `/fab-clarify` — the planning stage (intake only) and the shared `_generation.md` partial (Intake + unified Plan procedures); requirement capture + plan generation live at apply entry (spec stage removed in j6cs); fab-new/fab-draft re-run/resume semantics (ID collisions route to resume, 9u91); change_type is hook-owned — skills verify via .status.yaml and override only if wrong (uliv); SRAD framework lives in the `_srad` helper, declared by the 6 planning skills (zc9m); pre-boundary intake creation (fab-new Steps 0–9) lives in the shared `_intake` helper parameterized by `{questioning-mode}` (interactive | promptless-defer) — fab-new/fab-draft/fab-proceed are thin call-sites, completing the one-shared-helper-per-phase symmetry with `_pipeline`; fab-new keeps the activate/branch tail, fab-draft stops at ready (momentum warning retired), fab-proceed dispatches promptless-defer + chains /fab-switch→/git-branch (3xaj); fab-ff/fab-fff are thin wrappers over the shared `_pipeline` bracket, fab-new Step 11 is a single first-match-wins branch table (szxd; 6 rows since g8st — remote-only `--track` checkout, dirty-tree carried-over note excluding the change's own artifacts, same-change rename); SRAD grades by half-open thresholds with one `R<25 AND A<25` Critical-Rule definition, the plan walk emits `## Assumptions` explicitly, artifacts always carry the section (omit-when-zero is display-only), fab-new output puts the Assumptions block last (c5tr); fab-new's backlog-ID collision pre-check is exact-ID anchored (`fab resolve --id` equality) and fab-proceed's promptless `_intake` dispatch carries the defer-and-surface contract with the matching `_srad` Critical-Rule carve-out (w7dp); fab-new/fab-draft `Next:` lines derive at runtime per the _preamble Lookup Procedure and `_generation` names fab-continue in both consumer groups (d9rs) | 2026-06-12 | +| [planning-skills](planning-skills.md) | `/fab-new`, `/fab-continue`, `/fab-ff`, `/fab-clarify` — the planning stage (intake only) and the shared `_generation.md` partial (Intake + unified Plan procedures); requirement capture + plan generation live at apply entry (spec stage removed in j6cs); fab-new/fab-draft re-run/resume semantics (ID collisions route to resume, 9u91); change_type is hook-owned — skills verify via .status.yaml and override only if wrong (uliv); SRAD framework lives in the `_srad` helper, declared by the 6 planning skills (zc9m); pre-boundary intake creation (fab-new Steps 0–9) lives in the shared `_intake` helper parameterized by `{questioning-mode}` (interactive | promptless-defer) — fab-new/fab-draft/fab-proceed are thin call-sites, completing the one-shared-helper-per-phase symmetry with `_pipeline`; fab-new keeps the activate/branch tail, fab-draft stops at ready (momentum warning retired), fab-proceed dispatches promptless-defer + chains /fab-switch→/git-branch (3xaj); fab-ff/fab-fff are thin wrappers over the shared `_pipeline` bracket, fab-new Step 11 is a single first-match-wins branch table (szxd; 6 rows since g8st — remote-only `--track` checkout, dirty-tree carried-over note excluding the change's own artifacts, same-change rename); SRAD grades by half-open thresholds with one `R<25 AND A<25` Critical-Rule definition, the plan walk emits `## Assumptions` explicitly, artifacts always carry the section (omit-when-zero is display-only), fab-new output puts the Assumptions block last (c5tr); fab-new's backlog-ID collision pre-check is exact-ID anchored (`fab resolve --id` equality) and fab-proceed's promptless `_intake` dispatch carries the defer-and-surface contract with the matching `_srad` Critical-Rule carve-out (w7dp); fab-new/fab-draft `Next:` lines derive at runtime per the _preamble Lookup Procedure and `_generation` names fab-continue in both consumer groups (d9rs) | 2026-06-13 | | [preflight](preflight.md) | `lib/preflight.sh` script — validation, accessor-based architecture, structured YAML output, skill integration | 2026-06-12 | | [schemas](schemas.md) | Workflow schema authority — the Go state machine (`internal/status` transitions + `internal/statusfile` stage order/progress; declarative `workflow.yaml` retired in c5tr): 6-stage pipeline, states, transitions, validation rules; `.status.yaml` `plan:` (`## Requirements`-aware), `confidence:` (indicative retired), and lazy `true_impact:` block schemas (incl. the `tests` sub-block + render-time `impl` residual, 7t5a); `fab impact` and `fab pr-meta` helper subcommands (rj31); allowed-states-enforced transition targets, `fab score --check-gate` non-zero gate-fail exit, iterations-preserving reset cascade (k4ge); `fab score` normal-mode hard-fail on load/persist/read errors (hv7t); per-stage allowed-states + transition-event tables enumerated, pinned by the exhaustive 216-cell matrix test (tb6f) | 2026-06-13 | diff --git a/docs/specs/skills/SPEC-_preamble.md b/docs/specs/skills/SPEC-_preamble.md index 24314fa3..707016e0 100644 --- a/docs/specs/skills/SPEC-_preamble.md +++ b/docs/specs/skills/SPEC-_preamble.md @@ -2,7 +2,7 @@ ## Summary -Shared context preamble loaded by every Fab skill. Defines path conventions, context loading layers (always-load — descriptive, with a skill-file-wins override and a derived, never-enumerated exception set; change context; memory lookup; source code), the **Skill Helper Declaration** frontmatter convention (including stage-conditional in-body loading), inlined **Naming Conventions**, inlined **Run-Kit (rk) Reference**, the **Common fab Commands** headline table, the next-steps convention (with a skill-file-declared ending opt-out) with state table, a pointer to the skill invocation protocol (defined in `fab-clarify.md` since 260611-zc9m), subagent dispatch pattern with standard subagent context and **Per-Stage Model Resolution** (260613-l3ja — `fab resolve-agent ` before each pipeline-stage dispatch; resolved model+effort passed to the Agent dispatch with empty ⇒ omit/inherit; review resolves once for both reviewers + merge; per-stage selection applies on every post-intake stage — every post-intake stage now dispatches a sub-agent (including plain `/fab-continue` as a one-stage sequencer), so `fab resolve-agent` applies uniformly across apply/review/hydrate, with the residual advisory case narrowed to a stage skill genuinely run with no dispatch at all (260613-fgxx); **the two halves dispatch through two seams (260613-m3d4)** — model via the Agent tool `model` param (which takes a short alias `opus`/`sonnet`/`haiku`/`fable`, so the orchestrator maps the resolved full id → alias at the seam) and effort via an explicit imperative instruction in the subagent prompt (the Agent tool has no effort param; omitted when empty), plus a **compliance-visibility** expectation that each site surface the resolved `model=/effort=` so a skipped/mis-resolved tier is visible rather than silent; resolution itself stays provider-neutral; the lone residual is a first-class per-sub-agent effort param on the Agent tool — a harness ask outside fab's control), a pointer to the SRAD autonomy framework (extracted to `_srad.md` in 260611-zc9m), and slimmed confidence scoring (gate threshold + invocation; schema/formula/template moved to `_cli-fab.md` § fab score). +Shared context preamble loaded by every Fab skill. Defines path conventions, context loading layers (always-load — descriptive, with a skill-file-wins override and a derived, never-enumerated exception set; change context; memory lookup; source code), the **Skill Helper Declaration** frontmatter convention (including stage-conditional in-body loading), inlined **Naming Conventions**, inlined **Run-Kit (rk) Reference**, the **Common fab Commands** headline table, the next-steps convention (with a skill-file-declared ending opt-out) with state table, a pointer to the skill invocation protocol (defined in `fab-clarify.md` since 260611-zc9m), subagent dispatch pattern with standard subagent context and **Per-Stage Model Resolution** (260613-l3ja — `fab resolve-agent ` before each pipeline-stage dispatch; resolved model+effort passed to the Agent dispatch with empty ⇒ omit/inherit; review resolves once for both reviewers + merge; per-stage selection applies on every post-intake stage — every post-intake stage now dispatches a sub-agent (including plain `/fab-continue` as a one-stage sequencer), so `fab resolve-agent` applies uniformly across apply/review/hydrate, with the residual advisory case narrowed to a stage skill genuinely run with no dispatch at all (260613-fgxx); **the two halves dispatch through two seams (260613-m3d4)** — model via the Agent tool `model` param (a hard enum of short aliases `opus`/`sonnet`/`haiku`/`fable`, resolved with `fab resolve-agent --alias` so the alias is emitted directly — the deterministic Agent-tool adapter that replaced the earlier prompt-side id→alias hand-mapping; 260613-yky7) and effort via an explicit imperative instruction in the subagent prompt (the Agent tool has no effort param; omitted when empty), plus a **compliance-visibility** expectation that each site surface the resolved `model=/effort=` so a skipped/mis-resolved tier is visible rather than silent; resolution itself stays provider-neutral; the lone residual is a first-class per-sub-agent effort param on the Agent tool — a harness ask outside fab's control), a pointer to the SRAD autonomy framework (extracted to `_srad.md` in 260611-zc9m), and slimmed confidence scoring (gate threshold + invocation; schema/formula/template moved to `_cli-fab.md` § fab score). This is an internal partial (`user-invocable: false`) — it is never invoked directly. Skills reference it via the opening instruction: "Read the `_preamble` skill first (deployed to `.claude/skills/` via `fab sync`). Then follow its instructions before proceeding." @@ -93,8 +93,9 @@ Skill reads _preamble.md │ resolved model=/effort= (visibility — a skip is │ then detectable; 260613-m3d4), then dispatch via │ TWO SEAMS: model → Agent tool `model` param -│ (empty ⇒ omit/inherit; param takes a short alias -│ opus/sonnet/haiku/fable, orchestrator maps id→alias) +│ (empty ⇒ omit/inherit; param is a short-alias enum +│ opus/sonnet/haiku/fable — resolve with +│ `fab resolve-agent --alias`, yky7) │ and effort → imperative instruction in the subagent │ prompt (no Agent effort param; empty ⇒ omit; m3d4). │ Resolution itself is provider-neutral; diff --git a/docs/specs/stage-models.md b/docs/specs/stage-models.md index d91b9736..05bc47d0 100644 --- a/docs/specs/stage-models.md +++ b/docs/specs/stage-models.md @@ -208,7 +208,7 @@ resolved `model=/effort=` lines (so a skipped or mis-resolved tier is visible in silent — the available stand-in for an enforcement guard, since dispatch is harness-internal), and apply the resolved **model AND effort** through their two seams: -- **Model → the Agent tool's `model` param.** Empty model → omit it (inherit session/orchestrator model — today's behavior). +- **Model → the Agent tool's `model` param.** The Agent `model` param is a hard enum of short aliases (`opus`/`sonnet`/`haiku`/`fable`) that rejects full IDs, so the model half is resolved with `fab resolve-agent --alias` — the `--alias` flag emits the Agent-tool-valid short alias directly on the `model=` line (see § Harness-adapter boundary). Empty model → omit it (inherit session/orchestrator model — today's behavior). - **Effort → an explicit instruction in the subagent prompt.** The Agent tool has no `effort` param, so the resolved effort is injected as an imperative line in the dispatched prompt (e.g., ``Operate at `xhigh` reasoning effort for this task.``) and the sub-agent self-selects. Empty effort → omit the instruction. (The effort half is therefore **no longer dropped** — earlier wiring had no seam for it; it now rides the prompt. The clean fix, a first-class per-sub-agent effort parameter on the Agent tool, is a harness ask outside fab's control — see § Foreground limitation's scope note.) The **`review` stage resolves once** and applies the same `{model, effort}` to BOTH reviewer sub-agents @@ -231,14 +231,21 @@ Per-stage selection is **provider-neutral by construction**, not Claude-locked: and nothing in fab rejects it. - *Harness-specific layer (the adapter):* injecting the resolved model+effort into the actual sub-agent dispatch is harness behavior, and the two halves use **two different seams** in Claude - Code. **The model rides the Agent tool's `model` parameter** (which takes a short alias — - `opus`/`sonnet`/`haiku`/`fable` — not the full versioned id `fab resolve-agent` emits, so the - orchestrator maps id → alias at the seam); **the effort rides an instruction in the subagent prompt** - (the Agent tool exposes no effort parameter). The skill wiring names both explicitly as the - Claude-Code adapter, not as universal truth. This coupling is **not introduced by this feature** — - fab's entire existing subagent-dispatch design (`_preamble.md` § Subagent Dispatch) is already - Claude-Code-shaped. Per-stage selection is exactly as portable as fab's existing dispatch: no more, - no less. + Code. **The model rides the Agent tool's `model` parameter** — a hard enum that takes a short alias + (`opus`/`sonnet`/`haiku`/`fable`), not the full versioned id the plain resolver emits — so the model + half is resolved with **`fab resolve-agent --alias`**, the deterministic Agent-tool adapter: + the `--alias` flag maps the resolved full ID to its short alias on the `model=` line (prefix-matched, + so dated variants like `claude-haiku-4-5-20251001` resolve to `haiku`; empty ⇒ empty inherit-signal; + a non-Claude override passes through verbatim). This replaces the earlier prompt-side hand-mapping + instruction (where the orchestrator was told to translate the id by hand on every dispatch — brittle + and easy to fumble) with a Go-side translation that cannot be skipped. **The effort rides an + instruction in the subagent prompt** (the Agent tool exposes no effort parameter). The skill wiring + names both explicitly as the Claude-Code adapter, not as universal truth. This coupling is **not + introduced by this feature** — fab's entire existing subagent-dispatch design (`_preamble.md` § + Subagent Dispatch) is already Claude-Code-shaped. Per-stage selection is exactly as portable as fab's + existing dispatch: no more, no less. *(The operator launcher path is the deliberate exception — it + appends `--model ` to a `claude` CLI invocation, which accepts full IDs, so it resolves + WITHOUT `--alias`.)* - *Claude-flavored data (overridable):* fab-kit's shipped default table uses Claude model IDs/effort. These are documented as "fab-kit's Claude defaults," fully replaceable via `agent.tiers`. - *v1 scope is architecture-neutral + documented — NOT shipped/tested against a non-Claude harness.* No diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl b/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl index a0c7cc0d..9c4c037b 100644 --- a/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl @@ -2,3 +2,11 @@ {"args":"Add --alias flag to fab resolve-agent that emits Claude-Code short model alias (opus/sonnet/haiku/fable) instead of full model ID, fixing Agent-tool dispatch enum rejection","cmd":"fab-new","event":"command","ts":"2026-06-13T12:44:37Z"} {"delta":"+3.8","event":"confidence","score":3.8,"trigger":"calc-score","ts":"2026-06-13T12:45:53Z"} {"delta":"+0.0","event":"confidence","score":3.8,"trigger":"calc-score","ts":"2026-06-13T12:45:58Z"} +{"delta":"+0.3","event":"confidence","score":4.1,"trigger":"calc-score","ts":"2026-06-13T14:49:59Z"} +{"delta":"+0.0","event":"confidence","score":4.1,"trigger":"calc-score","ts":"2026-06-13T14:50:03Z"} +{"cmd":"fab-fff","event":"command","ts":"2026-06-13T14:50:35Z"} +{"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"apply","ts":"2026-06-13T14:50:39Z"} +{"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"review","ts":"2026-06-13T14:59:57Z"} +{"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"hydrate","ts":"2026-06-13T15:06:52Z"} +{"event":"review","result":"passed","ts":"2026-06-13T15:06:52Z"} +{"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"ship","ts":"2026-06-13T15:11:58Z"} diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml b/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml index c485c505..d0065156 100644 --- a/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml @@ -5,31 +5,49 @@ created_by: sahil-noon change_type: fix issues: [] progress: - intake: ready - apply: pending - review: pending - hydrate: pending - ship: pending + intake: done + apply: done + review: done + hydrate: done + ship: active review-pr: pending plan: - generated: false - task_count: 0 - acceptance_count: 0 - acceptance_completed: 0 + generated: true + task_count: 12 + acceptance_count: 15 + acceptance_completed: 15 confidence: - certain: 3 - confident: 4 + certain: 4 + confident: 3 tentative: 0 unresolved: 0 - score: 3.8 + score: 4.1 fuzzy: true dimensions: - signal: 82.1 + signal: 83.6 reversibility: 81.4 - competence: 87.9 - disambiguation: 85.0 + competence: 88.1 + disambiguation: 85.7 stage_metrics: - intake: {started_at: "2026-06-13T12:44:37Z", driver: fab-new, iterations: 1} + intake: {started_at: "2026-06-13T12:44:37Z", driver: fab-new, iterations: 1, completed_at: "2026-06-13T14:50:39Z"} + apply: {started_at: "2026-06-13T14:50:39Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-13T14:59:57Z"} + review: {started_at: "2026-06-13T14:59:57Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-13T15:06:52Z"} + hydrate: {started_at: "2026-06-13T15:06:52Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-13T15:11:58Z"} + ship: {started_at: "2026-06-13T15:11:58Z", driver: fab-fff, iterations: 1} prs: [] +true_impact: + added: 231 + deleted: 0 + net: 231 + excluding: + added: 0 + deleted: 0 + net: 0 + tests: + added: 0 + deleted: 0 + net: 0 + computed_at: "2026-06-13T15:11:58Z" + computed_at_stage: hydrate # true_impact: lazily created on first apply-finish (no placeholder here). -last_updated: 2026-06-13T12:46:03Z +last_updated: 2026-06-13T15:11:58Z diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/intake.md b/fab/changes/260613-yky7-resolve-agent-alias-flag/intake.md index 4cde62f8..ade44f5f 100644 --- a/fab/changes/260613-yky7-resolve-agent-alias-flag/intake.md +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/intake.md @@ -25,34 +25,42 @@ The Agent tool's `model` parameter is a hard JSON-schema enum: `["sonnet","opus" It rejects full IDs. The CLI `--model` flag accepts both (`claude --help`: "Provide an alias ... or a model's full name"). -Interaction mode: conversational. The user chose the fix shape explicitly: -- **Fix via a `--alias` flag** (encode the mapping deterministically in Go), rejecting the - prompt-only hand-mapping option — *because the live failure was precisely an agent fumbling the - hand-map*. -- The user also raised "put aliases in the tiers instead of full IDs"; this was analyzed and - **rejected** (it would break provider-neutrality, weaken the Fable version-pin discipline, and - require a coordinated edit across the Go map + two drift-guarded spec tables + config comments + - migration — pushing a harness quirk into the provider-neutral core). See Design Decisions. +**Prior art — PR #413 (`m3d4`, merged) already documents this mismatch, but as a manual +instruction.** #413 ("Apply per-stage model tier uniformly") rewrote the dispatch-wiring prose +across `_preamble.md`, `fab-ff/fff/continue.md`, and `stage-models.md` and added an explicit note: +the Agent `model` param takes a short alias, *"so the orchestrator maps the resolved id to the +alias at the dispatch seam."* That is the **prompt-side hand-mapping** approach — it tells the +dispatching agent to translate the id by hand on every dispatch. **This change replaces that +brittle instruction with a deterministic Go-side translation**: a `--alias` flag on +`fab resolve-agent` that emits the alias directly, so no agent ever hand-maps. (The live failure +*was* an agent fumbling exactly that hand-map — encoding it in Go removes the failure mode.) + +Interaction mode: conversational. Decisions taken in discussion: +- **Fix via a `--alias` flag** (encode the mapping deterministically in Go), explicitly rejecting + the prompt-only hand-mapping option (which is what #413 currently ships). +- **Do NOT switch tier defaults to aliases.** Analyzed and rejected — it would break + provider-neutrality, weaken the Fable version-pin discipline, and force a coordinated edit across + the Go map + two drift-guarded spec tables + config comments + migration, pushing a harness quirk + into the provider-neutral core. See Design Decisions / Assumptions. ## Why -**Problem.** The orchestrated pipeline (`/fab-ff`, `/fab-fff`, `/fab-continue`) is *broken at the -Agent-tool dispatch seam* whenever a stage resolves to a full model ID — which is every stage -under the shipped defaults. Per-stage model selection (feature #406/#407) cannot actually dispatch -a sub-agent with its resolved model without a manual workaround. +**Problem.** The orchestrated pipeline (`/fab-ff`, `/fab-fff`, `/fab-continue`) dispatches every +post-intake stage as a sub-agent, and per #413 each dispatch is *instructed to hand-map* the +resolved full model ID to the Agent-tool alias. Hand-mapping at the prompt layer is exactly the +step that failed in the live run — it is brittle and easy for a dispatching agent to skip or get +wrong. -**Consequence if unfixed.** Every orchestrated run either fails at first sub-agent dispatch or -silently depends on the agent improvising a full-ID→alias mapping by hand — exactly the -error-prone step that failed in the live run. Per-stage model selection is effectively unusable in -its primary (orchestrated) mode. +**Consequence if unfixed.** Every orchestrated dispatch carries an avoidable failure mode: the +agent must remember to translate `claude-opus-4-8` → `opus` (and the dated/family variants) +correctly, every time, by following prose. A single miss reproduces the original +"Invalid tool parameters" failure. **Why this approach.** The mismatch is genuinely **specific to the Claude-Code Agent-tool -surface**, not to fab's resolution logic. `stage-models.md` already names "injecting the resolved -model into the Agent dispatch" as the **harness-adapter boundary** — the one Claude-Code-specific -layer. The fix belongs *exactly there*: a `--alias` flag that emits the Claude-Code alias on -demand, leaving the provider-neutral default (full ID) untouched for the CLI/operator path. -Encoding the mapping in Go (vs. prompt-side) makes it deterministic — the resolver, not a -per-dispatch agent guess, owns the translation. +surface**, which `stage-models.md` already names as the **harness-adapter boundary**. Moving the +translation from a per-dispatch prose instruction into a deterministic resolver flag (`--alias`) +puts it at exactly that boundary and makes it impossible to fumble. The provider-neutral default +(full ID) is untouched for the CLI/operator path; `--alias` is an opt-in Claude-Code adapter. ## What Changes @@ -99,50 +107,59 @@ Mapping (prefix-based to absorb dated variants like `claude-haiku-4-5-20251001`) line stays empty under `--alias`. - **Unmapped / non-Claude ID** (e.g. `gpt-5`) → **returned verbatim** (pass-through). This keeps `--alias` from becoming a Claude-only validator: a project that overrode a tier to another - provider's model still gets its string through unchanged. (Decision point — see Open Questions - / Assumptions; leaning verbatim pass-through.) + provider's model still gets its string through unchanged. The flag wiring formats output via the existing `formatAgentProfile` path — `resolveAgentCmd` applies `ModelAlias` to `profile.Model` before formatting when `--alias` is set (alternatively, pass a flag into a small format variant; plan decides the cleanest seam). The `model=` / `effort=` line contract and byte-stability are otherwise unchanged. -### 3. Skill wiring — orchestrator/dispatch use `--alias` for the Agent-tool path - -The canonical instruction lives in **`src/kit/skills/_preamble.md` § Subagent Dispatch → -Per-Stage Model Resolution**, specifically the **"Harness-adapter boundary (Claude Code)"** -paragraph (~line 346). Today it says the resolved model goes into "the Agent tool's `model` -parameter" but does not account for the enum mismatch. Update it to instruct: **for the -Claude-Code Agent-tool dispatch, resolve with `fab resolve-agent --alias`** so the emitted -`model=` is already an Agent-tool-valid alias; pass it straight into the Agent tool `model` param -(empty ⇒ omit/inherit, unchanged). - -The three consuming skills echo the preamble and must be updated to pass `--alias` (or to defer to -the preamble's updated instruction — keep the existing defer-to-preamble pattern, just ensure the -`--alias` requirement is unambiguous): -- `src/kit/skills/fab-ff.md` (line ~37 note) -- `src/kit/skills/fab-fff.md` (lines ~37, ~47, ~57 notes) -- `src/kit/skills/fab-continue.md` (line ~154 review-dispatch note) - -**Operator launcher path is NOT changed.** `fab operator` / the `_cli-fab.md:554` path appends -`--model ` to a `claude` **CLI** invocation, which accepts full IDs. It must keep +### 3. Skill wiring — REPOINT the existing (post-#413) adapter prose at `--alias` + +> **Note**: #413 already wrote the id→alias adapter prose into these files as a *hand-mapping* +> instruction ("the orchestrator maps the resolved id to the alias at the dispatch seam"). This +> change does NOT add new adapter docs — it **edits the existing prose** to say "resolve with +> `fab resolve-agent --alias` (emits the alias directly)" instead of "map the id by hand". +> Each site below currently contains a hand-map phrasing that must be repointed. + +- **`src/kit/skills/_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution** — the + "Harness-adapter boundary (Claude Code)" paragraph currently says "the orchestrator maps the + resolved id to the alias at the dispatch seam." Repoint to: resolve the model half with + `fab resolve-agent --alias`, which emits an Agent-tool-valid alias on the `model=` line; + pass it straight into the Agent `model` param (empty ⇒ omit/inherit). (Canonical instruction.) +- **`src/kit/skills/fab-ff.md`** (per-stage-model note, ~line 37) — currently "model via the Agent + tool's `model` param". Repoint the resolve call to `fab resolve-agent --alias` for the + model half. +- **`src/kit/skills/fab-fff.md`** (per-stage-model note ~line 37; ship-resolve ~line 47; review-pr + resolve ~line 57) — same repointing at each `fab resolve-agent ...` call used for an Agent-tool + dispatch. +- **`src/kit/skills/fab-continue.md`** (lines ~19, ~52, ~161) — the one-stage-sequencer note, the + sub-agent dispatch contract, and the nested-reviewers note each resolve a stage for Agent-tool + dispatch; repoint each to `--alias`. + +**Effort half is unchanged.** #413 routes effort via a subagent-prompt instruction (the Agent tool +has no effort param). `--alias` touches only the `model=` line; the effort-prompt seam is left +exactly as #413 shipped it. + +**Operator launcher path is NOT changed.** `fab operator` / the `_cli-fab.md` operator path +appends `--model ` to a `claude` **CLI** invocation, which accepts full IDs. It keeps resolving **without** `--alias`. The CLI and Agent-tool paths deliberately diverge. ### 4. CLI reference — `_cli-fab.md` Constitution constraint (CLI changes MUST update `_cli-fab.md`): update the `fab resolve-agent` -entry (§ around line 227) to document the `--alias` flag — what it emits, that it's a Claude-Code -Agent-tool adapter, that default behavior is unchanged (full ID), and that empty/non-Claude models -pass through verbatim. +entry (§ around line 217) to document the `--alias` flag — what it emits (short alias on the +`model=` line), that it's a Claude-Code Agent-tool adapter, that default behavior is unchanged +(full ID), that the `effort=` line is unaffected, and that empty/non-Claude models pass through +verbatim. ### 5. Spec — `docs/specs/stage-models.md` -Update **§ Harness-adapter boundary (the only Claude-Code-specific layer)** to document the -two-surface vocabulary split and the `--alias` flag as the adapter mechanism for the Agent-tool -path. This is a spec-of-design update (the spec already frames this boundary as the Claude-Code -adapter — we're making the alias mechanism concrete). The two **drift-guarded tables** (default -tier profiles, stage→tier mapping) are **NOT touched** — full IDs stay canonical, so -`TestDocTablesMatchAgentMaps` is unaffected. +Update the adapter prose (§ Skill wiring and § Harness-adapter boundary) where #413 wrote "the +orchestrator maps the resolved id to the alias at the dispatch seam" — change it to describe the +`--alias` flag as the deterministic mechanism for the Agent-tool model half. The two +**drift-guarded tables** (default tier profiles, stage→tier mapping) are **NOT touched** — full +IDs stay canonical, so `TestDocTablesMatchAgentMaps` is unaffected. ### 6. Go test coverage @@ -156,37 +173,38 @@ Add tests (test-alongside, `**/*_test.go`): ## Affected Memory - `pipeline/stage-models`: (modify) — the resolve-agent `--alias` flag and the two-surface - (CLI vs Agent-tool) adapter split. *(Exact memory file path confirmed at hydrate — the pipeline - domain owns stage-models / dispatch wiring; this may be the stage-models memory file or the - dispatch-wiring file. No new spec-level behavior beyond the documented adapter mechanism.)* + (CLI vs Agent-tool) adapter split; supersedes the #413 hand-mapping instruction with a + deterministic resolver flag. *(Exact memory file path confirmed at hydrate — the pipeline + domain owns stage-models / dispatch wiring.)* ## Impact - `src/go/fab/internal/agent/agent.go` — new `ModelAlias` function (+ its test file). - `src/go/fab/cmd/fab/resolve_agent.go` — new `--alias` bool flag; apply mapping pre-format (+ test). -- `src/kit/skills/_preamble.md` — Harness-adapter boundary paragraph (canonical instruction). -- `src/kit/skills/fab-ff.md`, `fab-fff.md`, `fab-continue.md` — per-stage-model dispatch notes. +- `src/kit/skills/_preamble.md` — Harness-adapter boundary paragraph (repoint hand-map → `--alias`). +- `src/kit/skills/fab-ff.md`, `fab-fff.md`, `fab-continue.md` — per-stage-model dispatch notes + (repoint each Agent-tool-dispatch resolve call to `--alias`). - `src/kit/skills/_cli-fab.md` — resolve-agent signature gains `--alias`. -- `docs/specs/stage-models.md` — § Harness-adapter boundary (no drift-guarded table touched). +- `docs/specs/stage-models.md` — adapter prose (repoint hand-map → `--alias`); no drift-guarded + table touched. - **No change**: `defaultTiers` / `stageTiers` maps, the operator launcher path, `agent.tiers` - config schema, the `model=`/`effort=` default output contract. + config schema, the `model=`/`effort=` default output contract, the effort-prompt seam from #413. ## Open Questions -- Unmapped / non-Claude model ID under `--alias`: pass through verbatim, or error? (Leaning - verbatim pass-through to preserve provider-neutrality — `--alias` is a best-effort Claude-Code - adapter, not a validator. Recorded as a Confident assumption below.) +- (Resolved as a Confident assumption.) Unmapped / non-Claude model ID under `--alias`: pass + through verbatim, not error — `--alias` is a best-effort Claude-Code adapter, not a validator. ## Assumptions | # | Grade | Decision | Rationale | Scores | |---|-------|----------|-----------|--------| -| 1 | Certain | Fix via a Go-side `--alias` flag on `fab resolve-agent`, not prompt-side hand-mapping | User chose this explicitly in discussion; the live failure was an agent fumbling the hand-map, so determinism in Go is the durable fix | S:95 R:80 A:90 D:95 | -| 2 | Certain | Do NOT switch tier defaults to aliases; full IDs stay canonical in `defaultTiers` + drift-guarded spec tables | User raised this alternative; analyzed and rejected — breaks provider-neutrality, weakens the Fable version-pin, and forces a coordinated multi-file edit pushing a harness quirk into the provider-neutral core | S:90 R:75 A:90 D:90 | -| 3 | Certain | Default behavior (no `--alias`) is byte-identical to today (full model ID); CLI/operator path unchanged | The `claude` CLI `--model` flag accepts full IDs (`claude --help` confirms); only the Agent-tool enum rejects them. Two surfaces must diverge | S:95 R:85 A:95 D:95 | -| 4 | Confident | Mapping is prefix-based (`claude-haiku-` → `haiku`) so dated variants (`claude-haiku-4-5-20251001`) resolve | The Agent enum is family-level (opus/sonnet/haiku/fable); full IDs carry version/date suffixes. Prefix match is the robust mapping | S:75 R:80 A:85 D:80 | -| 5 | Confident | Unmapped / non-Claude model under `--alias` passes through verbatim (not an error) | Preserves provider-neutrality — `--alias` is a Claude-Code adapter, not a validator; a non-Claude override still flows. Open question, but the leaning is clear and low-risk (reversible) | S:65 R:80 A:75 D:70 | -| 6 | Confident | `ModelAlias` lives in `internal/agent` (alongside the tier tables + `Resolve`) | That package already owns the model vocabulary and is the drift-guard's subject; cohesive home | S:75 R:85 A:90 D:80 | -| 7 | Confident | Skills keep the defer-to-preamble pattern; canonical `--alias` instruction lives in `_preamble.md` Harness-adapter boundary, echoed by ff/fff/continue | Matches the existing single-source convention (the three skills already defer to the preamble for per-stage model resolution) | S:80 R:85 A:90 D:85 | - -7 assumptions (3 certain, 4 confident, 0 tentative, 0 unresolved). +| 1 | Certain | Fix via a Go-side `--alias` flag on `fab resolve-agent`, replacing #413's prompt-side hand-mapping instruction | User chose this explicitly; the live failure was an agent fumbling the hand-map, and #413 currently ships exactly that hand-map as prose — determinism in Go removes the failure mode | S:95 R:80 A:92 D:95 | +| 2 | Certain | Do NOT switch tier defaults to aliases; full IDs stay canonical in `defaultTiers` + drift-guarded spec tables | User raised this alternative; rejected — breaks provider-neutrality, weakens the Fable version-pin, forces a coordinated multi-file edit pushing a harness quirk into the provider-neutral core | S:90 R:75 A:90 D:90 | +| 3 | Certain | Default behavior (no `--alias`) byte-identical to today (full model ID); CLI/operator path and the #413 effort-prompt seam unchanged | The `claude` CLI `--model` flag accepts full IDs; only the Agent-tool enum rejects them. `--alias` touches only the `model=` line | S:95 R:85 A:95 D:95 | +| 4 | Certain | This change EDITS the post-#413 adapter prose (repoints hand-map → `--alias`); it does not add new adapter documentation | #413 already wrote the id→alias prose into _preamble/ff/fff/continue/stage-models as a hand-map instruction; verified post-rebase. Editing-not-adding is the accurate scope | S:90 R:85 A:90 D:90 | +| 5 | Confident | Mapping is prefix-based (`claude-haiku-` → `haiku`) so dated variants (`claude-haiku-4-5-20251001`) resolve | The Agent enum is family-level; full IDs carry version/date suffixes. Prefix match is the robust mapping | S:75 R:80 A:85 D:80 | +| 6 | Confident | Unmapped / non-Claude model under `--alias` passes through verbatim (not an error) | Preserves provider-neutrality — `--alias` is a Claude-Code adapter, not a validator; a non-Claude override still flows. Low-risk and reversible | S:65 R:80 A:75 D:70 | +| 7 | Confident | `ModelAlias` lives in `internal/agent` (alongside the tier tables + `Resolve`) | That package already owns the model vocabulary and is the drift-guard's subject; cohesive home | S:75 R:85 A:90 D:80 | + +7 assumptions (4 certain, 3 confident, 0 tentative, 0 unresolved). diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/plan.md b/fab/changes/260613-yky7-resolve-agent-alias-flag/plan.md new file mode 100644 index 00000000..aee94ae5 --- /dev/null +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/plan.md @@ -0,0 +1,229 @@ +# Plan: resolve-agent --alias flag (Claude-Code model alias adapter) + +**Change**: 260613-yky7-resolve-agent-alias-flag +**Intake**: `intake.md` + +## Requirements + + + +### Alias mapping: `internal/agent` + +#### R1: ModelAlias prefix-based family mapping +The `agent` package MUST expose an exported `ModelAlias(model string) string` that maps a full +Claude model ID to its Claude-Code short alias (`opus` / `sonnet` / `haiku` / `fable`) using a +**prefix** match (`claude-opus-` → `opus`, `claude-sonnet-` → `sonnet`, `claude-haiku-` → `haiku`, +`claude-fable-` → `fable`), so dated/versioned variants resolve. + +- **GIVEN** a full ID `claude-opus-4-8` +- **WHEN** `ModelAlias` is called +- **THEN** it returns `opus` +- **AND** `claude-sonnet-4-6` → `sonnet`, `claude-haiku-4-5` → `haiku`, `claude-fable-1` → `fable` +- **AND** the dated variant `claude-haiku-4-5-20251001` → `haiku` (prefix match absorbs the date suffix) + +#### R2: ModelAlias verbatim pass-through for empty and unmapped inputs +`ModelAlias` MUST return its input VERBATIM when no mapping applies — an empty string stays empty +(preserving the "inherit the session model" signal), and an unrecognized / non-Claude ID is returned +unchanged (so `--alias` stays a best-effort Claude-Code adapter, NOT a validator that rejects other +providers' models). + +- **GIVEN** an empty model string `""` +- **WHEN** `ModelAlias("")` is called +- **THEN** it returns `""` (empty in, empty out) +- **GIVEN** a non-Claude ID `gpt-5` +- **WHEN** `ModelAlias("gpt-5")` is called +- **THEN** it returns `gpt-5` verbatim (no mapping, pass-through) + +### CLI: `fab resolve-agent --alias` + +#### R3: --alias flag emits the short alias on the model= line +`fab resolve-agent ` MUST accept a boolean `--alias` flag. When set, the resolved +`profile.Model` SHALL be passed through `ModelAlias` before formatting, so the `model=` line emits the +short alias. The `effort=` line MUST be unaffected by `--alias`. + +- **GIVEN** the default config (no `agent.tiers` override) +- **WHEN** `fab resolve-agent apply --alias` runs +- **THEN** stdout is `model=opus\neffort=high\n` +- **AND** the `effort=` line is identical to the non-`--alias` output + +#### R4: default (no --alias) is byte-identical to today +With `--alias` absent, `fab resolve-agent ` MUST emit exactly today's output (the full model +ID). The CLI/operator path and the byte-stable two-line contract are unchanged. + +- **GIVEN** the default config +- **WHEN** `fab resolve-agent apply` runs (no flag) +- **THEN** stdout is `model=claude-opus-4-8\neffort=high\n` (regression guard — full ID preserved) + +#### R5: empty-model tier under --alias still emits an empty model= line +When the resolved tier has an empty model (the "inherit" signal), `--alias` MUST leave the `model=` +line empty (since `ModelAlias("")` → `""`). + +- **GIVEN** a tier whose resolved model is empty +- **WHEN** `fab resolve-agent --alias` runs +- **THEN** the `model=` line is empty (`model=\n`) — the inherit signal is preserved under `--alias` + +### Documentation & skill wiring + +#### R6: CLI reference documents --alias +`src/kit/skills/_cli-fab.md` § fab resolve-agent MUST document the `--alias` flag: that it emits the +short alias on the `model=` line, that it is the Claude-Code Agent-tool adapter, that the default +(absent) behavior is unchanged (full ID), that the `effort=` line is unaffected, and that +empty/non-Claude models pass through verbatim. + +- **GIVEN** the CLI reference for `fab resolve-agent` +- **WHEN** a reader looks up the command signature +- **THEN** the `--alias` flag and its semantics are documented + +#### R7: dispatch prose repointed from hand-map to --alias +The post-#413 adapter prose that currently instructs the orchestrator to "map the resolved id to the +alias at the dispatch seam" (the hand-map instruction) MUST be repointed to resolve the model half with +`fab resolve-agent --alias` (which emits an Agent-tool-valid alias directly). This applies to +the canonical `_preamble.md` § Harness-adapter boundary paragraph and every sibling site that carries +the same hand-map / Agent-tool-model-half dispatch instruction: `fab-ff.md`, `fab-fff.md`, +`fab-continue.md`, the shared bracket `_pipeline.md`, and the spec `docs/specs/stage-models.md`. No +sibling SHALL be left on the stale "maps id → alias by hand" / "short alias" phrasing. + +- **GIVEN** the `_preamble.md` Harness-adapter boundary paragraph +- **WHEN** it is read after this change +- **THEN** it instructs resolving the model half via `fab resolve-agent --alias`, not a manual id→alias map +- **AND** a tree-wide grep for the old "short alias … orchestrator maps … id … alias" hand-map phrasing returns no skill/spec prose still on the manual instruction + +#### R8: operator launcher path keeps resolving WITHOUT --alias +The operator launcher (`fab operator` / `_cli-fab.md` operator section) appends `--model ` to a +`claude` CLI invocation, which accepts full IDs. It MUST continue to resolve WITHOUT `--alias` — the +CLI and Agent-tool paths deliberately diverge. + +- **GIVEN** the operator launcher resolving the doing-tier model +- **WHEN** this change ships +- **THEN** it still runs `fab resolve-agent apply` (no `--alias`) and appends the full model ID + +#### R9: SPEC mirrors updated where they reference the hand-map +Per the Constitution (skill behavior changes update the corresponding `SPEC-*.md`), the SPEC mirrors +that reference the id→alias hand-mapping (`docs/specs/skills/SPEC-_preamble.md`) MUST be updated to the +`--alias` mechanism. SPEC mirrors that only mention the model-param seam generically (no hand-map +phrasing) need no edit. + +- **GIVEN** `SPEC-_preamble.md` describing the dispatch seam with the "short alias … maps id→alias" phrasing +- **WHEN** read after this change +- **THEN** it describes the `--alias` resolver flag as the deterministic Agent-tool model-half mechanism + +### Non-Goals + +- Switching tier defaults to aliases — explicitly rejected (full IDs stay canonical in `defaultTiers` + and the two drift-guarded spec tables; provider-neutrality + Fable version-pin preserved). +- Touching the two drift-guarded tables in `stage-models.md` (default tier profiles, stage→tier mapping) + or the `defaultTiers`/`stageTiers` Go maps — `TestDocTablesMatchAgentMaps` must stay unaffected. +- Adding an effort param/flag — `--alias` touches only the `model=` line; the #413 effort-prompt seam + is left exactly as shipped. +- Validating models — `--alias` is a best-effort adapter, not a Claude-only validator. + +### Design Decisions + +1. **Apply `ModelAlias` to `profile.Model` before formatting (in the RunE), not a format variant**: + the cleanest seam — `formatAgentProfile` stays a pure formatter with the unchanged byte contract; + the `--alias` transform is a one-line pre-format mutation in `resolveAgentCmd`. — *Why*: keeps the + omit-when-empty / inherit branches of the formatter untouched and independently testable. — + *Rejected*: threading a bool into `formatAgentProfile` (couples the formatter to a flag it doesn't + need; the empty-model branch already does the right thing because `ModelAlias("")` → `""`). +2. **Prefix match over exact map**: absorbs dated variants (`claude-haiku-4-5-20251001` → `haiku`) + without enumerating every version. — *Why*: the Agent enum is family-level; full IDs carry + version/date suffixes. — *Rejected*: exact-string map (brittle; breaks on the next dated release). +3. **Repoint `_pipeline.md` too** (the shared bracket), though the intake's explicit list names only + `_preamble/ff/fff/continue/stage-models`: the thin wrappers `fab-ff.md`/`fab-fff.md` delegate their + Steps 1–3 dispatch to the `_pipeline.md` bracket, whose per-stage-model note carries the same + Agent-tool-model-half dispatch instruction. Leaving the canonical bracket un-repointed while the + wrappers say `--alias` would be an inconsistent stale sibling. — *Why*: the intake's own directive is + "sweep ALL siblings; do not leave a stale sibling." — *Rejected*: editing only the literal list (would + contradict the sweep directive). + +## Tasks + +### Phase 1: Core Implementation (Go) + +- [x] T001 Add exported `ModelAlias(model string) string` to `src/go/fab/internal/agent/agent.go` — prefix-based family map (`claude-opus-`→`opus`, `claude-sonnet-`→`sonnet`, `claude-haiku-`→`haiku`, `claude-fable-`→`fable`), empty→empty, unmapped/non-Claude→verbatim; placed alongside the tier tables / `Resolve` +- [x] T002 Add a `--alias` bool flag to `resolveAgentCmd` in `src/go/fab/cmd/fab/resolve_agent.go`; when set, apply `agent.ModelAlias` to `profile.Model` before `formatAgentProfile`. Default path unchanged (full ID). Update the command doc comment to note the flag. + +### Phase 2: Go Test Coverage (test-alongside) + +- [x] T003 [P] Unit-test `ModelAlias` in `src/go/fab/internal/agent/agent_test.go`: all four families → alias; dated `claude-haiku-4-5-20251001` → `haiku`; empty → empty; `gpt-5` → verbatim +- [x] T004 [P] Command-level test in `src/go/fab/cmd/fab/resolve_agent_test.go`: `apply --alias` → `model=opus\neffort=high\n`; `apply` (no flag) → `model=claude-opus-4-8\neffort=high\n` (regression guard); empty-model under `--alias` → empty `model=` line (asserted at alias+formatter level — no config resolves to an empty model) + +### Phase 3: Documentation & Skill-Prose Repoint + +- [x] T005 [P] `src/kit/skills/_cli-fab.md` § fab resolve-agent (~line 217): document the `--alias` flag (short alias on `model=`; Claude-Code Agent-tool adapter; default unchanged = full ID; `effort=` line unaffected; empty/non-Claude pass through verbatim) +- [x] T006 [P] `src/kit/skills/_preamble.md` § Harness-adapter boundary (~line 353) + the two-seam model bullet (~line 348): repoint the hand-map sentence ("the orchestrator maps the resolved id to the alias at the dispatch seam") to resolving the model half with `fab resolve-agent --alias` +- [x] T007 [P] `src/kit/skills/fab-ff.md` (~line 37) and `src/kit/skills/fab-fff.md` (~lines 37, 47, 57): repoint each Agent-tool-dispatch resolve call to `fab resolve-agent --alias` for the model half +- [x] T008 [P] `src/kit/skills/fab-continue.md` (~lines 19, 52, 161 + dispatch-table rows 56/58/59/61): repoint the one-stage-sequencer note, the sub-agent dispatch contract, the nested-reviewers note, and the table resolve calls to `--alias` +- [x] T009 [P] `src/kit/skills/_pipeline.md` (per-stage-model note + apply/review/hydrate/rework resolve calls): repoint each Agent-tool-dispatch resolve call to `--alias` for the model half (the shared bracket — keep it consistent with the wrappers) +- [x] T010 [P] `docs/specs/stage-models.md` § Skill wiring + § Harness-adapter boundary: repoint the "orchestrator maps id → alias at the seam" prose to describe `--alias` as the deterministic Agent-tool model-half mechanism. Two drift-guarded tables NOT touched (confirmed via diff). +- [x] T011 [P] `docs/specs/skills/SPEC-_preamble.md` (~line 5 description + ~lines 96–97 ASCII box): repoint the "short alias … orchestrator maps id→alias" phrasing to the `--alias` resolver flag + +### Phase 4: Verification + +- [x] T012 Build + full `go test ./...` under `src/go/fab/` (all packages pass); `ModelAlias` tests, `resolve-agent --alias` command tests, and the drift-guard `TestDocTablesMatchAgentMaps` all pass; locally-built binary exercised (`apply --alias`→`model=opus`, no-flag→`model=claude-opus-4-8`). Tree-wide grep confirms no skill/spec prose left on the stale hand-map instruction; operator launcher path confirmed still resolving WITHOUT `--alias`. + +## Execution Order + +- T001 blocks T002, T003, T004 (the function must exist first) +- T003, T004 depend on T001/T002 but are independent of each other and of the doc tasks +- T005–T011 are independent docs/prose edits (parallelizable) +- T012 runs last (verification gate) + +## Acceptance + +### Functional Completeness + +- [x] A-001 R1: `ModelAlias` maps all four Claude families by prefix; a dated variant resolves to its family alias +- [x] A-002 R2: `ModelAlias` returns empty for empty input and passes an unmapped/non-Claude ID through verbatim +- [x] A-003 R3: `fab resolve-agent apply --alias` emits `model=opus` with the `effort=` line unaffected +- [x] A-004 R6: `_cli-fab.md` § fab resolve-agent documents the `--alias` flag and its semantics +- [x] A-005 R7: the `_preamble.md` Harness-adapter boundary paragraph and all sibling dispatch sites (fab-ff/fff/continue, _pipeline, stage-models) resolve the model half via `--alias` instead of a hand-map +- [x] A-006 R9: `SPEC-_preamble.md` describes `--alias` as the Agent-tool model-half mechanism + +### Behavioral Correctness + +- [x] A-007 R4: default (no `--alias`) output is byte-identical to today (`model=claude-opus-4-8\neffort=high\n`) — regression guard test present and passing +- [x] A-008 R8: the operator launcher path still resolves WITHOUT `--alias` (full ID appended to the `claude` CLI) + +### Edge Cases & Error Handling + +- [x] A-009 R5: an empty-model tier under `--alias` still emits an empty `model=` line (inherit signal preserved) +- [x] A-010 R2: a non-Claude override (`gpt-5`) under `--alias` flows through unchanged (adapter, not validator) + +### Scenario Coverage + +- [x] A-011 R3 R4: command-level tests cover both `--alias` (alias) and no-flag (full ID) on the same stage + +### Code Quality + +- [x] A-012 Pattern consistency: `ModelAlias` follows the package's existing exported-helper style; the flag wiring follows the existing cobra command pattern in `resolve_agent.go` +- [x] A-013 No unnecessary duplication: reuses `formatAgentProfile` (no parallel formatter); `ModelAlias` is the single mapping site + +### Documentation Accuracy (checklist.extra_categories) + +- [x] A-014 Doc prose accurately describes `--alias` (no claim the default changed; effort line explicitly noted as unaffected) + +### Cross References (checklist.extra_categories) + +- [x] A-015 The drift-guarded tables in `stage-models.md` and the `defaultTiers`/`stageTiers` Go maps are untouched; `TestDocTablesMatchAgentMaps` still passes + +## Notes + +- Check items as you review: `- [x]` +- The PATH `fab` is the installed 2.3.1 release; the new flag must be exercised via the locally-built + binary (`go run ./cmd/fab resolve-agent apply --alias` from `src/go/fab/`), not the PATH `fab`. + +## Assumptions + +| # | Grade | Decision | Rationale | Scores | +|---|-------|----------|-----------|--------| +| 1 | Certain | Fix via a Go-side `--alias` flag on `fab resolve-agent`, replacing #413's prompt-side hand-mapping (carried from intake) | User chose this explicitly; encoding the map in Go removes the live failure mode | S:95 R:80 A:92 D:95 | +| 2 | Certain | Default (no `--alias`) byte-identical to today; CLI/operator path + #413 effort-prompt seam unchanged (carried from intake) | The `claude` CLI accepts full IDs; only the Agent-tool enum rejects them; `--alias` touches only `model=` | S:95 R:85 A:95 D:95 | +| 3 | Certain | Do NOT switch tier defaults to aliases; full IDs stay canonical in `defaultTiers` + drift-guarded tables (carried from intake) | Preserves provider-neutrality + Fable version-pin; avoids a coordinated multi-file edit; keeps the drift-guard unaffected | S:90 R:75 A:90 D:90 | +| 4 | Confident | Mapping is prefix-based so dated variants resolve (carried from intake) | Agent enum is family-level; full IDs carry date/version suffixes; prefix match is the robust mapping | S:75 R:80 A:85 D:80 | +| 5 | Confident | Unmapped/non-Claude under `--alias` passes through verbatim, not an error (carried from intake) | `--alias` is a Claude-Code adapter, not a validator; a non-Claude override still flows; low-risk, reversible | S:65 R:80 A:75 D:70 | +| 6 | Confident | Apply `ModelAlias` to `profile.Model` in the RunE before `formatAgentProfile` (not a format variant) | Keeps the formatter a pure, byte-stable function with its omit-when-empty branches intact; one-line pre-format transform is the cleanest seam | S:80 R:85 A:85 D:80 | +| 7 | Confident | Repoint `_pipeline.md` (the shared bracket) too, beyond the intake's literal file list | fab-ff/fff delegate Steps 1–3 dispatch to the `_pipeline.md` bracket, which carries the same model-half dispatch instruction; the intake directs "sweep ALL siblings — do not leave a stale sibling," so consistency requires it | S:70 R:85 A:80 D:75 | + +7 assumptions (3 certain, 4 confident, 0 tentative). diff --git a/src/go/fab/cmd/fab/resolve_agent.go b/src/go/fab/cmd/fab/resolve_agent.go index 29229a5a..fc6c7db4 100644 --- a/src/go/fab/cmd/fab/resolve_agent.go +++ b/src/go/fab/cmd/fab/resolve_agent.go @@ -25,8 +25,15 @@ import ( // model emits an empty `model=` line, signaling "inherit the session/orchestrator // model". Non-zero exit only on a real error: malformed/unreadable config, or an // unknown stage name. A stage resolving to a default is success. +// +// The optional `--alias` flag is the Claude-Code Agent-tool adapter: when set, +// the resolved model is mapped to its short alias (opus/sonnet/haiku/fable) on the +// `model=` line via agent.ModelAlias, since the Agent tool's `model` enum rejects +// full IDs. Default (absent) is byte-identical to today (full ID). The `effort=` +// line is unaffected by `--alias`; empty/non-Claude models pass through verbatim. func resolveAgentCmd() *cobra.Command { - return &cobra.Command{ + var alias bool + cmd := &cobra.Command{ Use: "resolve-agent ", Short: "Resolve a pipeline stage to its {model, effort} agent profile", Args: cobra.ExactArgs(1), @@ -46,10 +53,16 @@ func resolveAgentCmd() *cobra.Command { return err } + if alias { + profile.Model = agent.ModelAlias(profile.Model) + } + fmt.Fprint(cmd.OutOrStdout(), formatAgentProfile(profile)) return nil }, } + cmd.Flags().BoolVar(&alias, "alias", false, "emit the Claude-Code short model alias (opus/sonnet/haiku/fable) on the model= line instead of the full ID (Agent-tool adapter)") + return cmd } // formatAgentProfile renders a resolved profile as the byte-stable stdout diff --git a/src/go/fab/cmd/fab/resolve_agent_test.go b/src/go/fab/cmd/fab/resolve_agent_test.go index 407dbaf9..437bf44d 100644 --- a/src/go/fab/cmd/fab/resolve_agent_test.go +++ b/src/go/fab/cmd/fab/resolve_agent_test.go @@ -143,6 +143,51 @@ func TestResolveAgentPrintsEmptyEffortOmitted(t *testing.T) { } } +// TestResolveAgentAliasEmitsShortAlias: with --alias, a doing stage emits the +// short alias on the model= line while the effort= line is unaffected. +func TestResolveAgentAliasEmitsShortAlias(t *testing.T) { + resolveAgentTestRepo(t, "project:\n name: test\n") + + out, err := runResolveAgentCmd(t, "apply", "--alias") + if err != nil { + t.Fatalf("resolve-agent apply --alias: %v", err) + } + if out != "model=opus\neffort=high\n" { + t.Errorf("output = %q, want %q", out, "model=opus\neffort=high\n") + } +} + +// TestResolveAgentNoAliasEmitsFullID: without --alias the default output is the +// full model ID, byte-identical to today (regression guard against the alias +// transform leaking into the default path). +func TestResolveAgentNoAliasEmitsFullID(t *testing.T) { + resolveAgentTestRepo(t, "project:\n name: test\n") + + out, err := runResolveAgentCmd(t, "apply") + if err != nil { + t.Fatalf("resolve-agent apply: %v", err) + } + if out != "model=claude-opus-4-8\neffort=high\n" { + t.Errorf("output = %q, want %q", out, "model=claude-opus-4-8\neffort=high\n") + } +} + +// TestResolveAgentAliasEmptyModelInheritSignal: under --alias, an empty resolved +// model still yields an empty model= line (the inherit signal is preserved — +// ModelAlias("") is ""). Asserted at the alias+formatter level, because today's +// configs never RESOLVE to an empty model (an empty override is a no-op merge +// that keeps the default — see agent.TestResolveEmptyModelInherit), the same +// reason TestResolveAgentPrintsEmptyEffortOmitted tests the empty effort branch +// at the formatter level. +func TestResolveAgentAliasEmptyModelInheritSignal(t *testing.T) { + if got := agent.ModelAlias(""); got != "" { + t.Fatalf("ModelAlias(\"\") = %q, want empty (inherit signal preserved under --alias)", got) + } + if got := formatAgentProfile(agent.Profile{Model: agent.ModelAlias(""), Effort: "high"}); got != "model=\neffort=high\n" { + t.Errorf("empty model under --alias = %q, want %q", got, "model=\neffort=high\n") + } +} + // TestResolveAgentByteStable: repeated resolution of the same stage on the same // config is byte-identical. func TestResolveAgentByteStable(t *testing.T) { diff --git a/src/go/fab/internal/agent/agent.go b/src/go/fab/internal/agent/agent.go index a90f09bf..bfac7e9b 100644 --- a/src/go/fab/internal/agent/agent.go +++ b/src/go/fab/internal/agent/agent.go @@ -98,6 +98,33 @@ func StageNames() []string { return names } +// modelAliasPrefixes maps a Claude full-ID family prefix to the Claude-Code +// short alias the Agent tool's `model` enum accepts (opus/sonnet/haiku/fable). +// Prefix-matched so dated/versioned variants (claude-haiku-4-5-20251001) resolve +// to their family alias. +var modelAliasPrefixes = []struct{ prefix, alias string }{ + {"claude-opus-", "opus"}, + {"claude-sonnet-", "sonnet"}, + {"claude-haiku-", "haiku"}, + {"claude-fable-", "fable"}, +} + +// ModelAlias maps a full Claude model ID to its Claude-Code short alias (the +// Agent tool's `model` enum: opus/sonnet/haiku/fable). Returns the input VERBATIM +// when no mapping applies — an empty string (preserving the "inherit the session +// model" signal) or an unrecognized/non-Claude ID. This keeps the alias adapter +// from becoming a Claude-only validator (provider neutrality): a tier overridden +// to another provider's model still gets its string through unchanged. Matched by +// family prefix so claude-haiku-4-5-20251001 → haiku. +func ModelAlias(model string) string { + for _, m := range modelAliasPrefixes { + if strings.HasPrefix(model, m.prefix) { + return m.alias + } + } + return model +} + // Resolve maps a stage → its fixed tier → a concrete {model, effort} profile. // // The tier profile is the project's agent.tiers. override PER-FIELD merged diff --git a/src/go/fab/internal/agent/agent_test.go b/src/go/fab/internal/agent/agent_test.go index 6458e540..457ac9e4 100644 --- a/src/go/fab/internal/agent/agent_test.go +++ b/src/go/fab/internal/agent/agent_test.go @@ -148,6 +148,28 @@ func TestResolveUnknownStage(t *testing.T) { } } +// TestModelAlias: full Claude IDs (incl. dated variants) map to their family +// alias by prefix; empty and unmapped/non-Claude inputs pass through verbatim. +func TestModelAlias(t *testing.T) { + cases := map[string]string{ + "claude-opus-4-8": "opus", + "claude-sonnet-4-6": "sonnet", + "claude-haiku-4-5": "haiku", + "claude-fable-1": "fable", + "claude-haiku-4-5-20251001": "haiku", // dated variant resolves by prefix + "": "", // empty in, empty out (inherit signal) + "gpt-5": "gpt-5", // non-Claude passes through verbatim + "some-unrecognized-model-id": "some-unrecognized-model-id", + } + for in, want := range cases { + t.Run(in, func(t *testing.T) { + if got := ModelAlias(in); got != want { + t.Errorf("ModelAlias(%q) = %q, want %q", in, got, want) + } + }) + } +} + // TestTablesExhaustive: every stage's tier has a default profile, and the stage // set is exactly the six pipeline stages. func TestTablesExhaustive(t *testing.T) { diff --git a/src/kit/skills/_cli-fab.md b/src/kit/skills/_cli-fab.md index 6b2c7623..734b593b 100644 --- a/src/kit/skills/_cli-fab.md +++ b/src/kit/skills/_cli-fab.md @@ -219,7 +219,7 @@ The five output-mode flags are **mutually exclusive** — passing two (e.g. `--s Pure query (no side effects) — resolves a pipeline stage to its `{model, effort}` agent profile for sub-agent dispatch. Consumed by the orchestrators (`/fab-ff`, `/fab-fff`, `/fab-proceed`) and `/fab-continue`'s sub-agent dispatch, which call it immediately before dispatching each stage's sub-agent. ``` -fab resolve-agent +fab resolve-agent [--alias] ``` `` is one of the six pipeline stages: `intake`, `apply`, `review`, `hydrate`, `ship`, `review-pr`. @@ -236,6 +236,18 @@ effort= - The `effort=` line is **omitted** when the resolved tier has no effort (empty/absent). - An **empty model** emits an empty `model=` line — signals "inherit the session/orchestrator model" (today's foreground/no-override behavior). Callers omit the dispatch `model` param in that case. +**`--alias` (Claude-Code Agent-tool adapter)**: when set, the `model=` line emits the Claude-Code **short alias** (`opus` / `sonnet` / `haiku` / `fable`) instead of the full versioned ID. This exists because the Claude Code **Agent tool's `model` parameter is a hard enum** that rejects full IDs — sub-agent dispatch must pass an alias. The mapping is prefix-based (`claude-opus-` → `opus`, etc.), so dated variants like `claude-haiku-4-5-20251001` resolve to `haiku`. The **default (flag absent) is unchanged** — the full ID, byte-identical to today (the `claude` CLI `--model` flag, used by the operator launcher, accepts full IDs and keeps resolving WITHOUT `--alias`). The **`effort=` line is unaffected** by `--alias`. **Empty / non-Claude models pass through verbatim** (an empty `model=` line stays empty — the inherit signal; an unrecognized/non-Claude ID like `gpt-5` is emitted unchanged) — `--alias` is a best-effort adapter, not a Claude-only validator. + +``` +$ fab resolve-agent apply +model=claude-opus-4-8 +effort=high + +$ fab resolve-agent apply --alias +model=opus +effort=high +``` + **No validation — verbatim pass-through**: `fab resolve-agent` does NOT validate the model or effort against any provider's accepted set (provider neutrality, Constitution I). It echoes both strings as-is — `xhigh`, `reasoning_effort:high`, an empty effort, whatever. A misconfigured pair (e.g. Sonnet + `xhigh`) is NOT corrected by fab; it surfaces as a dispatch-time error in the harness. There is no effort-enum enforcement and no degrade-gracefully drop. **Exit code**: non-zero only on a real error — an unreadable/malformed config, or an unknown stage name. A stage resolving to a default is success (exit 0). diff --git a/src/kit/skills/_pipeline.md b/src/kit/skills/_pipeline.md index 4c4a0349..21a71823 100644 --- a/src/kit/skills/_pipeline.md +++ b/src/kit/skills/_pipeline.md @@ -47,7 +47,7 @@ Load per `_preamble.md` Sections 1-3 (config, constitution, intake, memory index > > **Dispatch**: All sub-skill invocations use the Agent tool (`general-purpose` subagent) per `_preamble.md` § Subagent Dispatch. Each subagent reads the target skill file, follows the specified behavior, and returns a structured result to the pipeline. Every `/fab-continue`-behavior subagent prompt MUST include: **"do NOT run `fab status` commands; return results only"** — the orchestrator runs those stages' transitions (finish/fail/reset) itself. This is the **universal block contract**, not an override this orchestrator imposes: post-intake `/fab-continue` blocks (Apply / Review / Hydrate Behavior) never own their transitions for **any** caller — plain `/fab-continue` is itself a one-stage sequencer that dispatches the block identically and runs the transition after it returns (see `fab-continue.md` Normal Flow Step 1). The orchestrator here is therefore a **pure sequencer**: dispatch block → read returned status/findings → decide proceed / loop / stop; it never reaches into block internals. > -> **Per-stage model resolution** (see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution for the canonical contract): immediately **before** dispatching each stage's sub-agent, run `fab resolve-agent ` and **surface** the resolved `model=/effort=` lines (carry them into the dispatch prompt and/or echo them in this orchestrator's step output, so a skipped or mis-resolved tier is visible rather than silent), then dispatch through the two seams: the **model** half via the Agent tool's `model` parameter (empty model ⇒ omit/inherit), and the **effort** half via an explicit imperative instruction in the dispatch prompt — ``Operate at `` reasoning effort for this task.`` (empty effort ⇒ omit the instruction; the Agent tool has no effort param). The Claude Code adapter is the Agent tool's `model` parameter; the resolution itself is provider-neutral. The `review` stage (Step 2) resolves **once** and applies the same model AND the same effort-prompt instruction to both reviewer sub-agents AND the merge. +> **Per-stage model resolution** (see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution for the canonical contract): immediately **before** dispatching each stage's sub-agent, run `fab resolve-agent --alias` and **surface** the resolved `model=/effort=` lines (carry them into the dispatch prompt and/or echo them in this orchestrator's step output, so a skipped or mis-resolved tier is visible rather than silent), then dispatch through the two seams: the **model** half via the Agent tool's `model` parameter — the `--alias` flag emits the Agent-tool-valid short alias directly on the `model=` line (empty model ⇒ omit/inherit) — and the **effort** half via an explicit imperative instruction in the dispatch prompt — ``Operate at `` reasoning effort for this task.`` (empty effort ⇒ omit the instruction; the Agent tool has no effort param). The Claude Code adapter is the Agent tool's `model` parameter; the resolution itself is provider-neutral. The `review` stage (Step 2) resolves **once** and applies the same model AND the same effort-prompt instruction to both reviewer sub-agents AND the merge. ### Resumability @@ -57,7 +57,7 @@ Check `progress` from preflight. Skip stages already `done`. If `{terminal}: don *(Skip if `progress.apply` is `done`.)* Since the intake gate already passed in pre-flight, if `progress.intake` is not `done`, finish intake: `fab status finish intake {driver}` (auto-activates apply). -Resolve the apply model: run `fab resolve-agent apply`, surface the resolved `model=/effort=` (echo them and/or carry them into the dispatch prompt — a skipped resolution is then visible), and apply both halves to the dispatch below — model via the Agent `model` param (empty ⇒ omit/inherit), effort via an imperative prompt instruction ``Operate at `` reasoning effort for this task.`` (empty effort ⇒ omit). Dispatch `/fab-continue` as subagent — Apply Behavior, change: `{id}` (prompt includes: do NOT run `fab status`; return results only). The subagent runs both apply sub-steps in a single invocation: (1) Plan Generation — co-generate `plan.md` (`## Requirements` + `## Tasks` + `## Acceptance`) from `intake.md` per **Plan Generation Procedure** (`_generation.md`), unless `plan.md` already exists; (2) Task Execution — parse unchecked tasks under `## Tasks`, execute in dependency order, run tests, mark `[x]` on completion. Returns completion status or failure with task ID and reason. +Resolve the apply model: run `fab resolve-agent apply --alias` (the `--alias` flag emits the Agent-tool-valid short alias on the `model=` line), surface the resolved `model=/effort=` (echo them and/or carry them into the dispatch prompt — a skipped resolution is then visible), and apply both halves to the dispatch below — model via the Agent `model` param (empty ⇒ omit/inherit), effort via an imperative prompt instruction ``Operate at `` reasoning effort for this task.`` (empty effort ⇒ omit). Dispatch `/fab-continue` as subagent — Apply Behavior, change: `{id}` (prompt includes: do NOT run `fab status`; return results only). The subagent runs both apply sub-steps in a single invocation: (1) Plan Generation — co-generate `plan.md` (`## Requirements` + `## Tasks` + `## Acceptance`) from `intake.md` per **Plan Generation Procedure** (`_generation.md`), unless `plan.md` already exists; (2) Task Execution — parse unchecked tasks under `## Tasks`, execute in dependency order, run tests, mark `[x]` on completion. Returns completion status or failure with task ID and reason. No `/fab-clarify` runs here. Under-specified requirements are resolved inline by the apply agent as graded SRAD assumptions in `plan.md` `## Assumptions` — not via any clarify ceremony. @@ -69,7 +69,7 @@ On success: run `fab status finish apply {driver}`. *(Skip if `progress.review` is `done`.)* -Resolve the review model **once**: run `fab resolve-agent review`, surface the resolved `model=/effort=`, and apply both halves to the review subagent dispatch — the same model AND the same effort-prompt instruction (``Operate at `` reasoning effort for this task.``) govern both reviewer sub-agents (inward + outward) and the merge (empty model ⇒ omit/inherit; empty effort ⇒ omit the instruction). Dispatch `/fab-continue` as subagent — Review Behavior, change: `{id}` (prompt includes: do NOT run `fab status`; return results only — verdict transitions belong to this orchestrator). The subagent reads `_review.md` for review dispatch instructions — both inward and outward sub-agents are defined there. It dispatches both sub-agents in parallel, merges their findings, and returns structured findings (must-fix / should-fix / nice-to-have) with pass/fail status. +Resolve the review model **once**: run `fab resolve-agent review --alias` (the `--alias` flag emits the Agent-tool-valid short alias on the `model=` line), surface the resolved `model=/effort=`, and apply both halves to the review subagent dispatch — the same model AND the same effort-prompt instruction (``Operate at `` reasoning effort for this task.``) govern both reviewer sub-agents (inward + outward) and the merge (empty model ⇒ omit/inherit; empty effort ⇒ omit the instruction). Dispatch `/fab-continue` as subagent — Review Behavior, change: `{id}` (prompt includes: do NOT run `fab status`; return results only — verdict transitions belong to this orchestrator). The subagent reads `_review.md` for review dispatch instructions — both inward and outward sub-agents are defined there. It dispatches both sub-agents in parallel, merges their findings, and returns structured findings (must-fix / should-fix / nice-to-have) with pass/fail status. **Pass**: run `fab status finish review {driver}`. Proceed to Step 3 (Hydrate). @@ -85,8 +85,8 @@ The agent triages the sub-agent's prioritized findings and autonomously selects 1. **Status pair**: run `fab status fail review` then `fab status reset apply {driver}`. This fail+reset pair repeats on **every** failed review verdict that starts a new cycle — not just the first failure — so every conforming run leaves the same `.status.yaml` history shape. 2. **Triage + rework action**: triage the prioritized findings, select exactly one path per the decision heuristics below, and apply its edits (uncheck tasks / edit `plan.md` / edit `## Requirements`). -3. **Re-dispatch apply**: re-run `fab resolve-agent apply`, surface the resolved `model=/effort=`, and apply both halves (model via the Agent `model` param; effort via the imperative prompt instruction ``Operate at `` reasoning effort for this task.``, omitted when empty), then dispatch `/fab-continue` as a subagent — Apply Behavior, same prompt contract as Step 1 (do NOT run `fab status`; return results only). On success, run `fab status finish apply {driver}` (auto-activates review). -4. **Fresh re-review**: re-run `fab resolve-agent review` (once, governing both reviewers + merge), surface the resolved `model=/effort=`, and apply both halves (same model + same effort-prompt instruction to both reviewers and the merge, omitted when empty), then dispatch a **fresh** `/fab-continue` Review Behavior subagent, same prompt contract as Step 2. Never reuse a prior review subagent's context. +3. **Re-dispatch apply**: re-run `fab resolve-agent apply --alias` (the `--alias` flag emits the Agent-tool-valid short alias on the `model=` line), surface the resolved `model=/effort=`, and apply both halves (model via the Agent `model` param; effort via the imperative prompt instruction ``Operate at `` reasoning effort for this task.``, omitted when empty), then dispatch `/fab-continue` as a subagent — Apply Behavior, same prompt contract as Step 1 (do NOT run `fab status`; return results only). On success, run `fab status finish apply {driver}` (auto-activates review). +4. **Fresh re-review**: re-run `fab resolve-agent review --alias` (once, governing both reviewers + merge), surface the resolved `model=/effort=`, and apply both halves (same model + same effort-prompt instruction to both reviewers and the merge, omitted when empty), then dispatch a **fresh** `/fab-continue` Review Behavior subagent, same prompt contract as Step 2. Never reuse a prior review subagent's context. 5. **Verdict**: pass → run `fab status finish review {driver}` and proceed to Step 3. Fail → if fewer than `{max_cycles}` cycles have run, start the next cycle at item 1 (the fail+reset pair fires again); after the `{max_cycles}`-th failed cycle, stop per **Stop** below. **Decision heuristics** (applied at item 2 of each cycle — disjoint: each failure description routes to exactly one path): @@ -114,7 +114,7 @@ Run /fab-continue for manual rework options. *(Skip if `progress.hydrate` is `done`.)* -Resolve the hydrate model: run `fab resolve-agent hydrate`, surface the resolved `model=/effort=`, and apply both halves to the dispatch below — model via the Agent `model` param (empty ⇒ omit/inherit), effort via the imperative prompt instruction ``Operate at `` reasoning effort for this task.`` (empty effort ⇒ omit). Dispatch `/fab-continue` as subagent — Hydrate Behavior, change: `{id}` (prompt includes: do NOT run `fab status`; return results only). The subagent validates review passed, hydrates into `docs/memory/`, and returns completion status. +Resolve the hydrate model: run `fab resolve-agent hydrate --alias` (the `--alias` flag emits the Agent-tool-valid short alias on the `model=` line), surface the resolved `model=/effort=`, and apply both halves to the dispatch below — model via the Agent `model` param (empty ⇒ omit/inherit), effort via the imperative prompt instruction ``Operate at `` reasoning effort for this task.`` (empty effort ⇒ omit). Dispatch `/fab-continue` as subagent — Hydrate Behavior, change: `{id}` (prompt includes: do NOT run `fab status`; return results only). The subagent validates review passed, hydrates into `docs/memory/`, and returns completion status. On success: run `fab status finish hydrate {driver}`. diff --git a/src/kit/skills/_preamble.md b/src/kit/skills/_preamble.md index cc21a4d9..5f9f7d70 100644 --- a/src/kit/skills/_preamble.md +++ b/src/kit/skills/_preamble.md @@ -345,12 +345,12 @@ and passes the resolved profile into the Agent dispatch: **The two halves dispatch through different seams (model param + prompt instruction).** The resolved profile has a model half and an effort half, and Claude Code consumes them through *different* seams: -- **Model → the Agent tool's `model` parameter.** Empty model ⇒ omit it (inherit the session/orchestrator model). +- **Model → the Agent tool's `model` parameter.** The Agent `model` param is a hard enum of short aliases (`opus`/`sonnet`/`haiku`/`fable`), so resolve the model half with `fab resolve-agent --alias` (emits the alias directly — see the Harness-adapter boundary note below). Empty model ⇒ omit it (inherit the session/orchestrator model). - **Effort → an explicit instruction in the subagent prompt.** The Agent tool has **no `effort` parameter**, so the only available seam for the effort half is the dispatched prompt itself: append an imperative line such as ``Operate at `xhigh` reasoning effort for this task.`` so the sub-agent self-selects its reasoning effort. Empty effort ⇒ omit the instruction entirely (mirroring the empty-model omit rule). This is imperfect — it relies on the sub-agent honoring the instruction — but it is the only per-sub-agent effort seam available today; a per-sub-agent `effort` parameter on the Agent tool would close it cleanly and is tracked as the residual harness ask (`docs/findings/per-stage-model-tier-application.md` § Suggested directions item 4). **Compliance visibility.** Each dispatch site MUST **surface the resolved `model=/effort=` lines** — carry them into the dispatch prompt (the effort line is already there per the seam above; co-locate the model line) and/or emit them in the orchestrator's own step output — so a *skipped* `fab resolve-agent` call (the sub-agent silently inherits the session profile) or a mis-resolved tier is **visible in output rather than silent**. There is no code-level guard fab can install (dispatch is harness-internal), so visibility is the available seam. An all-empty resolution (both `model=` and `effort=` empty) is itself worth surfacing — treat a non-empty resolved line as the expected case and a fully-empty result as a signal to flag rather than dispatch blind. -**Harness-adapter boundary (Claude Code).** The resolution (stage→tier→`{model, effort}`) is **provider-neutral**. Injecting the resolved model into the actual dispatch is **harness-specific**: for Claude Code that adapter is the **Agent tool's `model` parameter**, and the effort half is injected via the subagent-prompt instruction described above. One concrete harness detail: the Agent tool's `model` param takes a **short alias** (`opus` / `sonnet` / `haiku` / `fable`), **not** the full versioned id (`claude-opus-4-8`) that `fab resolve-agent` emits — the orchestrator maps the resolved id to the alias at the dispatch seam. This is named explicitly as the Claude-Code adapter, not as universal truth — and the coupling is not new (fab's entire subagent-dispatch design is already Claude-Code-shaped). +**Harness-adapter boundary (Claude Code).** The resolution (stage→tier→`{model, effort}`) is **provider-neutral**. Injecting the resolved model into the actual dispatch is **harness-specific**: for Claude Code that adapter is the **Agent tool's `model` parameter**, and the effort half is injected via the subagent-prompt instruction described above. One concrete harness detail: the Agent tool's `model` param takes a **short alias** (`opus` / `sonnet` / `haiku` / `fable`), **not** the full versioned id (`claude-opus-4-8`) the plain command emits. So for Agent-tool dispatch, **resolve the model half with `fab resolve-agent --alias`** — the `--alias` flag emits the Agent-tool-valid short alias directly on the `model=` line (empty ⇒ omit/inherit; a non-Claude override passes through verbatim), so no agent ever hand-maps the id. This is named explicitly as the Claude-Code adapter, not as universal truth — and the coupling is not new (fab's entire subagent-dispatch design is already Claude-Code-shaped). *(The operator launcher path is the deliberate exception: it appends `--model ` to a `claude` **CLI** invocation, which accepts full IDs, so it resolves WITHOUT `--alias`.)* **Review resolves once.** The `review` stage spawns two reviewer sub-agents (inward + outward) plus a merge. Resolve `fab resolve-agent review` **once** and apply the same profile to all three — the same model and the same effort-prompt instruction govern both reviewers and the merge; the mechanical merge runs at the reviewer's tier (an accepted stage-granularity tradeoff). diff --git a/src/kit/skills/fab-continue.md b/src/kit/skills/fab-continue.md index 3db8d78e..8c8e6c51 100644 --- a/src/kit/skills/fab-continue.md +++ b/src/kit/skills/fab-continue.md @@ -16,7 +16,7 @@ helpers: [_srad] Advance through the 6-stage Fab pipeline one step at a time. Each invocation handles the current stage's work and transitions to the next. When called with a stage argument, resets to that stage and re-runs from there. -> **Per-stage model (one-stage sequencer)**: post-intake `/fab-continue` is a **one-stage sequencer** — it dispatches its stage as a sub-agent (see Normal Flow Step 1), and per-stage model selection (`fab resolve-agent ` → `agent.tiers`, see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution) is resolved **once, immediately before that dispatch**, surfaced (so a skipped/mis-resolved tier is visible), and applied through both seams — model via the Agent tool's `model` param, effort via an imperative instruction in the sub-agent's prompt (empty effort ⇒ omit). There is no foreground execution path for apply/review/hydrate to leave the tier merely advisory: every post-intake stage runs dispatched, so `fab resolve-agent ` applies uniformly. (Intake is pre-boundary — it runs in the main session and is not tiered by `/fab-continue`.) +> **Per-stage model (one-stage sequencer)**: post-intake `/fab-continue` is a **one-stage sequencer** — it dispatches its stage as a sub-agent (see Normal Flow Step 1), and per-stage model selection (`fab resolve-agent ` → `agent.tiers`, see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution) is resolved **once, immediately before that dispatch**, surfaced (so a skipped/mis-resolved tier is visible), and applied through both seams — model via the Agent tool's `model` param (resolved with `fab resolve-agent --alias` so the alias is Agent-tool-valid), effort via an imperative instruction in the sub-agent's prompt (empty effort ⇒ omit). There is no foreground execution path for apply/review/hydrate to leave the tier merely advisory: every post-intake stage runs dispatched, so `fab resolve-agent ` applies uniformly. (Intake is pre-boundary — it runs in the main session and is not tiered by `/fab-continue`.) --- @@ -49,16 +49,16 @@ Dispatch on preflight's derived `stage` and `display_state`. If progress is `pen - **`active`** (intake) → Generate intake if missing and advance to `ready` (backward compat for interrupted generations) — main session, no dispatch - **`active`/`ready`** (post-intake execution) → resolve the stage's model, dispatch the stage's block as a sub-agent, then finish/fail/reset per the returned result -**Sub-agent dispatch contract (post-intake stages).** For apply / review / hydrate, before dispatching run `fab resolve-agent `, surface the resolved `model=/effort=` (so a skipped/mis-resolved tier is visible), and apply both halves to the Agent dispatch — model via the Agent `model` param (empty ⇒ omit/inherit), effort via an imperative instruction in the sub-agent's prompt (empty effort ⇒ omit; the Agent tool has no effort param) — per `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. Dispatch a sub-agent (Agent tool, `general-purpose`) to execute the named Behavior section *of this file* — this is a fresh-context sub-agent run of that block, NOT a recursive `/fab-continue` invocation — and the prompt MUST include **"do NOT run `fab status` commands; return results only"** plus the standard subagent context files. The sequencer (this `/fab-continue` invocation) runs the `finish`/`fail`/`reset` transition after the sub-agent returns. This is the universal block contract — the block never owns its transitions, regardless of caller. +**Sub-agent dispatch contract (post-intake stages).** For apply / review / hydrate, before dispatching run `fab resolve-agent --alias` (the `--alias` flag emits the Agent-tool-valid short alias on the `model=` line), surface the resolved `model=/effort=` (so a skipped/mis-resolved tier is visible), and apply both halves to the Agent dispatch — model via the Agent `model` param (empty ⇒ omit/inherit), effort via an imperative instruction in the sub-agent's prompt (empty effort ⇒ omit; the Agent tool has no effort param) — per `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. Dispatch a sub-agent (Agent tool, `general-purpose`) to execute the named Behavior section *of this file* — this is a fresh-context sub-agent run of that block, NOT a recursive `/fab-continue` invocation — and the prompt MUST include **"do NOT run `fab status` commands; return results only"** plus the standard subagent context files. The sequencer (this `/fab-continue` invocation) runs the `finish`/`fail`/`reset` transition after the sub-agent returns. This is the universal block contract — the block never owns its transitions, regardless of caller. | Derived stage | State | Action | |---------------|-------|--------| -| `intake` | `ready` | finish intake (auto-activates apply) → run the apply sequencer: `fab resolve-agent apply` → dispatch the apply sub-agent (its entry sub-step generates `plan.md` — including its `## Requirements` — then runs tasks) → on success `finish apply fab-continue` (auto-activates review) | +| `intake` | `ready` | finish intake (auto-activates apply) → run the apply sequencer: `fab resolve-agent apply --alias` → dispatch the apply sub-agent (its entry sub-step generates `plan.md` — including its `## Requirements` — then runs tasks) → on success `finish apply fab-continue` (auto-activates review) | | `intake` | `active` | generate intake if missing (read `.claude/skills/_generation/SKILL.md` first — Intake Generation Procedure) → advance to `ready` (main session — no dispatch) | -| `apply` | `active`/`ready` | `fab resolve-agent apply` → dispatch the apply sub-agent (entry: generate `plan.md` if absent; main: run tasks) → on completion run `finish apply fab-continue` (auto-activates review) | -| `review` | `active`/`ready` | `fab resolve-agent review` (once, for both reviewers + merge) → dispatch the review sub-agent → it returns merged findings + pass/fail. Pass: run `finish review fab-continue` (auto-activates hydrate). Fail: run `fail review` then `reset apply fab-continue`, then present the § Verdict rework menu (Path A) | +| `apply` | `active`/`ready` | `fab resolve-agent apply --alias` → dispatch the apply sub-agent (entry: generate `plan.md` if absent; main: run tasks) → on completion run `finish apply fab-continue` (auto-activates review) | +| `review` | `active`/`ready` | `fab resolve-agent review --alias` (once, for both reviewers + merge) → dispatch the review sub-agent → it returns merged findings + pass/fail. Pass: run `finish review fab-continue` (auto-activates hydrate). Fail: run `fail review` then `reset apply fab-continue`, then present the § Verdict rework menu (Path A) | | `review` | `failed` | *(Keys on `progress.review == failed` via the guard above. Preflight does surface a parked failure — `display_stage`/`display_state` read `review`/`failed` via DisplayStage's failed tier — but the derived routing `stage` lands on the next pending stage, so the progress map is the reliable key.)* Run `reset apply fab-continue` (the same post-fail reset the Verdict fail path runs — review cascades to `pending`, apply re-activates), then present the rework menu (Review Behavior § Verdict, **Fail** options table) directly and stop for the user's choice — do NOT re-run review first | -| `hydrate` | `active`/`ready` | `fab resolve-agent hydrate` → dispatch the hydrate sub-agent → on success run `finish hydrate fab-continue` | +| `hydrate` | `active`/`ready` | `fab resolve-agent hydrate --alias` → dispatch the hydrate sub-agent → on success run `finish hydrate fab-continue` | | `ship` | `active` | *(`ready` is unreachable — ship's AllowedStates are `{pending, active, done, skipped}`.)* Execute `/git-pr` behavior **with the resolved change as the explicit `` argument** (`/git-pr {name}` — transient override; its branch guard verifies the checked-out branch) → git-pr finishes ship internally (its Step 4b); only if the stage is still `active` after it returns, run `finish ship fab-continue` (auto-activates review-pr) | | `review-pr` | `active` | *(`ready` is unreachable — review-pr's AllowedStates are `{pending, active, done, failed, skipped}`.)* Execute `/git-pr-review` behavior **with the resolved change as the explicit `` argument** (`/git-pr-review {name}` — same transient-override + branch-guard contract) → it routes all terminal paths through its Step 6 and runs its own transitions. Pass/no-reviews: only if the stage is still `active` after it returns, run `finish review-pr fab-continue`. Timeout (Copilot review requested but not yet available): the stage is deliberately left `active` — report and stop, no re-finish. Fail: only if the stage is still `active` after it returns (its Step 6 normally runs `fail` itself), run `fail review-pr` | | `review-pr` | `failed` | *(Keys on `progress.review-pr == failed` via the guard above — the same progress-map mechanism as the `review`/`failed` row.)* Re-execute `/git-pr-review` behavior **with the resolved change as the explicit `` argument** — its Step 0 runs `fab status start review-pr`, and the CLI's review-pr `start` transition accepts `failed → active`; from there it routes terminal paths through its Step 6 with the same only-if-still-active guards as the row above. Do NOT route through `reset` — reset's From-set is `{done, ready, skipped}` (excludes `failed`); the CLI would error | @@ -158,7 +158,7 @@ Plan Generation sub-step is skipped when `plan.md` already exists (idempotent on Read `.claude/skills/_review/SKILL.md` (if not already loaded), then execute its **Shared Review Dispatch** end-to-end (Preconditions → Inward + Outward Sub-Agent Dispatch → Parallel Dispatch → Findings Merge). The `_review.md` skill defines both sub-agent dispatches (inward + outward) run in parallel, their preconditions, validation steps, structured output format, and the findings merge procedure. When dispatching the inward sub-agent, read `change_type` from the change's `.status.yaml` and pass it in the prompt per `_review.md`'s context contract (its Steps 7–8 skip condition keys on it). -> **Per-stage model resolution (nested reviewers)**: before dispatching the inward + outward reviewer sub-agents, run `fab resolve-agent review` **once**, surface the resolved `model=/effort=`, and apply both halves to BOTH reviewers and the merge — the same model via the Agent `model` param (empty ⇒ omit/inherit) and the same imperative effort instruction (``Operate at `` reasoning effort for this task.``) in each reviewer's prompt (empty effort ⇒ omit) — per `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. (This is the review block resolving the tier for its own nested sub-agents; it is independent of the sequencer's resolution of the `review` stage when it dispatched this block.) The Claude Code adapter is the Agent tool `model` parameter (effort rides the prompt); the resolution itself is provider-neutral. +> **Per-stage model resolution (nested reviewers)**: before dispatching the inward + outward reviewer sub-agents, run `fab resolve-agent review --alias` **once** (the `--alias` flag emits the Agent-tool-valid short alias on the `model=` line), surface the resolved `model=/effort=`, and apply both halves to BOTH reviewers and the merge — the same model via the Agent `model` param (empty ⇒ omit/inherit) and the same imperative effort instruction (``Operate at `` reasoning effort for this task.``) in each reviewer's prompt (empty effort ⇒ omit) — per `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. (This is the review block resolving the tier for its own nested sub-agents; it is independent of the sequencer's resolution of the `review` stage when it dispatched this block.) The Claude Code adapter is the Agent tool `model` parameter (effort rides the prompt); the resolution itself is provider-neutral. ### Verdict diff --git a/src/kit/skills/fab-ff.md b/src/kit/skills/fab-ff.md index bfe6d957..0a6f9978 100644 --- a/src/kit/skills/fab-ff.md +++ b/src/kit/skills/fab-ff.md @@ -34,7 +34,7 @@ Execute the **shared pipeline bracket** (`_pipeline.md`, loaded via `helpers:`) The bracket defines everything else: pre-flight (intake prerequisite + intake gate), context loading, resumability, Steps 1–3 (apply → review → hydrate), the auto-rework loop with its per-cycle choreography, and the exhaustion stop. -> **Per-stage model**: each stage dispatch in the bracket resolves `fab resolve-agent ` first, surfaces the resolved `model=/effort=` (so a skipped or mis-resolved tier is visible, not silent), then dispatches through the two seams — model via the Agent tool's `model` param (empty ⇒ omit/inherit) and effort via an imperative instruction in the dispatch prompt (``Operate at `` reasoning effort for this task.``; empty effort ⇒ omit, since the Agent tool has no effort param) — see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. +> **Per-stage model**: each stage dispatch in the bracket resolves the stage's profile first, surfaces the resolved `model=/effort=` (so a skipped or mis-resolved tier is visible, not silent), then dispatches through the two seams — model via the Agent tool's `model` param, resolved with `fab resolve-agent --alias` so the alias is Agent-tool-valid (empty ⇒ omit/inherit), and effort via an imperative instruction in the dispatch prompt (``Operate at `` reasoning effort for this task.``; empty effort ⇒ omit, since the Agent tool has no effort param) — see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. --- diff --git a/src/kit/skills/fab-fff.md b/src/kit/skills/fab-fff.md index e618f236..6b729061 100644 --- a/src/kit/skills/fab-fff.md +++ b/src/kit/skills/fab-fff.md @@ -34,7 +34,7 @@ Execute the **shared pipeline bracket** (`_pipeline.md`, loaded via `helpers:`) The bracket defines pre-flight (intake prerequisite + intake gate), context loading, resumability, Steps 1–3 (apply → review → hydrate), the auto-rework loop with its per-cycle choreography, and the exhaustion stop. The two steps below are fff-only. -> **Per-stage model**: every stage dispatch (the bracket's Steps 1–3 and the fff-only Steps 4–5 below) resolves `fab resolve-agent ` first, surfaces the resolved `model=/effort=` (so a skipped or mis-resolved tier is visible, not silent), then dispatches through the two seams — model via the Agent tool's `model` param (empty ⇒ omit/inherit) and effort via an imperative instruction in the dispatch prompt (``Operate at `` reasoning effort for this task.``; empty effort ⇒ omit, since the Agent tool has no effort param) — see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. +> **Per-stage model**: every stage dispatch (the bracket's Steps 1–3 and the fff-only Steps 4–5 below) resolves the stage's profile first, surfaces the resolved `model=/effort=` (so a skipped or mis-resolved tier is visible, not silent), then dispatches through the two seams — model via the Agent tool's `model` param, resolved with `fab resolve-agent --alias` so the alias is Agent-tool-valid (empty ⇒ omit/inherit), and effort via an imperative instruction in the dispatch prompt (``Operate at `` reasoning effort for this task.``; empty effort ⇒ omit, since the Agent tool has no effort param) — see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. > **Dispatch exception (ship and review-pr)**: unlike the bracket's `/fab-continue`-behavior subagents, `/git-pr` and `/git-pr-review` manage their own stage transitions internally — their subagent prompts do NOT carry the "do not run `fab status`" instruction. @@ -44,7 +44,7 @@ The bracket defines pre-flight (intake prerequisite + intake gate), context load *(Skip if `progress.ship` is `done`.)* -Resolve the ship model: run `fab resolve-agent ship`, surface the resolved `model=/effort=`, and apply both halves to the dispatch below — model via the Agent `model` param (empty ⇒ omit/inherit), effort via the imperative prompt instruction ``Operate at `` reasoning effort for this task.`` (empty effort ⇒ omit) — per `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. Dispatch `/git-pr` as subagent — the prompt instructs it to invoke `/git-pr {name}` (the **explicit change argument**, using the folder name per the `{name}` note above: git-pr resolves it as a transient override, so the subagent targets this pipeline's change rather than self-resolving the active one, and its branch-matches-change guard verifies the checked-out branch before mutating anything). The subagent commits, pushes, and creates a GitHub PR. Handles `fab status` integration internally (start/finish ship stage). Returns PR URL or error. +Resolve the ship model: run `fab resolve-agent ship --alias` (the `--alias` flag emits the Agent-tool-valid short alias on the `model=` line), surface the resolved `model=/effort=`, and apply both halves to the dispatch below — model via the Agent `model` param (empty ⇒ omit/inherit), effort via the imperative prompt instruction ``Operate at `` reasoning effort for this task.`` (empty effort ⇒ omit) — per `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. Dispatch `/git-pr` as subagent — the prompt instructs it to invoke `/git-pr {name}` (the **explicit change argument**, using the folder name per the `{name}` note above: git-pr resolves it as a transient override, so the subagent targets this pipeline's change rather than self-resolving the active one, and its branch-matches-change guard verifies the checked-out branch before mutating anything). The subagent commits, pushes, and creates a GitHub PR. Handles `fab status` integration internally (start/finish ship stage). Returns PR URL or error. **If git-pr fails**: STOP with the error from git-pr. The ship stage remains `active` for user retry. @@ -54,7 +54,7 @@ On success: `progress.ship` becomes `done`, `progress.review-pr` auto-activates. *(Skip if `progress.review-pr` is `done`.)* -Resolve the review-pr model: run `fab resolve-agent review-pr`, surface the resolved `model=/effort=`, and apply both halves to the dispatch below — model via the Agent `model` param (empty ⇒ omit/inherit), effort via the imperative prompt instruction ``Operate at `` reasoning effort for this task.`` (empty effort ⇒ omit) — per `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. Dispatch `/git-pr-review` as subagent — the prompt instructs it to invoke `/git-pr-review {name}` (the **explicit change argument**, same transient-override + branch-guard contract as Step 4). The subagent detects existing reviews, triages comments, applies fixes, and pushes. If no reviews exist, it requests a Copilot review and polls up to 10 minutes — see the timeout outcome below. Handles `fab status` integration internally (start/finish/fail review-pr stage). Returns completion status. +Resolve the review-pr model: run `fab resolve-agent review-pr --alias` (the `--alias` flag emits the Agent-tool-valid short alias on the `model=` line), surface the resolved `model=/effort=`, and apply both halves to the dispatch below — model via the Agent `model` param (empty ⇒ omit/inherit), effort via the imperative prompt instruction ``Operate at `` reasoning effort for this task.`` (empty effort ⇒ omit) — per `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. Dispatch `/git-pr-review` as subagent — the prompt instructs it to invoke `/git-pr-review {name}` (the **explicit change argument**, same transient-override + branch-guard contract as Step 4). The subagent detects existing reviews, triages comments, applies fixes, and pushes. If no reviews exist, it requests a Copilot review and polls up to 10 minutes — see the timeout outcome below. Handles `fab status` integration internally (start/finish/fail review-pr stage). Returns completion status. **If review-pr fails** (no PR found, processing error): STOP with the error. From aec2534fcfed162daef87e73db99f3f36fb475e6 Mon Sep 17 00:00:00 2001 From: Sahil Ahuja Date: Sat, 13 Jun 2026 20:43:38 +0530 Subject: [PATCH 3/5] Update ship status and record PR URL --- .../.history.jsonl | 1 + .../.status.yaml | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl b/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl index 9c4c037b..6e98423d 100644 --- a/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl @@ -10,3 +10,4 @@ {"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"hydrate","ts":"2026-06-13T15:06:52Z"} {"event":"review","result":"passed","ts":"2026-06-13T15:06:52Z"} {"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"ship","ts":"2026-06-13T15:11:58Z"} +{"action":"enter","driver":"git-pr","event":"stage-transition","stage":"review-pr","ts":"2026-06-13T15:13:26Z"} diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml b/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml index d0065156..957d9c52 100644 --- a/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml @@ -9,8 +9,8 @@ progress: apply: done review: done hydrate: done - ship: active - review-pr: pending + ship: done + review-pr: active plan: generated: true task_count: 12 @@ -33,8 +33,10 @@ stage_metrics: apply: {started_at: "2026-06-13T14:50:39Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-13T14:59:57Z"} review: {started_at: "2026-06-13T14:59:57Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-13T15:06:52Z"} hydrate: {started_at: "2026-06-13T15:06:52Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-13T15:11:58Z"} - ship: {started_at: "2026-06-13T15:11:58Z", driver: fab-fff, iterations: 1} -prs: [] + ship: {started_at: "2026-06-13T15:11:58Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-13T15:13:26Z"} + review-pr: {started_at: "2026-06-13T15:13:26Z", driver: git-pr, iterations: 1} +prs: + - https://github.com/sahil87/fab-kit/pull/414 true_impact: added: 231 deleted: 0 @@ -50,4 +52,4 @@ true_impact: computed_at: "2026-06-13T15:11:58Z" computed_at_stage: hydrate # true_impact: lazily created on first apply-finish (no placeholder here). -last_updated: 2026-06-13T15:11:58Z +last_updated: 2026-06-13T15:13:26Z From 0a9dea0e62d2de5a2f7b40493a29c4a12b0ded95 Mon Sep 17 00:00:00 2001 From: Sahil Ahuja Date: Sat, 13 Jun 2026 20:51:29 +0530 Subject: [PATCH 4/5] fix: address review feedback from @Copilot --- docs/specs/skills/SPEC-_pipeline.md | 17 +++++++++-------- docs/specs/skills/SPEC-fab-continue.md | 6 +++--- docs/specs/skills/SPEC-fab-ff.md | 2 +- docs/specs/skills/SPEC-fab-fff.md | 2 +- src/go/fab/internal/agent/agent_test.go | 6 +++++- src/kit/skills/_preamble.md | 2 +- 6 files changed, 20 insertions(+), 15 deletions(-) diff --git a/docs/specs/skills/SPEC-_pipeline.md b/docs/specs/skills/SPEC-_pipeline.md index cb6c0ea1..40161ace 100644 --- a/docs/specs/skills/SPEC-_pipeline.md +++ b/docs/specs/skills/SPEC-_pipeline.md @@ -2,7 +2,7 @@ ## Summary -Shared pipeline bracket executed by `/fab-ff` and `/fab-fff` (added in 260611-szxd — f007/f071). The bracket is a **pure sequencer** (affirmed 260613-fgxx): dispatch block → read returned status/findings → decide proceed / loop / stop; it owns the `fab status` transitions and `fab resolve-agent ` resolution and never reaches into block internals. The subagent dispatch note's "do NOT run `fab status`; return results only" instruction is the **universal block contract** for post-intake `/fab-continue` blocks (Apply/Review/Hydrate) — not an override this orchestrator imposes — because plain `/fab-continue` is itself a one-stage sequencer that dispatches those blocks identically and runs their transitions after they return (see `SPEC-fab-continue.md`). Single authoritative source for everything the two drivers share: pre-flight (intake prerequisite + the single intake confidence gate, flat 3.0), context loading, the subagent dispatch note ("do NOT run `fab status`; return results only"), resumability (skip `done` stages; review-`failed` recovery via `fab status start review`), Steps 1–3 (apply with internal plan generation → review → hydrate — each preceded by `fab resolve-agent ` per-stage model resolution since 260613-l3ja, and since 260613-m3d4 each site **surfaces** the resolved `model=/effort=` (visibility — a skipped resolution is then detectable, not silent) and dispatches via two seams: model on the Agent `model` param, effort as an imperative instruction in the dispatch prompt (no Agent effort param; omitted when empty); review resolves once and applies the same model + same effort-prompt instruction to both reviewers + merge; rework items 3/4 re-resolve, re-surface, and re-inject before re-dispatch), the auto-rework loop with its **explicit per-cycle choreography**, the exhaustion stop, and the shared error rows. All user-facing stop/re-run guidance in the bracket (gate-fail, task-fail, exhaustion stop) names the change in the suggested commands — the run may be driving a non-active override, and argless re-runs would resolve the active change (260612-w7dp). +Shared pipeline bracket executed by `/fab-ff` and `/fab-fff` (added in 260611-szxd — f007/f071). The bracket is a **pure sequencer** (affirmed 260613-fgxx): dispatch block → read returned status/findings → decide proceed / loop / stop; it owns the `fab status` transitions and `fab resolve-agent ` resolution and never reaches into block internals. The subagent dispatch note's "do NOT run `fab status`; return results only" instruction is the **universal block contract** for post-intake `/fab-continue` blocks (Apply/Review/Hydrate) — not an override this orchestrator imposes — because plain `/fab-continue` is itself a one-stage sequencer that dispatches those blocks identically and runs their transitions after they return (see `SPEC-fab-continue.md`). Single authoritative source for everything the two drivers share: pre-flight (intake prerequisite + the single intake confidence gate, flat 3.0), context loading, the subagent dispatch note ("do NOT run `fab status`; return results only"), resumability (skip `done` stages; review-`failed` recovery via `fab status start review`), Steps 1–3 (apply with internal plan generation → review → hydrate — each preceded by `fab resolve-agent --alias` per-stage model resolution since 260613-l3ja (`--alias` since 260613-yky7 — emits the Agent-tool-valid short alias on the `model=` line), and since 260613-m3d4 each site **surfaces** the resolved `model=/effort=` (visibility — a skipped resolution is then detectable, not silent) and dispatches via two seams: model on the Agent `model` param, effort as an imperative instruction in the dispatch prompt (no Agent effort param; omitted when empty); review resolves once and applies the same model + same effort-prompt instruction to both reviewers + merge; rework items 3/4 re-resolve, re-surface, and re-inject before re-dispatch), the auto-rework loop with its **explicit per-cycle choreography**, the exhaustion stop, and the shared error rows. All user-facing stop/re-run guidance in the bracket (gate-fail, task-fail, exhaustion stop) names the change in the suggested commands — the run may be driving a non-active override, and argless re-runs would resolve the active change (260612-w7dp). **Parameters** (declared by each driver's own file): @@ -43,18 +43,19 @@ Driver (fab-ff / fab-fff) reads _pipeline.md with {driver}/{terminal} bound ├─ Resumability: skip done stages; {terminal}: done → already complete; │ review failed → fab status start review, resume from Step 2 │ -│ (each stage dispatch first runs `fab resolve-agent `, surfaces the -│ resolved model=/effort= (visibility — 260613-m3d4), then dispatches via two +│ (each stage dispatch first runs `fab resolve-agent --alias`, surfaces +│ the resolved model=/effort= (visibility — 260613-m3d4), then dispatches via two │ seams: model → Agent `model` param (empty ⇒ omit/inherit), effort → imperative │ instruction in the dispatch prompt (no Agent effort param; empty ⇒ omit; -│ 260613-m3d4) — 260613-l3ja established the resolve call; see _preamble.md -│ § Subagent Dispatch → Per-Stage Model Resolution) +│ 260613-m3d4) — 260613-l3ja established the resolve call, 260613-yky7 added +│ `--alias` (emits the Agent-tool-valid short alias on the model= line); see +│ _preamble.md § Subagent Dispatch → Per-Stage Model Resolution) │ -├─ Step 1: Apply (fab resolve-agent apply → subagent: /fab-continue Apply Behavior — plan co-gen + tasks) +├─ Step 1: Apply (fab resolve-agent apply --alias → subagent: /fab-continue Apply Behavior — plan co-gen + tasks) │ ├─ fab status finish intake {driver} (if intake not done) │ └─ fab status finish apply {driver} (on success) │ -├─ Step 2: Review (fab resolve-agent review ONCE for both reviewers + merge → +├─ Step 2: Review (fab resolve-agent review --alias ONCE for both reviewers + merge → │ │ subagent: /fab-continue Review Behavior → _review.md │ │ inward + outward dispatch, merged findings, pass/fail) │ ├─ Pass: fab status finish review {driver} → Step 3 @@ -63,7 +64,7 @@ Driver (fab-ff / fab-fff) reads _pipeline.md with {driver}/{terminal} bound │ items 3/4 re-resolve apply/review before re-dispatch) │ └─ Exhaustion: fab status fail review (no reset) → STOP │ -├─ Step 3: Hydrate (fab resolve-agent hydrate → subagent: /fab-continue Hydrate Behavior) +├─ Step 3: Hydrate (fab resolve-agent hydrate --alias → subagent: /fab-continue Hydrate Behavior) │ └─ fab status finish hydrate {driver} │ └─ {terminal} = hydrate → complete; {terminal} = review-pr → driver Steps 4–5 diff --git a/docs/specs/skills/SPEC-fab-continue.md b/docs/specs/skills/SPEC-fab-continue.md index adeb8798..b354f55a 100644 --- a/docs/specs/skills/SPEC-fab-continue.md +++ b/docs/specs/skills/SPEC-fab-continue.md @@ -4,11 +4,11 @@ Advances through the 6-stage pipeline one step at a time. Each invocation handles the current stage's work and transitions to the next. Supports reset to a given stage (legacy `tasks`/`spec` targets error with a pointer to the `apply` and `intake` reset routes). Handles all six stages: intake (the only planning stage), apply (co-generates `plan.md` `## Requirements` + `## Tasks` + `## Acceptance` at entry then runs tasks), review (sub-agent), hydrate, ship (delegates to `/git-pr` behavior), and review-pr (delegates to `/git-pr-review` behavior). -**Single post-intake execution mode — one-stage sequencer** (260613-fgxx): intake is the sole context boundary. Intake is the only stage `/fab-continue` runs in the main session; **every post-intake stage (apply / review / hydrate) is always dispatched as a sub-agent**, the same dispatch the orchestrators (`_pipeline.md`) perform. There is no foreground execution path for apply/review/hydrate. In this mode `/fab-continue` is a **one-stage sequencer**: it runs `fab resolve-agent ` immediately before the dispatch and applies the resolved tier (the two-seam model-param + effort-via-prompt mechanics are described in the **Per-stage model** paragraph below), includes the universal **"do NOT run `fab status`; return results only"** prompt contract, reads the returned status/findings, and owns the `finish`/`fail`/`reset` transition itself — identical sequencer/block split whether the caller is manual `/fab-continue` or an orchestrator. The three former dual-mode "When invoked as a subagent: do NOT run `fab status`" conditionals (Apply/Review/Hydrate Behavior) are removed; the instruction is now the universal block contract, not a per-caller override, and is NOT re-encoded as any "skip §Verdict when subagent" flag. Ship and review-pr are excluded from this rewrite — they self-manage their transitions (the dispatch exception). +**Single post-intake execution mode — one-stage sequencer** (260613-fgxx): intake is the sole context boundary. Intake is the only stage `/fab-continue` runs in the main session; **every post-intake stage (apply / review / hydrate) is always dispatched as a sub-agent**, the same dispatch the orchestrators (`_pipeline.md`) perform. There is no foreground execution path for apply/review/hydrate. In this mode `/fab-continue` is a **one-stage sequencer**: it runs `fab resolve-agent --alias` immediately before the dispatch and applies the resolved tier (the two-seam model-param + effort-via-prompt mechanics are described in the **Per-stage model** paragraph below), includes the universal **"do NOT run `fab status`; return results only"** prompt contract, reads the returned status/findings, and owns the `finish`/`fail`/`reset` transition itself — identical sequencer/block split whether the caller is manual `/fab-continue` or an orchestrator. The three former dual-mode "When invoked as a subagent: do NOT run `fab status`" conditionals (Apply/Review/Hydrate Behavior) are removed; the instruction is now the universal block contract, not a per-caller override, and is NOT re-encoded as any "skip §Verdict when subagent" flag. Ship and review-pr are excluded from this rewrite — they self-manage their transitions (the dispatch exception). **Failure recovery + idempotent reset** (260612-w7dp): a `review-pr`/`failed` dispatch row — keyed off `progress.review-pr == failed`, the same progress-map guard mechanism as the review row — re-executes `/git-pr-review` behavior (its Step 0 `start` accepts `failed → active` for review-pr; never `reset`, whose From-set `{done, ready, skipped}` excludes `failed`), so a failed PR review no longer falls through to "Change is complete." The ship and review-pr rows (incl. the failed row) pass the resolved change **explicitly** to `/git-pr`/`/git-pr-review` (`{name}` as the `` argument — the explicit-arg contract); the ship and review-pr **`active`** rows key on `active` only — `ready` is not in either stage's AllowedStates — while the review-pr failed row keys on the progress map's `failed`. The Reset Flow handles all non-resettable target states (reset From-set `{done, ready, skipped}`): already-`active` → skip the call and proceed (re-running a reset is a state-wise no-op — Constitution III); `failed` → route via the matching failed dispatch row (`start` owns failed→active, review/review-pr only); `pending` → error with advance guidance. All recovery pointers are executable: the unexecutable `/fab-clarify intake` form is replaced by `/fab-continue intake` then argless `/fab-clarify` (argless is correct in fab-continue's own messages — the change reference of the current invocation is implied, active or `[change-name]` override, and an Error Handling note tells override users to re-run with the same ``; cross-context sites like `_pipeline.md`'s stop guidance instead name the change in every command), with an explicit delete-`plan.md` note where plan regeneration is the intent; the `intake.md`-missing error points at `/fab-continue intake` instead of looping through plain `/fab-continue`. The Review Behavior call site reads `change_type` from `.status.yaml` and passes it in the inward sub-agent prompt per `_review.md`'s context contract. -**Per-stage model** (260613-l3ja, 260613-fgxx, 260613-m3d4): the one-stage sequencer resolves `fab resolve-agent ` immediately before dispatching each post-intake stage's block, **surfaces** the resolved `model=/effort=` (visibility — a skip is detectable, not silent; m3d4), and applies both halves via two seams: model on the Agent `model` param (empty ⇒ omit/inherit) and effort as an imperative instruction in the sub-agent's prompt (no Agent effort param; omitted when empty; m3d4). Since every post-intake stage now dispatches, per-stage selection applies uniformly across apply/review/hydrate regardless of caller — there is no foreground post-intake path left to be the advisory-only exception (this closes Gap 1a of the model-tier finding; Gap 1b visibility + Gap 2's effort half are closed by m3d4 via the surface + prompt-injection seams; the lone residual is a first-class per-sub-agent effort param on the Agent tool — a harness ask, not built). The Review block additionally resolves `fab resolve-agent review` **once** for its own nested reviewer sub-agents (inward + outward) and the merge — applying the same model + same effort-prompt instruction to all three, independent of the sequencer's resolution of the `review` stage (the Claude Code adapter is the Agent tool `model` param, effort rides the prompt; resolution is provider-neutral — see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution). Intake is pre-boundary and is not tiered by `/fab-continue`. +**Per-stage model** (260613-l3ja, 260613-fgxx, 260613-m3d4, 260613-yky7): the one-stage sequencer resolves `fab resolve-agent --alias` immediately before dispatching each post-intake stage's block (`--alias` since 260613-yky7 — emits the Agent-tool-valid short alias on the `model=` line), **surfaces** the resolved `model=/effort=` (visibility — a skip is detectable, not silent; m3d4), and applies both halves via two seams: model on the Agent `model` param (empty ⇒ omit/inherit) and effort as an imperative instruction in the sub-agent's prompt (no Agent effort param; omitted when empty; m3d4). Since every post-intake stage now dispatches, per-stage selection applies uniformly across apply/review/hydrate regardless of caller — there is no foreground post-intake path left to be the advisory-only exception (this closes Gap 1a of the model-tier finding; Gap 1b visibility + Gap 2's effort half are closed by m3d4 via the surface + prompt-injection seams; the lone residual is a first-class per-sub-agent effort param on the Agent tool — a harness ask, not built). The Review block additionally resolves `fab resolve-agent review --alias` **once** for its own nested reviewer sub-agents (inward + outward) and the merge — applying the same model + same effort-prompt instruction to all three, independent of the sequencer's resolution of the `review` stage (the Claude Code adapter is the Agent tool `model` param, effort rides the prompt; resolution is provider-neutral — see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution). Intake is pre-boundary and is not tiered by `/fab-continue`. **Helpers**: Declares `helpers: [_srad]` in frontmatter; `_generation` and `_review` are loaded **stage-conditionally** at point of use (apply entry / intake regeneration → `_generation`; Review Behavior entry → `_review`) per `_preamble.md` § Skill Helper Declaration stage-conditional loading. Hydrate/ship/review-pr invocations and apply-resumes load neither. @@ -149,7 +149,7 @@ User invokes /fab-continue [change-name] [stage] └─ Output: summary + Next: line ``` -> **Dispatch annotation** (260613-fgxx): in the APPLY / REVIEW / HYDRATE boxes above, the stage *work* runs inside a dispatched sub-agent (resolved via `fab resolve-agent ` and dispatched by the one-stage sequencer). The `Bash: fab status finish/fail/reset` lines shown in those boxes are run by the **sequencer** after the block returns — the dispatched block runs no `fab status` command. The boxes show the end-to-end stage picture, not block-internal actions. INTAKE is the only box that runs in the main session. +> **Dispatch annotation** (260613-fgxx): in the APPLY / REVIEW / HYDRATE boxes above, the stage *work* runs inside a dispatched sub-agent (resolved via `fab resolve-agent --alias` and dispatched by the one-stage sequencer). The `Bash: fab status finish/fail/reset` lines shown in those boxes are run by the **sequencer** after the block returns — the dispatched block runs no `fab status` command. The boxes show the end-to-end stage picture, not block-internal actions. INTAKE is the only box that runs in the main session. ### Tools used diff --git a/docs/specs/skills/SPEC-fab-ff.md b/docs/specs/skills/SPEC-fab-ff.md index e7fe7ac3..3877ac81 100644 --- a/docs/specs/skills/SPEC-fab-ff.md +++ b/docs/specs/skills/SPEC-fab-ff.md @@ -2,7 +2,7 @@ ## Summary -Fast-forward apply → review → hydrate (everything after intake) in one invocation. Since 260611-szxd the skill file is a **thin wrapper over the shared pipeline bracket in `_pipeline.md`** (see `SPEC-_pipeline.md`): it declares Purpose, Arguments, and the two bracket parameters — `{driver}` = `fab-ff`, `{terminal}` = `hydrate` — plus its own Output block. The bracket owns the single intake confidence gate (flat 3.0, all types), context loading, resumability (incl. the review-`failed` recovery via `fab status start review`), Steps 1–3, the auto-rework loop (`{max_cycles}`-cycle cap — the code-review.md Rework Budget knob, default 3 (c5tr); explicit per-cycle choreography, escalation after 2 consecutive fix-code), the exhaustion stop (terminal state: review `failed` — `/fab-continue`'s review-failed row presents the rework menu from there), and the shared error rows. No `/fab-clarify` runs inside the bracket — clarification is intake-only. All sub-skill invocations are dispatched as sub-agents with "do NOT run `fab status` commands; return results only" — the orchestrator owns those stages' transitions. Each stage dispatch in the bracket first resolves `fab resolve-agent ` (260613-l3ja), **surfaces** the resolved `model=/effort=` (visibility — a skip is detectable, not silent; 260613-m3d4), then dispatches via two seams: model on the Agent `model` param (empty ⇒ omit/inherit) and effort as an imperative instruction in the dispatch prompt (no Agent effort param; omitted when empty; 260613-m3d4) — see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. Accepts `--force` to bypass the intake gate. The wrapper's `{driver}` parameter row scopes the driver claim to the commands the bracket shows it on — the fail/recovery commands are deliberately driver-less (260612-w7dp). +Fast-forward apply → review → hydrate (everything after intake) in one invocation. Since 260611-szxd the skill file is a **thin wrapper over the shared pipeline bracket in `_pipeline.md`** (see `SPEC-_pipeline.md`): it declares Purpose, Arguments, and the two bracket parameters — `{driver}` = `fab-ff`, `{terminal}` = `hydrate` — plus its own Output block. The bracket owns the single intake confidence gate (flat 3.0, all types), context loading, resumability (incl. the review-`failed` recovery via `fab status start review`), Steps 1–3, the auto-rework loop (`{max_cycles}`-cycle cap — the code-review.md Rework Budget knob, default 3 (c5tr); explicit per-cycle choreography, escalation after 2 consecutive fix-code), the exhaustion stop (terminal state: review `failed` — `/fab-continue`'s review-failed row presents the rework menu from there), and the shared error rows. No `/fab-clarify` runs inside the bracket — clarification is intake-only. All sub-skill invocations are dispatched as sub-agents with "do NOT run `fab status` commands; return results only" — the orchestrator owns those stages' transitions. Each stage dispatch in the bracket first resolves `fab resolve-agent --alias` (260613-l3ja; `--alias` since 260613-yky7 — emits the Agent-tool-valid short alias on the `model=` line), **surfaces** the resolved `model=/effort=` (visibility — a skip is detectable, not silent; 260613-m3d4), then dispatches via two seams: model on the Agent `model` param (empty ⇒ omit/inherit) and effort as an imperative instruction in the dispatch prompt (no Agent effort param; omitted when empty; 260613-m3d4) — see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. Accepts `--force` to bypass the intake gate. The wrapper's `{driver}` parameter row scopes the driver claim to the commands the bracket shows it on — the fail/recovery commands are deliberately driver-less (260612-w7dp). **Helpers**: Declares `helpers: [_generation, _review, _srad, _pipeline]` in frontmatter per `docs/specs/skills.md § Skill Helpers`. diff --git a/docs/specs/skills/SPEC-fab-fff.md b/docs/specs/skills/SPEC-fab-fff.md index a509a95b..780456de 100644 --- a/docs/specs/skills/SPEC-fab-fff.md +++ b/docs/specs/skills/SPEC-fab-fff.md @@ -2,7 +2,7 @@ ## Summary -Full pipeline gated on the single intake gate (identical to fab-ff). Since 260611-szxd the skill file is a **thin wrapper over the shared pipeline bracket in `_pipeline.md`** (see `SPEC-_pipeline.md`) with the two bracket parameters — `{driver}` = `fab-fff`, `{terminal}` = `review-pr` — plus the fff-only Steps 4–5 (ship, review-pr), its own Output block, and the fff-only error rows. The bracket owns the gate, resumability, Steps 1–3, the auto-rework loop (`{max_cycles}`-cycle cap — the code-review.md Rework Budget knob, default 3 (c5tr); explicit per-cycle choreography, exhaustion terminal state review `failed`), and the shared error rows. Ship and review-pr are self-managed: `/git-pr` and `/git-pr-review` run their own stage transitions internally (the dispatch exception is noted in `fab-fff.md`); `/git-pr-review`'s timeout outcome deliberately leaves `review-pr` `active` and replaces `Pipeline complete.` with a re-run pointer. Since 260612-w7dp, Steps 4–5 dispatch with the **explicit change argument** (`/git-pr {name}`, `/git-pr-review {name}` — `{name}` is the change's folder name from preflight, chosen over the 4-char `{id}` because git-pr classifies any argument spelling a PR type word as a ``, and a 4-char id can collide with `feat`/`docs`/`test`; a folder name never matches a type token) so the subagents resolve the pipeline's target as a transient override instead of self-resolving the active change — and their branch-matches-change guards verify the checked-out branch before mutating anything. Accepts `--force` to bypass the intake gate. **Per-stage model** (260613-l3ja, m3d4): the fff-only Steps 4–5 run `fab resolve-agent ship` / `fab resolve-agent review-pr` before dispatching `/git-pr` / `/git-pr-review` (the bracket handles Steps 1–3) — each site **surfaces** the resolved `model=/effort=` (visibility — a skip is detectable) and dispatches via two seams: model on the Agent `model` param (empty ⇒ omit/inherit) and effort as an imperative instruction in the dispatch prompt (no Agent effort param; omitted when empty; 260613-m3d4); see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. +Full pipeline gated on the single intake gate (identical to fab-ff). Since 260611-szxd the skill file is a **thin wrapper over the shared pipeline bracket in `_pipeline.md`** (see `SPEC-_pipeline.md`) with the two bracket parameters — `{driver}` = `fab-fff`, `{terminal}` = `review-pr` — plus the fff-only Steps 4–5 (ship, review-pr), its own Output block, and the fff-only error rows. The bracket owns the gate, resumability, Steps 1–3, the auto-rework loop (`{max_cycles}`-cycle cap — the code-review.md Rework Budget knob, default 3 (c5tr); explicit per-cycle choreography, exhaustion terminal state review `failed`), and the shared error rows. Ship and review-pr are self-managed: `/git-pr` and `/git-pr-review` run their own stage transitions internally (the dispatch exception is noted in `fab-fff.md`); `/git-pr-review`'s timeout outcome deliberately leaves `review-pr` `active` and replaces `Pipeline complete.` with a re-run pointer. Since 260612-w7dp, Steps 4–5 dispatch with the **explicit change argument** (`/git-pr {name}`, `/git-pr-review {name}` — `{name}` is the change's folder name from preflight, chosen over the 4-char `{id}` because git-pr classifies any argument spelling a PR type word as a ``, and a 4-char id can collide with `feat`/`docs`/`test`; a folder name never matches a type token) so the subagents resolve the pipeline's target as a transient override instead of self-resolving the active change — and their branch-matches-change guards verify the checked-out branch before mutating anything. Accepts `--force` to bypass the intake gate. **Per-stage model** (260613-l3ja, m3d4): the fff-only Steps 4–5 run `fab resolve-agent ship --alias` / `fab resolve-agent review-pr --alias` before dispatching `/git-pr` / `/git-pr-review` (the bracket handles Steps 1–3; `--alias` since 260613-yky7 — emits the Agent-tool-valid short alias on the `model=` line) — each site **surfaces** the resolved `model=/effort=` (visibility — a skip is detectable) and dispatches via two seams: model on the Agent `model` param (empty ⇒ omit/inherit) and effort as an imperative instruction in the dispatch prompt (no Agent effort param; omitted when empty; 260613-m3d4); see `_preamble.md` § Subagent Dispatch → Per-Stage Model Resolution. **Helpers**: Declares `helpers: [_generation, _review, _srad, _pipeline]` in frontmatter per `docs/specs/skills.md § Skill Helpers`. diff --git a/src/go/fab/internal/agent/agent_test.go b/src/go/fab/internal/agent/agent_test.go index 457ac9e4..91df4716 100644 --- a/src/go/fab/internal/agent/agent_test.go +++ b/src/go/fab/internal/agent/agent_test.go @@ -162,7 +162,11 @@ func TestModelAlias(t *testing.T) { "some-unrecognized-model-id": "some-unrecognized-model-id", } for in, want := range cases { - t.Run(in, func(t *testing.T) { + name := in + if name == "" { + name = "empty" // avoid an empty subtest name (TestModelAlias/) + } + t.Run(name, func(t *testing.T) { if got := ModelAlias(in); got != want { t.Errorf("ModelAlias(%q) = %q, want %q", in, got, want) } diff --git a/src/kit/skills/_preamble.md b/src/kit/skills/_preamble.md index 5f9f7d70..d4f6350f 100644 --- a/src/kit/skills/_preamble.md +++ b/src/kit/skills/_preamble.md @@ -352,7 +352,7 @@ and passes the resolved profile into the Agent dispatch: **Harness-adapter boundary (Claude Code).** The resolution (stage→tier→`{model, effort}`) is **provider-neutral**. Injecting the resolved model into the actual dispatch is **harness-specific**: for Claude Code that adapter is the **Agent tool's `model` parameter**, and the effort half is injected via the subagent-prompt instruction described above. One concrete harness detail: the Agent tool's `model` param takes a **short alias** (`opus` / `sonnet` / `haiku` / `fable`), **not** the full versioned id (`claude-opus-4-8`) the plain command emits. So for Agent-tool dispatch, **resolve the model half with `fab resolve-agent --alias`** — the `--alias` flag emits the Agent-tool-valid short alias directly on the `model=` line (empty ⇒ omit/inherit; a non-Claude override passes through verbatim), so no agent ever hand-maps the id. This is named explicitly as the Claude-Code adapter, not as universal truth — and the coupling is not new (fab's entire subagent-dispatch design is already Claude-Code-shaped). *(The operator launcher path is the deliberate exception: it appends `--model ` to a `claude` **CLI** invocation, which accepts full IDs, so it resolves WITHOUT `--alias`.)* -**Review resolves once.** The `review` stage spawns two reviewer sub-agents (inward + outward) plus a merge. Resolve `fab resolve-agent review` **once** and apply the same profile to all three — the same model and the same effort-prompt instruction govern both reviewers and the merge; the mechanical merge runs at the reviewer's tier (an accepted stage-granularity tradeoff). +**Review resolves once.** The `review` stage spawns two reviewer sub-agents (inward + outward) plus a merge. Resolve `fab resolve-agent review --alias` **once** (the `--alias` flag emits the Agent-tool-valid short alias on the `model=` line, per the Harness-adapter boundary note above) and apply the same profile to all three — the same model and the same effort-prompt instruction govern both reviewers and the merge; the mechanical merge runs at the reviewer's tier (an accepted stage-granularity tradeoff). **Per-stage selection applies on every post-intake stage.** Per-stage selection is a property of dispatched sub-agent runs — and **every post-intake stage now dispatches a sub-agent**, including plain `/fab-continue` (which is a one-stage sequencer that resolves `fab resolve-agent ` and dispatches its stage's block — see `fab-continue.md` Normal Flow Step 1). So there is no post-intake foreground execution path left to be the exception; `fab resolve-agent` applies uniformly across apply/review/hydrate regardless of caller. Intake is pre-boundary (it runs in the main session and is not tiered by `/fab-continue`). The only residual "advisory" case is a stage skill genuinely run with no dispatch at all — fab cannot switch the session model mid-run, so such a skill MAY note "this stage is configured for X; you're on Y" but MUST NOT attempt to switch. *(The effort half of the tier is now injected via the subagent-prompt instruction described above — see "The two halves dispatch through different seams"; the lone remaining residual is a first-class per-sub-agent `effort` parameter on the Agent tool, which is a harness ask outside fab's control, not a fab gap.)* From 7e96b05bbea7d71a35e031ed456a4ca4d46eb858 Mon Sep 17 00:00:00 2001 From: Sahil Ahuja Date: Sat, 13 Jun 2026 20:52:28 +0530 Subject: [PATCH 5/5] Update review-pr status --- .../260613-yky7-resolve-agent-alias-flag/.history.jsonl | 1 + .../260613-yky7-resolve-agent-alias-flag/.status.yaml | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl b/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl index 6e98423d..9311f9e9 100644 --- a/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/.history.jsonl @@ -11,3 +11,4 @@ {"event":"review","result":"passed","ts":"2026-06-13T15:06:52Z"} {"action":"enter","driver":"fab-fff","event":"stage-transition","stage":"ship","ts":"2026-06-13T15:11:58Z"} {"action":"enter","driver":"git-pr","event":"stage-transition","stage":"review-pr","ts":"2026-06-13T15:13:26Z"} +{"event":"review","result":"passed","ts":"2026-06-13T15:22:15Z"} diff --git a/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml b/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml index 957d9c52..d3f46bbb 100644 --- a/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml +++ b/fab/changes/260613-yky7-resolve-agent-alias-flag/.status.yaml @@ -10,7 +10,7 @@ progress: review: done hydrate: done ship: done - review-pr: active + review-pr: done plan: generated: true task_count: 12 @@ -34,7 +34,7 @@ stage_metrics: review: {started_at: "2026-06-13T14:59:57Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-13T15:06:52Z"} hydrate: {started_at: "2026-06-13T15:06:52Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-13T15:11:58Z"} ship: {started_at: "2026-06-13T15:11:58Z", driver: fab-fff, iterations: 1, completed_at: "2026-06-13T15:13:26Z"} - review-pr: {started_at: "2026-06-13T15:13:26Z", driver: git-pr, iterations: 1} + review-pr: {started_at: "2026-06-13T15:13:26Z", driver: git-pr, iterations: 1, completed_at: "2026-06-13T15:22:15Z"} prs: - https://github.com/sahil87/fab-kit/pull/414 true_impact: @@ -52,4 +52,4 @@ true_impact: computed_at: "2026-06-13T15:11:58Z" computed_at_stage: hydrate # true_impact: lazily created on first apply-finish (no placeholder here). -last_updated: 2026-06-13T15:13:26Z +last_updated: 2026-06-13T15:22:15Z