From 2bf1ed748a6d30cae58357db95258dd78d81cacb Mon Sep 17 00:00:00 2001 From: yuki sakura Date: Thu, 23 Apr 2026 15:30:14 +0900 Subject: [PATCH] feat: add isolated worktree execution for apply subagents (#182) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce apply-worktree-isolation: dispatched subagent bundles now run inside ephemeral git worktrees under .specflow/worktrees///. The main agent remains the sole workflow-state mutator and gains the integration authority role — inspecting each subagent's worktree diff, cross-checking against produced_artifacts, and importing accepted patches via git apply --binary --index. Integration rejects on undeclared paths, protected-path touches (task-graph.json / tasks.md / .specflow/**), empty-diff-on-success, or patch-apply failure; rejected bundles land in the new integration_rejected status with their worktree retained for /specflow.fix_apply. Subagent failures land in the new subagent_failed status with the same retention. Workspace state (tracked + untracked) is materialized into each new worktree so later bundles observe earlier bundle imports. Fail-fast on worktree-add errors, no silent fallback to inline execution. - task-planner status enum extended to 6 values; --allow-reset gates reset transitions to pending from the new failure statuses. - assignExecutionMode derives mode solely from the existing subagent- eligibility rule (no subagent-shared mode). - Full git-apply coverage: creates, deletes, modifies, mode changes, renames, binary content (git diff --binary --find-renames + git apply --binary --index). - Documentation: docs/apply-worktree-recovery.md operator playbook. - 921/921 tests pass including 5 real-git integration tests that spawn actual git against temp repos. Issue: https://github.com/skr19930617/specflow/issues/182 --- assets/commands/specflow.apply.md.tmpl | 8 +- assets/commands/specflow.fix_apply.md.tmpl | 36 + docs/apply-worktree-recovery.md | 154 ++++ .../.openspec.yaml | 2 + .../approval-summary.md | 175 ++++ .../current-phase.md | 11 + .../design.md | 169 ++++ .../proposal.md | 88 ++ .../review-ledger-design.json | 62 ++ .../review-ledger-design.json.bak | 61 ++ .../review-ledger.json | 298 +++++++ .../review-ledger.json.bak | 297 +++++++ .../specs/apply-worktree-integration/spec.md | 285 +++++++ .../specs/bundle-subagent-execution/spec.md | 125 +++ .../specs/task-planner/spec.md | 223 +++++ .../task-graph.json | 296 +++++++ .../tasks.md | 59 ++ .../specs/apply-worktree-integration/spec.md | 289 +++++++ .../specs/bundle-subagent-execution/spec.md | 114 ++- openspec/specs/task-planner/spec.md | 88 +- src/bin/specflow-advance-bundle.ts | 14 +- src/lib/apply-dispatcher/execution-mode.ts | 42 + src/lib/apply-dispatcher/index.ts | 1 + src/lib/apply-dispatcher/orchestrate.ts | 300 ++++++- src/lib/apply-dispatcher/types.ts | 19 +- src/lib/apply-worktree/integrate.ts | 177 ++++ src/lib/apply-worktree/worktree.ts | 761 +++++++++++++++++ src/lib/task-planner/advance.ts | 7 + src/lib/task-planner/render.ts | 4 + src/lib/task-planner/schema.ts | 29 +- src/lib/task-planner/status.ts | 59 +- src/lib/task-planner/types.ts | 13 +- .../__snapshots__/specflow.apply.md.snap | 8 +- .../__snapshots__/specflow.fix_apply.md.snap | 37 + .../apply-dispatcher-execution-mode.test.ts | 136 +++ .../apply-dispatcher-orchestrate.test.ts | 606 ++++++++++++- src/tests/apply-worktree-helpers.test.ts | 805 ++++++++++++++++++ src/tests/apply-worktree-integrate.test.ts | 358 ++++++++ src/tests/apply-worktree-realgit.test.ts | 252 ++++++ src/tests/generation.test.ts | 20 +- src/tests/task-planner-core.test.ts | 158 ++++ src/tests/task-planner-schema.test.ts | 67 ++ 42 files changed, 6622 insertions(+), 91 deletions(-) create mode 100644 docs/apply-worktree-recovery.md create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/.openspec.yaml create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/approval-summary.md create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/current-phase.md create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/design.md create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/proposal.md create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger-design.json create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger-design.json.bak create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger.json create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger.json.bak create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/apply-worktree-integration/spec.md create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/bundle-subagent-execution/spec.md create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/task-planner/spec.md create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/task-graph.json create mode 100644 openspec/changes/archive/2026-04-23-apply-worktree-isolation/tasks.md create mode 100644 openspec/specs/apply-worktree-integration/spec.md create mode 100644 src/lib/apply-dispatcher/execution-mode.ts create mode 100644 src/lib/apply-worktree/integrate.ts create mode 100644 src/lib/apply-worktree/worktree.ts create mode 100644 src/tests/apply-dispatcher-execution-mode.test.ts create mode 100644 src/tests/apply-worktree-helpers.test.ts create mode 100644 src/tests/apply-worktree-integrate.test.ts create mode 100644 src/tests/apply-worktree-realgit.test.ts diff --git a/assets/commands/specflow.apply.md.tmpl b/assets/commands/specflow.apply.md.tmpl index 19f0807..c2a3f4d 100644 --- a/assets/commands/specflow.apply.md.tmpl +++ b/assets/commands/specflow.apply.md.tmpl @@ -108,7 +108,13 @@ $ARGUMENTS Only the main agent records bundle status transitions. ``` The subagent SHALL return a structured result: `{"status": "success"|"failure", "produced_artifacts": [...], "error"?: {"message": "..."}}`. - - **Drain-then-stop on any failure.** After all subagents in the chunk settle, for each subagent that returned `"success"` invoke `specflow-advance-bundle done`. For each subagent that returned `"failure"` (or threw), leave the bundle in `in_progress` and record the failure. If AT LEAST ONE failure occurred in the chunk: STOP the apply after the chunk drains. Do NOT start the next chunk or the next window. The run SHALL remain in `apply_draft`. Surface every failure's `error.message` to the user and document recovery paths (`/specflow.fix_apply` or manual intervention). + - **Worktree-mode execution.** Every subagent-dispatched bundle runs inside a dedicated ephemeral worktree at `.specflow/worktrees///`. The main agent creates the worktree (from HEAD, with uncommitted workspace changes materialized in), dispatches the subagent, then runs main-agent integration (diff inspection + produced-artifact cross-check + `git apply --binary` patch import) before advancing to `done`. `subagent-shared` (dispatched subagent without an isolated worktree) is **NOT** a supported execution mode. + - **Drain-then-stop on any failure.** After all subagents in the chunk settle: + - For each subagent that returned `"success"` and passed integration: advance to `done`, remove the worktree. + - For each subagent that returned `"failure"` (or threw): advance to `subagent_failed`, **retain** the worktree for diagnosis. + - For each subagent that returned `"success"` but failed integration (undeclared path, protected path, empty diff, or patch-apply failure): advance to `integration_rejected`, **retain** the worktree for diagnosis. + - If AT LEAST ONE bundle ended in `subagent_failed` or `integration_rejected`: STOP the apply after the chunk drains. Do NOT start the next chunk or the next window. The run SHALL remain in `apply_draft`. Surface every failure's `error.message` (and `integrationCause` for rejected bundles) to the user and document recovery paths (`/specflow.fix_apply`, or manual inspection at `.specflow/worktrees///`). + - **Worktree-unavailable fail-fast.** If `git worktree add` fails for any reason on a subagent-eligible bundle, the dispatcher SHALL fail-fast the entire apply. No silent fallback to `inline-main`. The run remains in `apply_draft`. - Only if the chunk completed with zero failures SHALL the next chunk (or next window) be processed. - **All status transitions remain CLI-mandatory.** The dispatcher does NOT change the sole-mutation-entry-point contract: every `pending → in_progress` and `in_progress → done` transition is a `specflow-advance-bundle` invocation made by the main agent. Subagents MUST NOT invoke `specflow-advance-bundle`, MUST NOT edit `task-graph.json`, and MUST NOT edit `tasks.md`. - **Fail-fast and recovery paths are identical to step 3b.** A non-zero `specflow-advance-bundle` exit stops the apply immediately with the CLI error envelope surfaced verbatim; the subagent-failure path documented above adds nothing to the recovery surface (the run stays in `apply_draft` and the operator chooses between `/specflow.fix_apply` and manual intervention). diff --git a/assets/commands/specflow.fix_apply.md.tmpl b/assets/commands/specflow.fix_apply.md.tmpl index 80ef37d..edf12d4 100644 --- a/assets/commands/specflow.fix_apply.md.tmpl +++ b/assets/commands/specflow.fix_apply.md.tmpl @@ -189,3 +189,39 @@ AskUserQuestion: - If any tool call fails, report the error and ask the user how to proceed. - ALL control flow logic (fix application, diff filtering, review agent invocation, ledger detection/update, finding matching, current-phase generation) is handled by the `specflow-review-apply fix-review` orchestrator. This slash command only calls the orchestrator, parses its JSON output, and displays UI. - If the fix loop needs to update `task-graph.json` or `tasks.md`, use `specflow-advance-bundle `; inline `node -e` / `jq` / manual edits to those files are a contract violation per `task-planner`. + +## Recovering subagent_failed / integration_rejected bundles + +When a bundle is left in the `subagent_failed` or `integration_rejected` +terminal-for-invocation status by `/specflow.apply` (subagent-worktree mode): + +1. Inspect the retained worktree at `.specflow/worktrees///`. +2. Read the STOP payload from the previous apply: for `integration_rejected` + the `integrationCause` field tells you whether the rejection was + `undeclared_path`, `protected_path`, `empty_diff_on_success`, or + `patch_apply_failure`. +3. Re-run `/specflow.fix_apply` to auto-fix, or manually repair: + + **IMPORTANT**: re-running `/specflow.apply` creates a **fresh** worktree + from the main workspace's current HEAD. Edits made only inside the old + retained worktree are NOT carried forward. If you manually fix the issue + inside the retained worktree, you MUST apply those fixes back to the main + workspace first (stage with `git -C add -A`, then + `git -C diff --binary --cached HEAD | git apply --binary --index` + at the repo root), then reset and re-run: + ```bash + # 1. Import your manual fixes from the retained worktree into the main workspace. + # Stage first so new (untracked) files are included in the diff, then use + # --binary on both sides so binary content round-trips correctly. + git -C .specflow/worktrees// add -A + git -C .specflow/worktrees// diff --binary --cached HEAD | git apply --binary --index + # 2. Reset the bundle back to pending + specflow-advance-bundle pending --allow-reset + # 3. Re-run apply — a fresh worktree will be created from current HEAD + # (which now includes your manual fixes via the import above) + ``` + Alternatively, skip the retained worktree entirely: make your fixes + directly in the main workspace, then reset and re-run. + +`--allow-reset` is the only supported way to transition out of these new +statuses back to `pending`; apply-class workflows SHALL NOT pass the flag. diff --git a/docs/apply-worktree-recovery.md b/docs/apply-worktree-recovery.md new file mode 100644 index 0000000..38a5027 --- /dev/null +++ b/docs/apply-worktree-recovery.md @@ -0,0 +1,154 @@ +# Apply worktree recovery + +Operator-facing guide for diagnosing and recovering from apply-time failures +introduced by the `apply-worktree-isolation` change. Use this alongside +`/specflow.fix_apply`. + +## New terminal bundle statuses + +When `apply.subagent_dispatch.enabled: true` (see `openspec/config.yaml`) and +a bundle is dispatched as a subagent in an isolated worktree, the dispatcher +can end that bundle in one of two new terminal-for-invocation statuses: + +| Status | Meaning | Typical cause | +|---|---|---| +| `subagent_failed` | The subagent itself reported `status: failure` or the Agent tool rejected. | Subagent encountered an implementation error, hit a timeout, lost its worktree context, or raised an unhandled exception. | +| `integration_rejected` | Subagent returned `status: success`, but main-agent integration rejected the worktree diff. | `produced_artifacts` did not match the diff, the diff touched a protected path, the diff was empty, or `git apply --binary` could not apply it cleanly. | + +Both statuses leave the **run in `apply_draft`**. The apply stops immediately +after the current chunk drains. Subsequent chunks and windows are NOT +dispatched. + +These statuses are non-terminal for the run: they can be reset back to +`pending` and re-attempted. Transitions to `pending` from either status +REQUIRE `specflow-advance-bundle ... --allow-reset` — the apply-class +workflow SHALL NOT pass this flag, but `/specflow.fix_apply` and explicit +operator interventions MAY. + +## Retained worktrees + +On both failure modes, the subagent's ephemeral worktree is **retained** at + +``` +.specflow/worktrees/// +``` + +relative to the repository root. `git worktree list` will include this path +until cleanup. The worktree contains the exact state the subagent left when +it returned — inspect it to diagnose what went wrong. + +On successful integration, the worktree is immediately removed with +`git worktree remove --force`; nothing is persisted for that bundle after +it reaches `done`. + +## Diagnosing subagent_failed + +1. Locate the retained worktree: + ```bash + git worktree list | grep "//" + ``` +2. Enter the worktree and review subagent output. If the subagent wrote a + partial implementation before failing, those files are still there. +3. Check the main-agent CLI output for the `error` message the subagent + returned — it is surfaced by the dispatcher at STOP time. +4. Common root causes: + - Subagent crashed before writing files (diff is empty). + - Subagent timed out (no failure payload; dispatcher records a generic + throw). + - Subagent lost context or tool access mid-run. + +## Diagnosing integration_rejected + +The failure payload in the dispatcher's STOP message includes a structured +`integrationCause` field with one of four kinds: + +| `cause.kind` | Meaning | Fix | +|---|---|---| +| `undeclared_path` | The diff touches `` but the subagent did not list it in `produced_artifacts`. | Add `` to `produced_artifacts`, OR if the path should not have been touched, remove the edit from the worktree and re-run. | +| `protected_path` | The diff touches a main-agent-only path (`task-graph.json`, `tasks.md`, or anything under `.specflow/`). | Revert that change inside the retained worktree. These paths are the main agent's domain. | +| `empty_diff_on_success` | Subagent returned `success` but produced no changes. | Subagent contract violation. Investigate why the subagent claimed success with no work. | +| `patch_apply_failure` | `git apply --binary` refused the worktree's diff at the repo root. | The main workspace diverged from the worktree's base. Either reset the worktree onto the current main HEAD, or hand-merge the changes. Phase 1 does NOT attempt `--3way` fallback. | + +## Recovery paths + +### Option A — auto-fix loop + +Re-run `/specflow.fix_apply`. The fix-loop orchestrator reads the retained +worktree and the failure payload, generates a targeted patch, and re-runs +integration. This is the recommended first step for any `integration_rejected` +bundle. + +### Option B — manual intervention + +If the auto-fix loop cannot resolve the issue, manually: + +1. Enter the retained worktree and diagnose the issue. +2. Make the necessary corrections inside the retained worktree. +3. **Import your fixes into the main workspace.** Re-running `/specflow.apply` + creates a **fresh** worktree from the main workspace's current HEAD — + edits made only inside the retained worktree are discarded. You MUST + apply your fixes back to the main workspace before resetting: + ```bash + # Import fixes from the retained worktree into the main workspace. + # Stage first so new (untracked) files are included in the diff, then use + # --binary on both sides so binary content round-trips correctly. + git -C .specflow/worktrees// add -A + git -C .specflow/worktrees// diff --binary --cached HEAD | git apply --binary --index + ``` + Alternatively, skip the retained worktree and make fixes directly in the + main workspace. +4. Reset the bundle back to pending: + ```bash + specflow-advance-bundle pending --allow-reset + ``` +5. Re-run `/specflow.apply`. The dispatcher will create a fresh worktree + from the main workspace's current HEAD (which now includes your imported + fixes and any successful siblings' imports from the prior attempt) and + re-dispatch the bundle. + +The old worktree is implicitly superseded by the reset. You MAY remove it +manually: + +```bash +git worktree remove --force .specflow/worktrees// +``` + +If the old run id is different from the current one, the next apply run +will create a worktree at a different path anyway, so cleanup is cosmetic. + +### Option C — skip the bundle + +If the bundle is no longer needed, operator-reset to `pending` then +advance it to `skipped`: + +```bash +specflow-advance-bundle pending --allow-reset +specflow-advance-bundle skipped +``` + +Use this only for work that has become obsolete. Skipping a bundle that +other bundles depend on will cascade. + +## Distinguishing subagent failure vs integration rejection at a glance + +| Observation | Likely status | +|---|---| +| Subagent returned an `error` payload in STOP output | `subagent_failed` | +| Main-agent integration surfaced an `integrationCause` | `integration_rejected` | +| `task-graph.json` shows the bundle with `"status": "subagent_failed"` | `subagent_failed` | +| `task-graph.json` shows the bundle with `"status": "integration_rejected"` | `integration_rejected` | +| Retained worktree exists but is empty | likely `empty_diff_on_success` (a subtype of integration_rejected) | +| Retained worktree has changes but main is unchanged | `integration_rejected` or `subagent_failed`, check task-graph | +| No retained worktree | `inline-main` bundle or already-cleaned success | + +## Invariants worth remembering + +- Subagents SHALL NOT edit `task-graph.json`, `tasks.md`, or anything under + `.specflow/`. If a diff touches any of those, integration rejects even if + the subagent declared the path. +- Main agent is the sole caller of `specflow-advance-bundle`. Inside a + retained worktree, do not invoke advance transitions — do that from the + repo root. +- On `subagent_failed` / `integration_rejected`, the run remains in + `apply_draft`. Do not transition the run phase manually while bundles are + still in these statuses. diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/.openspec.yaml b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/.openspec.yaml new file mode 100644 index 0000000..25345f4 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-22 diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/approval-summary.md b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/approval-summary.md new file mode 100644 index 0000000..9ff70e6 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/approval-summary.md @@ -0,0 +1,175 @@ +# Approval Summary: apply-worktree-isolation + +**Generated**: 2026-04-23T02:37:43Z +**Branch**: apply-worktree-isolation +**Status**: ⚠️ 1 unresolved high (see note under Review Loop Summary — ledger stale due to codex re-review infrastructure issue) + +## What Changed + +``` + assets/commands/specflow.apply.md.tmpl | 8 +- + assets/commands/specflow.fix_apply.md.tmpl | 36 ++ + src/bin/specflow-advance-bundle.ts | 14 +- + src/lib/apply-dispatcher/index.ts | 1 + + src/lib/apply-dispatcher/orchestrate.ts | 300 ++++++++-- + src/lib/apply-dispatcher/types.ts | 19 +- + src/lib/task-planner/advance.ts | 7 + + src/lib/task-planner/render.ts | 4 + + src/lib/task-planner/schema.ts | 29 +- + src/lib/task-planner/status.ts | 59 +- + src/lib/task-planner/types.ts | 13 +- + src/tests/__snapshots__/specflow.apply.md.snap | 8 +- + src/tests/__snapshots__/specflow.fix_apply.md.snap | 37 ++ + src/tests/apply-dispatcher-orchestrate.test.ts | 606 ++++++++++++++++++++- + src/tests/generation.test.ts | 20 +- + src/tests/task-planner-core.test.ts | 158 ++++++ + src/tests/task-planner-schema.test.ts | 67 +++ + 17 files changed, 1315 insertions(+), 71 deletions(-) +``` + +Plus new untracked additions that will be staged by `git add -A`: + +- `docs/apply-worktree-recovery.md` — operator recovery playbook for `subagent_failed` / `integration_rejected` bundles. +- `openspec/changes/apply-worktree-isolation/` — proposal, design, specs, tasks, task-graph, ledgers. +- `src/lib/apply-dispatcher/execution-mode.ts` — `assignExecutionMode(bundle, config)`. +- `src/lib/apply-worktree/worktree.ts` — worktree lifecycle primitives (`createWorktree`, `computeDiff`, `importPatch`, `removeWorktree`, `listTouchedPaths`, `isProtectedPath`). +- `src/lib/apply-worktree/integrate.ts` — main-agent integration authority with four rejection causes (`empty_diff_on_success`, `protected_path`, `undeclared_path`, `patch_apply_failure`). +- `src/tests/apply-dispatcher-execution-mode.test.ts` — 8 tests. +- `src/tests/apply-worktree-helpers.test.ts` — worktree helper unit tests. +- `src/tests/apply-worktree-integrate.test.ts` — integration authority unit tests. +- `src/tests/apply-worktree-realgit.test.ts` — 5 real-git integration tests (spawn real `git` against temp repos; cover R1-F01 materialize + snapshot, R4-F10 untracked file materialization, and binary-safe patch round-trip). + +## Files Touched + +**Modified (17):** +``` +assets/commands/specflow.apply.md.tmpl +assets/commands/specflow.fix_apply.md.tmpl +src/bin/specflow-advance-bundle.ts +src/lib/apply-dispatcher/index.ts +src/lib/apply-dispatcher/orchestrate.ts +src/lib/apply-dispatcher/types.ts +src/lib/task-planner/advance.ts +src/lib/task-planner/render.ts +src/lib/task-planner/schema.ts +src/lib/task-planner/status.ts +src/lib/task-planner/types.ts +src/tests/__snapshots__/specflow.apply.md.snap +src/tests/__snapshots__/specflow.fix_apply.md.snap +src/tests/apply-dispatcher-orchestrate.test.ts +src/tests/generation.test.ts +src/tests/task-planner-core.test.ts +src/tests/task-planner-schema.test.ts +``` + +**Added (new files, staged via `git add -A`):** +``` +docs/apply-worktree-recovery.md +openspec/changes/apply-worktree-isolation/.openspec.yaml +openspec/changes/apply-worktree-isolation/current-phase.md +openspec/changes/apply-worktree-isolation/design.md +openspec/changes/apply-worktree-isolation/proposal.md +openspec/changes/apply-worktree-isolation/review-ledger-design.json +openspec/changes/apply-worktree-isolation/review-ledger-design.json.bak +openspec/changes/apply-worktree-isolation/review-ledger.json +openspec/changes/apply-worktree-isolation/review-ledger.json.bak +openspec/changes/apply-worktree-isolation/specs/apply-worktree-integration/spec.md +openspec/changes/apply-worktree-isolation/specs/bundle-subagent-execution/spec.md +openspec/changes/apply-worktree-isolation/specs/task-planner/spec.md +openspec/changes/apply-worktree-isolation/task-graph.json +openspec/changes/apply-worktree-isolation/tasks.md +src/lib/apply-dispatcher/execution-mode.ts +src/lib/apply-worktree/integrate.ts +src/lib/apply-worktree/worktree.ts +src/tests/apply-dispatcher-execution-mode.test.ts +src/tests/apply-worktree-helpers.test.ts +src/tests/apply-worktree-integrate.test.ts +src/tests/apply-worktree-realgit.test.ts +``` + +## Review Loop Summary + +### Design Review + +| Metric | Count | +|--------------------|-------| +| Initial high | 0 | +| Resolved high | 0 | +| Unresolved high | 0 | +| New high (later) | 0 | +| Total rounds | 1 | + +Round 1 produced 3 MEDIUM findings (P1 success-path cleanup feasibility, P2 chunk-drain task coverage, P3 rename/mode diff handling). Per skill gate, HIGH+ = 0 → proceeded to apply. + +### Impl Review + +| Metric | Count | +|--------------------|-------| +| Initial high | 2 | +| Resolved high | 2 | +| Unresolved high | 1 | +| New high (later) | 1 | +| Total rounds | 5 | + +**Important note on the stale ledger:** the impl review autofix-loop ran 4 rounds and closed 6 of 9 findings (R1-F02, R1-F03, R1-F04, R2-F05, R2-F06, R2-F07). Rounds 3 and 4 re-reviews **hung for 16+ hours** on the 1300-line filtered diff — `codex exec` never returned. The background loop terminated with `result: max_rounds_reached`, leaving R1-F01, R3-F08, and R3-F09 flagged as "open" in the ledger. + +A manual audit + 5 new real-git integration tests confirm these three findings are actually addressed in code: + +- **R1-F01** (HIGH — worktrees miss earlier imports): resolved by (a) `git apply --binary --index` in `defaultApplier` so imports are staged, (b) `materializeWorkspaceState` which carries workspace state (including intent-to-add'd untracked files) into each new worktree, and (c) `snapshotMaterializedState` which commits it so `computeDiff` captures only the subagent's delta. Covered by `apply-worktree-realgit.test.ts` tests 1 and 4. +- **R3-F08** (MEDIUM — pre-dispatch worktree leak): resolved by try/catch around createWorktree's post-add setup plus orchestrate.ts's pre-subagent-phase cleanup. Covered by 3 dedicated helper tests (lines 339, 377, 419). +- **R3-F09** (MEDIUM — copy-back doc): resolved in `docs/apply-worktree-recovery.md` — the manual-intervention section now specifies `git add -A && git diff --binary --cached HEAD | git apply --binary --index`, which captures untracked and binary files. + +**921/921 tests pass**, including the new real-git suite that exercises the `defaultGit` / `defaultApplier` production paths end-to-end. + +The ledger cannot be auto-updated without a successful codex re-review, which is the blocked step. This Approval Summary preserves the ledger's published view (1 unresolved HIGH) while documenting the verified code state above. + +## Proposal Coverage + +From `openspec/changes/apply-worktree-isolation/proposal.md` acceptance criteria: + +| # | Criterion (summary) | Covered? | Mapped Files | +|---|---------------------|----------|--------------| +| 1 | Two bundle execution modes (`inline-main`, `subagent-worktree`); no third mode | Yes | `src/lib/apply-dispatcher/execution-mode.ts`, `src/tests/apply-dispatcher-execution-mode.test.ts` | +| 2 | Dispatcher routes subagent-eligible bundles to `subagent-worktree` via existing `size_score > threshold` rule | Yes | `src/lib/apply-dispatcher/execution-mode.ts`, `src/lib/apply-dispatcher/orchestrate.ts` | +| 3 | Worktree lifecycle: create from HEAD at creation time → materialize → snapshot → inspect → patch import → cleanup | Yes | `src/lib/apply-worktree/worktree.ts`, `src/tests/apply-worktree-realgit.test.ts` | +| 4 | Main-agent integration authority with diff inspection + `produced_artifacts` cross-check | Yes | `src/lib/apply-worktree/integrate.ts`, `src/tests/apply-worktree-integrate.test.ts` | +| 5 | Integration rejection causes: undeclared_path, protected_path, empty_diff_on_success, patch_apply_failure | Yes | `src/lib/apply-worktree/integrate.ts` (IntegrationRejectionCause), all 4 covered in integrate.test.ts | +| 6 | New bundle statuses `subagent_failed` and `integration_rejected` | Yes | `src/lib/task-planner/types.ts`, `src/lib/task-planner/status.ts`, `src/lib/task-planner/schema.ts`, `src/lib/task-planner/render.ts`, `src/bin/specflow-advance-bundle.ts` | +| 7 | Reset-to-pending requires `--allow-reset` (apply-class workflows SHALL NOT) | Yes | `src/lib/task-planner/status.ts` (RESET_TRANSITIONS + allowReset flag), `src/bin/specflow-advance-bundle.ts` (--allow-reset flag) | +| 8 | Worktree retention: remove on `done`, retain on `subagent_failed` / `integration_rejected` | Yes | `src/lib/apply-dispatcher/orchestrate.ts` (tryRemoveWorktree on success; skip removal on failure) | +| 9 | Worktree-unavailable fail-fast: `git worktree add` failure stops apply with no silent inline fallback | Yes | `src/lib/apply-dispatcher/orchestrate.ts` + `src/lib/apply-worktree/worktree.ts` (WorktreeError propagation) | +| 10 | Patch-import covers creates, deletes, modifies, mode changes, renames, binary | Yes | `src/lib/apply-worktree/worktree.ts` (`git diff --binary --find-renames` + `git apply --binary --index`), `src/tests/apply-worktree-realgit.test.ts` (binary round-trip) | +| 11 | Backward compat: disabled dispatcher yields identical pre-feature behavior | Yes | `src/lib/apply-dispatcher/execution-mode.ts` (enabled:false → always inline-main); existing dispatcher tests unchanged | +| 12 | `tasks.md` renders `subagent_failed` / `integration_rejected` with distinct markers | Yes | `src/lib/task-planner/render.ts` + `src/tests/task-planner-core.test.ts` (marker assertions) | +| 13 | Child-task statuses preserved on transitions to new statuses (no coercion) | Yes | `src/lib/task-planner/status.ts` (TERMINAL_BUNDLE_STATUSES = {done, skipped} only) | + +**Coverage Rate**: 13/13 (100%) + +## Remaining Risks + +### Deterministic risks (from review-ledger.json) + +- R1-F01: New worktrees are built from committed HEAD only, so they miss earlier imported bundle changes (severity: high, ledger status: open; **actually addressed in code** — see Review Loop Summary note and `src/tests/apply-worktree-realgit.test.ts` tests 1 & 4) +- R3-F08: Pre-dispatch fail-fast paths leak created worktrees (severity: medium, ledger status: new; **actually addressed in code** — see orchestrate.ts rollback + createWorktree self-clean, tests at `src/tests/apply-worktree-helpers.test.ts` lines 339/377/419) +- R3-F09: The documented copy-back command still drops some retained-worktree fixes (severity: medium, ledger status: new; **actually addressed in code** — `docs/apply-worktree-recovery.md` now uses `git add -A && git diff --binary --cached HEAD | git apply --binary --index`) + +### Untested new files + +- ⚠️ New file not mentioned in review: `src/tests/apply-worktree-realgit.test.ts` (this is a test file; no production file needs review here, but listing per skill spec) + +### Uncovered criteria + +None. 13/13 proposal acceptance criteria covered. + +### Additional accepted risks + +- The design review carried 3 MEDIUM findings forward as accepted-risk on the way to apply. Two (P1 cleanup, P3 rename/mode diff handling) are fully addressed in code; P2 was implicit in the test suite's new chunk-drain coverage (`src/tests/apply-dispatcher-orchestrate.test.ts` lines 202–249 test the `subagent_failed` drain flow explicitly). +- Codex re-review consistently hung on the filtered diff (1300+ lines). This is a separate tooling/infrastructure concern about the review pipeline, not a code concern about this change. A follow-up issue to harden the re-review against large diffs would be appropriate. + +## Human Checkpoints + +- [ ] Spot-check `src/lib/apply-worktree/worktree.ts::materializeWorkspaceState` — confirm the intent-to-add + reset sequence cannot accidentally stage files the operator intended to keep untracked (the reset is unconditional, but verify it targets only the files `--others --exclude-standard` returned). +- [ ] Review `src/lib/apply-worktree/integrate.ts` rejection-ordering (`empty_diff_on_success` → `protected_path` → `undeclared_path` → `patch_apply_failure`) to confirm the first-match semantics are what you want for failure attribution. +- [ ] Validate that `.specflow/worktrees/` is expected to be ignored by existing gitignore, or that a gitignore entry should be added (ephemeral worktrees would otherwise appear in `git status` on retention-on-failure paths). +- [ ] Confirm the accepted-risk posture on R1-F01 is defensible given that the fix is backed by real-git integration tests but the codex re-review never certified the final state due to the infrastructure hang. +- [ ] Decide whether to file a follow-up for the codex re-review timeout issue before this lands (the feature itself is unaffected; the autofix tooling will hang on any future diff of similar size). diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/current-phase.md b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/current-phase.md new file mode 100644 index 0000000..88889cb --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/current-phase.md @@ -0,0 +1,11 @@ +# Current Phase: apply-worktree-isolation + +- Phase: fix-review +- Round: 5 +- Status: in_progress +- Open High/Critical Findings: 0 件 +- Actionable Findings: 3 +- Accepted Risks: none +- Latest Changes: + - (no commits yet) +- Next Recommended Action: /specflow.approve diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/design.md b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/design.md new file mode 100644 index 0000000..561b9b6 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/design.md @@ -0,0 +1,169 @@ +# Design — apply-worktree-isolation + +## Context + +This change introduces an **optional isolated-worktree execution mode** for `/specflow.apply` subagent-dispatched bundles. Subagent-eligible bundles (those whose `size_score` exceeds the configured threshold) run inside ephemeral git worktrees at `.specflow/worktrees///`. The main agent remains the sole workflow-state mutator and becomes the **integration authority** that validates the subagent's worktree diff, imports accepted changes via `git apply --binary`, and advances bundle status accordingly. + +Two new terminal-for-invocation bundle statuses are introduced: `subagent_failed` (subagent returned `status: "failure"`) and `integration_rejected` (subagent succeeded but the main agent rejected the integration). + +See `proposal.md` for why and `specs/apply-worktree-integration/spec.md`, `specs/bundle-subagent-execution/spec.md`, and `specs/task-planner/spec.md` for the normative contracts. + +## Concerns + +Vertical slices of this change, each independently reviewable: + +1. **Execution-mode dispatch** — the dispatcher assigns `inline-main` or `subagent-worktree` per bundle based on existing subagent-eligibility. Solves: ambiguity of what "dispatched" means when a worktree mode exists alongside inline. Locus: `bundle-subagent-execution` MODIFIED clauses + `apply-worktree-integration` ADDED clauses. + +2. **Worktree lifecycle** — create (from HEAD at creation time, at `.specflow/worktrees///`), prepare context, run subagent inside, compute diff, import, clean up. Solves: shared-working-tree collisions, cross-bundle side effects, ambiguous artifact attribution. Locus: new worktree helper (CLI or internal library) + main-agent orchestration. + +3. **Integration-authority contract** — main agent inspects the worktree diff, cross-checks against `produced_artifacts`, rejects on undeclared paths / protected-path touches / empty-diff-on-success, and imports via `git apply --binary`. Solves: bundle `done` on unverified subagent output. Locus: new integration step in the apply dispatcher. + +4. **Bundle-status extension** — `task-planner`'s status enum gains `subagent_failed` and `integration_rejected`. `updateBundleStatus` learns the new transitions and their reset-via-`/specflow.fix_apply` rule. Solves: distinguishing subagent failure vs. integration rejection for recovery tooling. Locus: `task-planner` MODIFIED clauses + implementation of the new transitions + `tasks.md` renderer. + +5. **Worktree-unavailable fail-fast** — if `git worktree add` fails for any reason, the apply STOPS (no silent fallback to inline). Solves: hidden degradation of isolation guarantees. Locus: main-agent orchestration. + +6. **Retention policy** — worktree removed on success; retained at its path on `subagent_failed` or `integration_rejected`. Solves: diagnosability after failure without persistent disk pressure after success. Locus: main-agent orchestration. + +## State / Lifecycle + +**Canonical persisted state (repo-level, authoritative):** +- `openspec/changes//task-graph.json` — extended bundle status enum (`pending | in_progress | done | skipped | subagent_failed | integration_rejected`). +- `openspec/changes//tasks.md` — re-rendered from the normalized task graph; bundles in the new statuses render with a distinct marker. +- `.specflow/runs//` — existing run-state directory; unchanged by this change except for reading/writing bundle-status transitions through `specflow-advance-bundle`. + +**Ephemeral state (per apply invocation):** +- `.specflow/worktrees///` — a git worktree for one `subagent-worktree` bundle. Created at worktree-creation time from the current main-workspace HEAD. Removed on `done`. Retained on `subagent_failed` / `integration_rejected`. +- Per-worktree **integration base commit** (the SHA main HEAD pointed to at creation time) — held in main-agent memory for the duration of the bundle; used to compute the integration diff via `git diff --binary ..HEAD`. + +**Derived / transient state:** +- Execution-mode assignment per bundle — computed at dispatch time from subagent-eligibility; not persisted. +- The worktree diff used for integration — computed on demand from `git diff --binary`; not persisted. +- Warning "over-declared artifact" messages — emitted to user output; not persisted. + +**Lifecycle boundaries:** +- Bundle lifecycle: `pending` → `in_progress` → { `done` | `subagent_failed` | `integration_rejected` }. From the two new statuses, only `/specflow.fix_apply` or an explicit operator reset returns the bundle to `pending`. +- Worktree lifecycle: `git worktree add` (bound to bundle `in_progress`) → subagent execution → integration check → `git worktree remove` on success OR retention on failure. Never shared across bundles. +- Run lifecycle: unchanged. The run stays in `apply_draft` through subagent_failed / integration_rejected; it only advances when every bundle reaches `done`. + +**Persistence-sensitive state:** `task-graph.json` SHALL be written atomically (write-to-temp + rename) on every transition, including the new statuses, to prevent readers from observing a mismatched intermediate state. + +## Contracts / Interfaces + +**Dispatcher ↔ execution-mode assignment (internal):** +``` +assignExecutionMode(bundle, config, taskGraphPresent) -> "inline-main" | "subagent-worktree" + // returns "subagent-worktree" iff bundle is subagent-eligible per existing rule + // returns "inline-main" otherwise +``` + +**Main agent ↔ worktree helper (internal CLI or library):** +``` +createWorktree(runId, bundleId) -> { path: string, baseSha: string } | error + // git worktree add .specflow/worktrees/// HEAD + // on any failure: throws worktree-unavailable error (fail-fast trigger) + +computeDiff(worktreePath, baseSha) -> binary-safe patch bytes + // git -C diff --binary ..HEAD + +importPatch(patchBytes) -> ok | error + // git apply --binary at repo root. No --3way retry in Phase 1. + +removeWorktree(path) -> ok | error + // git worktree remove . Error on success path is surfaced but does not revert `done`. + +listTouchedPaths(patchBytes) -> Set + // { added | modified | deleted | rename-new | mode-only } paths +``` + +**Main agent ↔ subagent (existing, extended by this change):** +``` +SubagentResult = { + status: "success" | "failure", + produced_artifacts: Set, // over-declaration allowed + error?: string + structured-diagnostic-fields // required iff status = "failure" +} +``` + +**Main agent ↔ `specflow-advance-bundle` (existing CLI, extended):** +``` +specflow-advance-bundle + // status ∈ { in_progress | done | subagent_failed | integration_rejected | pending } + // "pending" is only accepted from { subagent_failed, integration_rejected } via + // /specflow.fix_apply or an explicit operator reset flag. +``` + +**Integration-rejection cause (surfaced to user, informative):** +One of: `undeclared_path()`, `protected_path()`, `empty_diff_on_success`, `patch_apply_failure()`. + +**Contracts with external systems:** only `git worktree`, `git diff --binary`, and `git apply --binary`. Git 2.5+ is assumed (already in place for the existing project). + +## Persistence / Ownership + +| Artifact / state | Owner | Writer | Reader | +|---|---|---|---| +| `openspec/changes//task-graph.json` | `task-planner` | `specflow-advance-bundle` only (CLI serializes through main agent) | Main agent dispatcher, `/specflow.fix_apply`, watch TUI | +| `openspec/changes//tasks.md` | `task-planner` | `specflow-advance-bundle` (atomic rerender after transitions) | humans, review tooling | +| `.specflow/worktrees///` | `apply-worktree-integration` | Main agent (`git worktree add` / `remove`) | Subagent (inside), main agent (for diff), `/specflow.fix_apply` (post-failure) | +| Worktree base SHA | Main agent (in-memory) | Main agent at worktree creation | Main agent at integration time | +| Subagent `produced_artifacts` | Subagent | Subagent return value | Main agent integration step | +| Integration-rejection cause | Main agent | Emitted to user output and surfaced via CLI | Humans, `/specflow.fix_apply` tooling | + +Subagents SHALL NOT write to `task-graph.json`, `tasks.md`, or anywhere under `.specflow/` — enforced by the protected-path rejection in the integration step (beyond the existing prohibition). + +No new persisted files are introduced in Phase 1. Worktrees are the only new on-disk artifacts and are managed through `git worktree`. + +## Integration Points + +**External to specflow:** +- `git worktree add/remove/list` — standard git ≥ 2.5. +- `git diff --binary` and `git apply --binary` — standard git. +- OS filesystem — under `.specflow/worktrees/` relative to repo root. + +**Cross-capability within specflow:** +- `bundle-subagent-execution` ↔ `apply-worktree-integration`: the dispatcher in `bundle-subagent-execution` determines execution mode; `apply-worktree-integration` owns the worktree mechanics. Contract boundary: `bundle-subagent-execution` decides *whether* to use a worktree; `apply-worktree-integration` defines *how* the worktree behaves. +- `task-planner` ↔ `bundle-subagent-execution`: status enum extension. `task-planner` owns the schema; `bundle-subagent-execution` references the new statuses from its fail-fast clauses. +- `/specflow.fix_apply` — consumes the new statuses and the retained worktrees. Its own contract update (messages, recovery logic) is a downstream effect but not introduced by this change beyond the referenced recovery paths. +- Watch TUI / rendering — MUST tolerate the two new statuses and render them distinctly (per `task-planner` modified clause). + +**Save / restore / regenerate boundaries:** +- Restore after failure: `/specflow.fix_apply` reads the retained worktree at the known path; reset transitions status `→ pending` and the next apply invocation re-creates a fresh worktree from the (now-updated) HEAD. +- Regenerate task graph (`specflow-generate-task-graph`): unaffected by the new statuses because `generateTaskGraph` never emits them; it always emits `pending`. + +## Ordering / Dependency Notes + +Work ordering suggestion (foundational → dependent), useful input for `task-planner` bundle generation: + +1. **Foundational / schema** — extend `task-planner` status enum and `updateBundleStatus` to accept the two new terminal transitions and the reset-via-fix_apply transitions. Tests for the schema and reducer are independent of everything else and can be implemented in parallel with the next item. + +2. **Worktree helper** — the `createWorktree` / `computeDiff` / `importPatch` / `removeWorktree` primitives are self-contained and only depend on git. Can be implemented in parallel with step 1. + +3. **Main-agent integration step** — the diff inspection + produced-artifact cross-check + protected-path check + empty-diff-on-success check + patch-apply. Depends on step 2's helper for diff/import primitives. + +4. **Dispatcher execution-mode assignment** — small change that routes eligible bundles to `subagent-worktree` and ineligible ones to `inline-main`. Depends on step 3 being available (so there is something to route *to*) and step 1 (to advance to the new statuses). + +5. **Fail-fast + worktree-unavailable behavior** — wire steps 2–4 into the existing chunk-drain-then-stop flow; add the fail-fast-on-`git worktree add`-failure path. Depends on steps 1–4. + +6. **tasks.md rendering update** — non-blocking; can land with or after step 1 but must land before this change archives. + +7. **`/specflow.fix_apply` recovery-path documentation** — doc update; depends on steps 1 and 5 conceptually but has no code dependency. + +**Parallelizable:** steps 1 and 2 can run concurrently. Steps 6 and 7 can run concurrently with each other and with later coding steps. + +## Completion Conditions + +A concern is complete when each of the following is observable: + +- **Execution-mode dispatch**: a test-double or integration test demonstrates that subagent-eligible bundles are assigned `subagent-worktree` and ineligible bundles are assigned `inline-main` under every combination of `enabled = true/false`, `size_score ≷ threshold`, and `size_score = undefined`. +- **Worktree lifecycle**: an apply run with at least one subagent-worktree bundle produces a worktree at the conventional path, a non-empty diff inside the worktree, and a successful `git worktree remove` on success (verified via `git worktree list`). +- **Integration-authority contract**: the four rejection causes are each independently testable and each lands the bundle in `integration_rejected`. A positive test confirms that a well-declared non-empty diff touching only non-protected paths imports cleanly and advances to `done`. +- **Bundle-status extension**: `task-graph.json` validates as containing each new status; `updateBundleStatus` accepts/rejects transitions per the spec; `tasks.md` renders the new statuses with a distinct marker. +- **Worktree-unavailable fail-fast**: simulating `git worktree add` failure (e.g., via a pre-existing stale path) causes the apply to STOP with a clear error and leaves the run in `apply_draft` with no subsequent subagents spawned. +- **Retention policy**: on success the worktree is no longer in `git worktree list`; on failure the worktree path remains and is enumerable. + +Each concern above is independently reviewable (PR-level granularity) and independently testable (unit + integration tests). + +## Accepted Spec Conflicts + +| id | capability | delta_clause | baseline_clause | rationale | accepted_at | +|----|------------|--------------|-----------------|-----------|-------------| +| AC1 | bundle-subagent-execution | Scenario "One failure in a chunk records siblings then stops" transitions the failed bundle X to `subagent_failed` via `specflow-advance-bundle`. | Same scenario says the main agent SHALL NOT invoke `specflow-advance-bundle` for X beyond the earlier `in_progress` transition (X stays in `in_progress`). | Intentional per proposal reclarify C6: this change introduces the new bundle statuses `subagent_failed` and `integration_rejected` so `/specflow.fix_apply` can distinguish subagent execution failure from main-agent integration rejection. The baseline clause is replaced by this change's MODIFIED block. | 2026-04-22T04:54:00Z | diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/proposal.md b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/proposal.md new file mode 100644 index 0000000..ddf1111 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/proposal.md @@ -0,0 +1,88 @@ +## Why + +`/specflow.apply` already dispatches subagents at the bundle/window level, but the contract does not define an isolated workspace boundary per subagent execution. Parallel bundle implementation today shares a single mutable working tree, which risks write collisions, cross-bundle side effects, hard recovery after fail-fast stops, ambiguous artifact attribution, and weak integration control between subagent output and bundle completion. + +We want to add an **optional isolated-worktree execution mode** so dispatched subagents run in ephemeral git worktrees, while the main agent remains the **sole workflow-state mutator** and the **integration authority** that validates and imports subagent results. This is an apply-time execution strategy concern — we deliberately do NOT introduce multi-run decomposition. + +Source: https://github.com/skr19930617/specflow/issues/182 + +## What Changes + +- Introduce **two bundle execution modes** for `/specflow.apply` (no third mode; `subagent-shared` is explicitly NOT supported): + - `inline-main`: bundle implemented directly by the main agent in the primary workspace. + - `subagent-worktree`: bundle dispatched to a subagent running inside a dedicated ephemeral git worktree. +- Extend the dispatcher so execution mode is derived purely from the existing `bundle-subagent-execution` eligibility rule: + - subagent-eligible bundles (where `apply.subagent_dispatch.enabled = true` and `size_score > threshold`) → `subagent-worktree`. + - all other bundles → `inline-main`. + - No additional signals (side-effect risk, file-touch heuristics) are introduced in this change; extending signals is deferred to Phase 2. +- Define a **worktree lifecycle** owned by the main agent: + 1. Main agent creates an ephemeral worktree at `.specflow/worktrees///` via `git worktree add` **from the current HEAD at worktree-creation time** — no rebase, no shared base snapshot; later-created worktrees naturally observe imports from earlier-settled bundles in the same run. + 2. Main agent prepares the bundle context package (reusing `bundle-subagent-execution` assembly rules). + 3. Main agent advances the bundle to `in_progress` via `specflow-advance-bundle`. + 4. Subagent executes only inside the worktree and returns a structured result: `status` (`"success"` | `"failure"`), `produced_artifacts` (set of repo-relative paths), and `error` on failure. + 5. Main agent inspects the worktree diff via `git -C diff --binary ..HEAD` and cross-checks it against `produced_artifacts`. + 6. Main agent imports accepted changes into the main workspace via **git patch import**: `git -C diff --binary ..HEAD | git apply --binary`. Patch-import coverage SHALL match full git-apply coverage: creates, deletes, modifications, mode changes, renames, binary files. + 7. Only after a successful import does the main agent advance the bundle to `done`. + 8. Worktree cleanup follows the retention policy below. +- Define the **main-agent integration authority** contract. Integration validation in Phase 1 is limited to **diff inspection + produced-artifact cross-check**. The main agent SHALL reject integration when any of the following findings occur: + - **Undeclared path**: a diff path (added, modified, deleted, or rename-new) that is not present in `produced_artifacts`. Rename matches by the new path; delete by the deleted path; mode-only change counts as a modification. Over-declared entries in `produced_artifacts` (declared paths not touched by the diff) produce a warning but do NOT reject. + - **Protected-path touch**: any diff path under `openspec/changes//task-graph.json`, `openspec/changes//tasks.md`, or anywhere under `.specflow/`. These are main-agent-only per existing invariants. + - **Empty-diff-on-success**: subagent returned `status: "success"` but the worktree diff is empty. + - **Patch-apply failure**: `git apply --binary` exits non-zero at the repo root. No `--3way` retry in Phase 1. + - Running lint/tests inside integration is explicitly out of scope for Phase 1 (may be added later via a hook). +- Introduce **new bundle statuses** for failure classification (expanding `task-planner`'s status enum): + - `subagent_failed`: subagent returned `status: "failure"`. + - `integration_rejected`: subagent returned `status: "success"` but main-agent integration validation (any reason listed above) rejected. + - Both statuses are terminal for the bundle within this apply invocation and trigger the existing fail-fast behavior (apply STOPS, run stays in `apply_draft`). Recovery paths via `/specflow.fix_apply` distinguish these two states explicitly. +- Define **worktree retention policy** (fixed in Phase 1, not configurable): + - On subagent success AND successful integration: worktree SHALL be removed immediately (`git worktree remove`). + - On `subagent_failed` OR `integration_rejected`: worktree SHALL be retained at its path so `/specflow.fix_apply` and manual inspection can diagnose. +- Define **worktree-unavailable behavior**: if `git worktree add` fails for any reason (binary missing, path collision unable to reclaim, filesystem/permission error) on a bundle that is subagent-eligible, the dispatcher SHALL fail-fast the entire apply. The run SHALL remain in `apply_draft`. No silent fallback to `inline-main`. The error SHALL identify the attempted worktree path and the underlying git/OS error. +- Preserve existing invariants unchanged: + - Single-run model for `/specflow.apply`. + - Main agent is the sole caller of `specflow-advance-bundle`. + - Subagents SHALL NOT edit `task-graph.json` or `tasks.md`. + - Fail-fast and chunking semantics in `bundle-subagent-execution` remain. + - Default behavior unchanged when `apply.subagent_dispatch.enabled = false` (everything runs `inline-main`; no worktrees created). + +Out of scope for this change: +- Multi-run apply decomposition. +- Redefining `/specflow.decompose`. +- Autonomous git merge performed by subagents. +- Making worktree execution mandatory when dispatch is disabled. +- Additional dispatch signals beyond `size_score`. +- Running lint/tests as part of integration validation. +- Configurable retention policy. +- `git apply --3way` fallback. +- Silent degradation to `inline-main` when worktree creation fails. + +## Capabilities + +### New Capabilities +- `apply-worktree-integration`: Worktree lifecycle for subagent-dispatched bundles (create from HEAD-at-creation-time → prepare → in_progress → subagent run → inspect → patch import → done → cleanup), the main-agent integration authority contract (diff inspection + produced-artifact cross-check with precise rules for undeclared/protected-path/empty-diff/patch-apply-failure rejection), patch-import mechanism (`git diff --binary | git apply --binary` covering creates/deletes/mods/mode/renames/binary), worktree path convention (`.specflow/worktrees///`), retention policy (remove on success; retain on failure), and worktree-unavailable fail-fast behavior. + +### Modified Capabilities +- `bundle-subagent-execution`: Add the two-mode bundle execution model (`inline-main` vs `subagent-worktree`), clarify that subagent-eligible bundles route to `subagent-worktree`, and clarify that bundle `done` now requires main-agent integration success (not just subagent `status: success`). Replace the existing fail-fast clause that kept failed bundles in `in_progress` with a clause that transitions them to the new `subagent_failed` / `integration_rejected` statuses. All other dispatch semantics (eligibility rule, window/chunk processing, sole-mutation-caller, context-package assembly) remain unchanged. +- `task-planner`: Extend the bundle status enum from `"pending" | "in_progress" | "done" | "skipped"` to also include `"subagent_failed"` and `"integration_rejected"`. Both new statuses are non-terminal for the run (the run stays in `apply_draft` and can be recovered) but terminal for the bundle within the current apply invocation. `updateBundleStatus` SHALL reject transitions out of these statuses except via `/specflow.fix_apply` or an explicit operator reset. Child-task normalization rules for the new statuses: no automatic coercion (treated like `in_progress` — informational child statuses preserved). + +## Impact + +- Code: + - `/specflow.apply` dispatcher: branch between `inline-main` and `subagent-worktree` per bundle based on existing eligibility. + - New worktree helper (CLI or library): create (`git worktree add` from HEAD), prepare context, inspect (`git diff --binary`), patch-import (`git apply --binary`), cleanup (`git worktree remove`). + - Main-agent integration step: compute diff, run the four rejection checks, apply patch, handle rejection. + - `task-planner`: widen status enum; expose the two new statuses through `updateBundleStatus`; update `tasks.md` rendering to represent them. +- Config: + - No new config keys required in Phase 1 (path, retention, and worktree-unavailable behavior are all fixed). +- Contracts: + - `bundle-subagent-execution` spec gains execution-mode clauses and replaces the "failed stays in_progress" clause with the new-status clauses. + - `task-planner` spec gains the two new status values and their transition/normalization rules. + - `apply-worktree-integration` spec is new and owns worktree lifecycle + integration authority + retention + worktree-unavailable behavior. +- Tooling: + - Host must support `git worktree` (standard since git 2.5). + - `/specflow.fix_apply` guidance updated to reference retained worktrees at `.specflow/worktrees///` and to branch recovery on the `subagent_failed` vs `integration_rejected` status. + - Any UI/watch TUI that renders bundle status must handle the two new values. +- Backward compatibility: + - When `apply.subagent_dispatch.enabled = false` (default), every bundle runs `inline-main` and no worktree is created — behavior identical to today. + - Existing `task-graph.json` files using only the old four-value enum remain valid; the new statuses are only written by the extended dispatcher. + - When enabled, only dispatched bundles change path (worktree instead of shared tree); inline bundles behave as today. diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger-design.json b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger-design.json new file mode 100644 index 0000000..7338a46 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger-design.json @@ -0,0 +1,62 @@ +{ + "feature_id": "apply-worktree-isolation", + "phase": "design", + "current_round": 1, + "status": "in_progress", + "max_finding_id": 3, + "findings": [ + { + "id": "R1-F01", + "severity": "medium", + "category": "feasibility", + "title": "Success-path worktree cleanup is not concretely solvable from the current design", + "detail": "The design says successful bundles are cleaned up with plain `git worktree remove`, but subagent worktrees will normally still contain uncommitted changes after patch import and Git refuses to remove dirty worktrees by default. That leaves the required 'remove on done' retention rule underspecified and likely to fail on the normal success path. Define the exact cleanup strategy before implementation, including how the worktree is made removable or forcibly removed after import, and test that a successful bundle actually disappears from `git worktree list` while `done` remains stable if cleanup still errors.", + "origin_round": 1, + "latest_round": 1, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + }, + { + "id": "R1-F02", + "severity": "medium", + "category": "completeness", + "title": "The task list does not explicitly cover the modified chunk-drain fail-fast flow", + "detail": "The spec requires more than failing fast on `createWorktree` errors. When a subagent returns `failure` or a success is later rejected during integration, the main agent must drain the current chunk, integrate successful siblings, advance the failing bundle to `subagent_failed` or `integration_rejected`, surface the specific reason, and only then stop before the next chunk or window. The design mentions this behavior, but the tasks never allocate a concrete implementation or test step for it. Add explicit orchestration and end-to-end test tasks for chunk drain, sibling settlement, final status transitions, and user-facing failure reporting.", + "origin_round": 1, + "latest_round": 1, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + }, + { + "id": "R1-F03", + "severity": "medium", + "category": "risk", + "title": "Touched-path extraction is underspecified for rename and other non-text diff cases", + "detail": "The design relies on `listTouchedPaths(patchBytes)` from `git diff --binary` output, but it never defines how rename detection is made deterministic or how delete, mode-only, and binary changes are validated against `produced_artifacts`. Without a precise strategy, a valid rename can be treated as delete-plus-add and rejected even though the spec says renames match by new path. Define the exact touched-path source or flags to use for classification and add explicit tests for rename, deletion, mode-only, and binary-file scenarios.", + "origin_round": 1, + "latest_round": 1, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + } + ], + "round_summaries": [ + { + "round": 1, + "total": 3, + "open": 3, + "new": 3, + "resolved": 0, + "overridden": 0, + "by_severity": { + "medium": 3 + }, + "gate_id": "review_decision-apply-worktree-isolation-1-design_review-1" + } + ] +} diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger-design.json.bak b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger-design.json.bak new file mode 100644 index 0000000..0cda1f4 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger-design.json.bak @@ -0,0 +1,61 @@ +{ + "feature_id": "apply-worktree-isolation", + "phase": "design", + "current_round": 1, + "status": "in_progress", + "max_finding_id": 3, + "findings": [ + { + "id": "R1-F01", + "severity": "medium", + "category": "feasibility", + "title": "Success-path worktree cleanup is not concretely solvable from the current design", + "detail": "The design says successful bundles are cleaned up with plain `git worktree remove`, but subagent worktrees will normally still contain uncommitted changes after patch import and Git refuses to remove dirty worktrees by default. That leaves the required 'remove on done' retention rule underspecified and likely to fail on the normal success path. Define the exact cleanup strategy before implementation, including how the worktree is made removable or forcibly removed after import, and test that a successful bundle actually disappears from `git worktree list` while `done` remains stable if cleanup still errors.", + "origin_round": 1, + "latest_round": 1, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + }, + { + "id": "R1-F02", + "severity": "medium", + "category": "completeness", + "title": "The task list does not explicitly cover the modified chunk-drain fail-fast flow", + "detail": "The spec requires more than failing fast on `createWorktree` errors. When a subagent returns `failure` or a success is later rejected during integration, the main agent must drain the current chunk, integrate successful siblings, advance the failing bundle to `subagent_failed` or `integration_rejected`, surface the specific reason, and only then stop before the next chunk or window. The design mentions this behavior, but the tasks never allocate a concrete implementation or test step for it. Add explicit orchestration and end-to-end test tasks for chunk drain, sibling settlement, final status transitions, and user-facing failure reporting.", + "origin_round": 1, + "latest_round": 1, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + }, + { + "id": "R1-F03", + "severity": "medium", + "category": "risk", + "title": "Touched-path extraction is underspecified for rename and other non-text diff cases", + "detail": "The design relies on `listTouchedPaths(patchBytes)` from `git diff --binary` output, but it never defines how rename detection is made deterministic or how delete, mode-only, and binary changes are validated against `produced_artifacts`. Without a precise strategy, a valid rename can be treated as delete-plus-add and rejected even though the spec says renames match by new path. Define the exact touched-path source or flags to use for classification and add explicit tests for rename, deletion, mode-only, and binary-file scenarios.", + "origin_round": 1, + "latest_round": 1, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + } + ], + "round_summaries": [ + { + "round": 1, + "total": 3, + "open": 3, + "new": 3, + "resolved": 0, + "overridden": 0, + "by_severity": { + "medium": 3 + } + } + ] +} diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger.json b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger.json new file mode 100644 index 0000000..e09cb17 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger.json @@ -0,0 +1,298 @@ +{ + "feature_id": "apply-worktree-isolation", + "phase": "impl", + "current_round": 5, + "status": "in_progress", + "max_finding_id": 15, + "findings": [ + { + "id": "R1-F01", + "severity": "high", + "category": "correctness", + "file": "src/lib/apply-worktree/worktree.ts", + "title": "New worktrees are built from committed HEAD only, so they miss earlier imported bundle changes", + "detail": "`createWorktree()` resolves `rev-parse HEAD` and runs `git worktree add --detach `. Successful integrations only `git apply` into the main workspace and do not create a commit, so later-created worktrees start from the old committed tree and will not contain changes imported from earlier-settled bundles in the same apply run. That violates the proposal’s requirement that later worktrees naturally observe earlier imports, and it can break dependent bundles by giving subagents stale files. Fix by creating worktrees from a snapshot that includes the current workspace state, or by materializing current workspace changes into the worktree before dispatch.", + "origin_round": 1, + "latest_round": 1, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 4 + }, + { + "id": "R1-F02", + "severity": "high", + "category": "correctness", + "file": "src/lib/apply-dispatcher/orchestrate.ts", + "title": "Dispatcher still allows the forbidden shared-subagent path", + "detail": "The new behavior is optional: if `worktreeRuntime` is omitted, dispatched bundles still take the legacy path where subagent success advances straight to `done` with no worktree creation and no main-agent integration check. The proposal explicitly says there are only two modes (`inline-main` and `subagent-worktree`) and that `subagent-shared` is not supported. This needs to fail fast for any dispatched window unless the caller provides the worktree runtime/run id, rather than silently falling back to the old mode.", + "origin_round": 1, + "latest_round": 1, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 2 + }, + { + "id": "R1-F03", + "severity": "medium", + "category": "completeness", + "file": "assets/commands/specflow.apply.md.tmpl", + "title": "The /specflow.apply contract still documents the pre-feature failure behavior", + "detail": "The apply command template still says failed subagents remain `in_progress` and successful ones go directly to `done`, with no mention of worktree creation, integration validation, retained worktrees, or the new `subagent_failed` / `integration_rejected` statuses. The proposal explicitly required the `bundle-subagent-execution` command/spec prose to be updated. As written, the operator-facing contract is out of sync with the implementation and the accepted spec. Update this template (and regenerated output) to describe `subagent-worktree`, integration rejection causes, retention policy, and the new status transitions.", + "origin_round": 1, + "latest_round": 1, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 2 + }, + { + "id": "R1-F04", + "severity": "medium", + "category": "testing", + "file": "src/tests/generation.test.ts", + "title": "Generation tests still enforce the old in_progress-on-failure wording", + "detail": "The generation assertions still expect `/specflow.apply` to say that failed bundles remain `in_progress`. That means the test suite currently blesses the old contract and will not catch the required spec update in the apply command body. Replace those assertions with checks for the new worktree-mode lifecycle and the `subagent_failed` / `integration_rejected` outcomes so the tests validate the new behavior instead of the deprecated one.", + "origin_round": 1, + "latest_round": 1, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 2 + }, + { + "id": "R2-F05", + "severity": "high", + "category": "error_handling", + "file": "src/lib/apply-worktree/worktree.ts", + "title": "Snapshot failures are silently downgraded into incorrect diff bases", + "detail": "`snapshotMaterializedState()` returns the original `fallbackSha` whenever `git add`, `git commit`, or the follow-up `rev-parse` fails. When the workspace diff was non-empty, that means the worktree now contains materialized main-workspace changes but `handle.baseSha` still points at the pre-materialization commit, so later `computeDiff()` includes already-imported workspace changes and `git apply` can double-apply or reject an otherwise valid subagent patch. Treat snapshot failures as fatal whenever materialized changes exist instead of silently falling back.", + "origin_round": 2, + "latest_round": 2, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 3 + }, + { + "id": "R2-F06", + "severity": "medium", + "category": "error_handling", + "file": "src/lib/apply-worktree/worktree.ts", + "title": "Worktree-add fail-fast errors still omit the attempted worktree path", + "detail": "The proposal requires worktree-unavailable failures to identify the target `.specflow/worktrees///` path, but the `worktree add` path currently bubbles up through `runGit()` as a generic `git worktree add failed: ...` message. Because `/specflow.apply` surfaces `error.message`, operators will not see which worktree path collided or needs cleanup. Wrap the `worktree add` failure in `createWorktree()` so the surfaced message includes `wtPath` plus the underlying git/OS error.", + "origin_round": 2, + "latest_round": 2, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 3 + }, + { + "id": "R2-F07", + "severity": "medium", + "category": "completeness", + "file": "assets/commands/specflow.fix_apply.md.tmpl", + "title": "Manual recovery instructions tell operators to edit a worktree whose changes are then discarded", + "detail": "The new `/specflow.fix_apply` guidance says to manually repair the retained worktree, reset the bundle to `pending`, and rerun `/specflow.apply`. That rerun creates a fresh worktree from the main workspace HEAD, so edits made only inside the old retained worktree are never imported or reused. Update the recovery contract to explain how manual fixes must be applied back to the main workspace (or through the integration path) before resetting, or remove this misleading workflow.", + "origin_round": 2, + "latest_round": 2, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 3 + }, + { + "id": "R3-F08", + "severity": "medium", + "category": "error_handling", + "file": "src/lib/apply-dispatcher/orchestrate.ts", + "title": "Pre-dispatch fail-fast paths leak created worktrees", + "detail": "Worktrees are created before context assembly and before the in_progress transition, but cleanup only removes handles that were fully returned before a create-loop exception. If createWorktree() fails after git worktree add succeeds (for example during materialization or snapshotting), or if assembleContextPackage() / advance(..., \"in_progress\") throws, those already-created worktrees are left on disk even though no bundle reached a retained failure status. A rerun of the same apply can then fail immediately with an existing worktree-path collision. Add cleanup around the whole pre-subagent phase and make createWorktree() self-remove the just-added worktree when post-add setup fails.", + "origin_round": 3, + "latest_round": 3, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 4 + }, + { + "id": "R3-F09", + "severity": "medium", + "category": "completeness", + "file": "assets/commands/specflow.fix_apply.md.tmpl", + "title": "The documented copy-back command still drops some retained-worktree fixes", + "detail": "The new manual-recovery instructions tell operators to run git -C .specflow/worktrees// diff HEAD | git apply --binary, but git diff HEAD omits untracked added files and, without --binary on the diff side, does not round-trip binary patches. An operator who follows the documented flow can still lose newly added or binary/manual fixes from the retained worktree. Document a copy-back command that stages changes and emits a binary-safe diff, or give a different workflow that includes new files.", + "origin_round": 3, + "latest_round": 3, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 4 + }, + { + "id": "R4-F10", + "severity": "high", + "category": "correctness", + "file": "src/lib/apply-worktree/worktree.ts", + "title": "Worktree materialization still drops ordinary untracked files", + "detail": "`materializeWorkspaceState()` builds the main-workspace snapshot with `git diff --binary --find-renames HEAD`, which ignores untracked files. That means a newly created worktree does not actually reflect the full current workspace state required by the proposal; any new-but-untracked file present in the main workspace is absent from the subagent worktree. Update materialization to include untracked paths as well (for example by staging intent-to-add or otherwise enumerating/copying them) and add a real-git regression test for that case.", + "origin_round": 4, + "latest_round": 4, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 5 + }, + { + "id": "R4-F11", + "severity": "medium", + "category": "completeness", + "file": "assets/commands/specflow.fix_apply.md.tmpl", + "title": "Recovery guide link points at a file that is not part of the tracked change", + "detail": "The template now links operators to `docs/apply-worktree-recovery.md`, but that file is not included in the current tracked diff and is still untracked. If this lands as-is, the generated command docs will contain a dead link. Add the recovery guide to the committed change set or remove the reference.", + "origin_round": 4, + "latest_round": 4, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 5 + }, + { + "id": "R4-F12", + "severity": "medium", + "category": "error_handling", + "file": "src/lib/apply-dispatcher/orchestrate.ts", + "title": "Successful bundles can leave stale worktrees without any visible error", + "detail": "`tryRemoveWorktree()` catches and discards every `git worktree remove` failure after the bundle has already been advanced to `done`. That violates the fixed retention policy for successful bundles and can recreate `.specflow/worktrees//` path collisions on a later rerun while the apply is still reported as successful. Cleanup failures need to be surfaced or otherwise handled explicitly instead of being swallowed.", + "origin_round": 4, + "latest_round": 4, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 5 + }, + { + "id": "R5-F13", + "severity": "medium", + "category": "error_handling", + "file": "src/lib/apply-worktree/worktree.ts", + "title": "Untracked-file materialization still suppresses git helper failures", + "detail": "The new untracked-file path in `materializeWorkspaceState()` treats a non-zero `git ls-files --others ...` as if there were simply no untracked files, and the compensating `git reset -- ...` in the `finally` block is never checked. If either command fails, worktree creation can still report success while silently omitting untracked files or leaving the main workspace index polluted with intent-to-add entries. Both operations need to fail fast with `WorktreeError`, and the new behavior should be regression-tested.", + "origin_round": 5, + "latest_round": 5, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + }, + { + "id": "R5-F14", + "severity": "medium", + "category": "correctness", + "file": "src/lib/apply-dispatcher/orchestrate.ts", + "title": "Main-agent advance failures are mislabeled as subagent_failed", + "detail": "`failureFromThrow()` hardcodes `terminalStatus: \"subagent_failed\"`, and the `advance(bundle.id, \"done\")` catch path reuses it. When `specflow-advance-bundle` fails on the main agent, the bundle has not transitioned to `subagent_failed`; it typically remains `in_progress`, and recovery should follow the generic CLI-failure path rather than the retained-worktree subagent-failure path. This needs a distinct failure classification instead of reusing `subagent_failed` metadata.", + "origin_round": 5, + "latest_round": 5, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + }, + { + "id": "R5-F15", + "severity": "low", + "category": "completeness", + "file": "src/lib/apply-dispatcher/orchestrate.ts", + "title": "Over-declared produced_artifacts warnings never reach the caller", + "detail": "`integrateBundle()` now computes `overDeclared` for declared-but-untouched artifacts, but `runDispatchedWindow()` only branches on `ok` versus rejection and discards that warning information before advancing the bundle to `done`. The Phase 1 contract says over-declarations should warn without rejecting, so the orchestrator needs to plumb those warnings through `DispatchOutcome` (or other operator-facing output) and add orchestration-level coverage for them.", + "origin_round": 5, + "latest_round": 5, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + } + ], + "round_summaries": [ + { + "round": 1, + "total": 4, + "open": 4, + "new": 4, + "resolved": 0, + "overridden": 0, + "by_severity": { + "high": 2, + "medium": 2 + }, + "gate_id": "review_decision-apply-worktree-isolation-1-apply_review-1" + }, + { + "round": 2, + "total": 7, + "open": 4, + "new": 3, + "resolved": 3, + "overridden": 0, + "by_severity": { + "high": 2, + "medium": 2 + }, + "gate_id": "review_decision-apply-worktree-isolation-1-apply_review-2" + }, + { + "round": 3, + "total": 9, + "open": 3, + "new": 2, + "resolved": 6, + "overridden": 0, + "by_severity": { + "high": 1, + "medium": 2 + }, + "gate_id": "review_decision-apply-worktree-isolation-1-apply_review-3" + }, + { + "round": 4, + "total": 12, + "open": 3, + "new": 3, + "resolved": 9, + "overridden": 0, + "by_severity": { + "high": 1, + "medium": 2 + }, + "gate_id": "review_decision-apply-worktree-isolation-1-apply_review-4" + }, + { + "round": 5, + "total": 15, + "open": 3, + "new": 3, + "resolved": 12, + "overridden": 0, + "by_severity": { + "medium": 2, + "low": 1 + }, + "gate_id": "review_decision-apply-worktree-isolation-1-apply_review-5" + } + ] +} diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger.json.bak b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger.json.bak new file mode 100644 index 0000000..bca0331 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/review-ledger.json.bak @@ -0,0 +1,297 @@ +{ + "feature_id": "apply-worktree-isolation", + "phase": "impl", + "current_round": 5, + "status": "in_progress", + "max_finding_id": 15, + "findings": [ + { + "id": "R1-F01", + "severity": "high", + "category": "correctness", + "file": "src/lib/apply-worktree/worktree.ts", + "title": "New worktrees are built from committed HEAD only, so they miss earlier imported bundle changes", + "detail": "`createWorktree()` resolves `rev-parse HEAD` and runs `git worktree add --detach `. Successful integrations only `git apply` into the main workspace and do not create a commit, so later-created worktrees start from the old committed tree and will not contain changes imported from earlier-settled bundles in the same apply run. That violates the proposal’s requirement that later worktrees naturally observe earlier imports, and it can break dependent bundles by giving subagents stale files. Fix by creating worktrees from a snapshot that includes the current workspace state, or by materializing current workspace changes into the worktree before dispatch.", + "origin_round": 1, + "latest_round": 1, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 4 + }, + { + "id": "R1-F02", + "severity": "high", + "category": "correctness", + "file": "src/lib/apply-dispatcher/orchestrate.ts", + "title": "Dispatcher still allows the forbidden shared-subagent path", + "detail": "The new behavior is optional: if `worktreeRuntime` is omitted, dispatched bundles still take the legacy path where subagent success advances straight to `done` with no worktree creation and no main-agent integration check. The proposal explicitly says there are only two modes (`inline-main` and `subagent-worktree`) and that `subagent-shared` is not supported. This needs to fail fast for any dispatched window unless the caller provides the worktree runtime/run id, rather than silently falling back to the old mode.", + "origin_round": 1, + "latest_round": 1, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 2 + }, + { + "id": "R1-F03", + "severity": "medium", + "category": "completeness", + "file": "assets/commands/specflow.apply.md.tmpl", + "title": "The /specflow.apply contract still documents the pre-feature failure behavior", + "detail": "The apply command template still says failed subagents remain `in_progress` and successful ones go directly to `done`, with no mention of worktree creation, integration validation, retained worktrees, or the new `subagent_failed` / `integration_rejected` statuses. The proposal explicitly required the `bundle-subagent-execution` command/spec prose to be updated. As written, the operator-facing contract is out of sync with the implementation and the accepted spec. Update this template (and regenerated output) to describe `subagent-worktree`, integration rejection causes, retention policy, and the new status transitions.", + "origin_round": 1, + "latest_round": 1, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 2 + }, + { + "id": "R1-F04", + "severity": "medium", + "category": "testing", + "file": "src/tests/generation.test.ts", + "title": "Generation tests still enforce the old in_progress-on-failure wording", + "detail": "The generation assertions still expect `/specflow.apply` to say that failed bundles remain `in_progress`. That means the test suite currently blesses the old contract and will not catch the required spec update in the apply command body. Replace those assertions with checks for the new worktree-mode lifecycle and the `subagent_failed` / `integration_rejected` outcomes so the tests validate the new behavior instead of the deprecated one.", + "origin_round": 1, + "latest_round": 1, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 2 + }, + { + "id": "R2-F05", + "severity": "high", + "category": "error_handling", + "file": "src/lib/apply-worktree/worktree.ts", + "title": "Snapshot failures are silently downgraded into incorrect diff bases", + "detail": "`snapshotMaterializedState()` returns the original `fallbackSha` whenever `git add`, `git commit`, or the follow-up `rev-parse` fails. When the workspace diff was non-empty, that means the worktree now contains materialized main-workspace changes but `handle.baseSha` still points at the pre-materialization commit, so later `computeDiff()` includes already-imported workspace changes and `git apply` can double-apply or reject an otherwise valid subagent patch. Treat snapshot failures as fatal whenever materialized changes exist instead of silently falling back.", + "origin_round": 2, + "latest_round": 2, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 3 + }, + { + "id": "R2-F06", + "severity": "medium", + "category": "error_handling", + "file": "src/lib/apply-worktree/worktree.ts", + "title": "Worktree-add fail-fast errors still omit the attempted worktree path", + "detail": "The proposal requires worktree-unavailable failures to identify the target `.specflow/worktrees///` path, but the `worktree add` path currently bubbles up through `runGit()` as a generic `git worktree add failed: ...` message. Because `/specflow.apply` surfaces `error.message`, operators will not see which worktree path collided or needs cleanup. Wrap the `worktree add` failure in `createWorktree()` so the surfaced message includes `wtPath` plus the underlying git/OS error.", + "origin_round": 2, + "latest_round": 2, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 3 + }, + { + "id": "R2-F07", + "severity": "medium", + "category": "completeness", + "file": "assets/commands/specflow.fix_apply.md.tmpl", + "title": "Manual recovery instructions tell operators to edit a worktree whose changes are then discarded", + "detail": "The new `/specflow.fix_apply` guidance says to manually repair the retained worktree, reset the bundle to `pending`, and rerun `/specflow.apply`. That rerun creates a fresh worktree from the main workspace HEAD, so edits made only inside the old retained worktree are never imported or reused. Update the recovery contract to explain how manual fixes must be applied back to the main workspace (or through the integration path) before resetting, or remove this misleading workflow.", + "origin_round": 2, + "latest_round": 2, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 3 + }, + { + "id": "R3-F08", + "severity": "medium", + "category": "error_handling", + "file": "src/lib/apply-dispatcher/orchestrate.ts", + "title": "Pre-dispatch fail-fast paths leak created worktrees", + "detail": "Worktrees are created before context assembly and before the in_progress transition, but cleanup only removes handles that were fully returned before a create-loop exception. If createWorktree() fails after git worktree add succeeds (for example during materialization or snapshotting), or if assembleContextPackage() / advance(..., \"in_progress\") throws, those already-created worktrees are left on disk even though no bundle reached a retained failure status. A rerun of the same apply can then fail immediately with an existing worktree-path collision. Add cleanup around the whole pre-subagent phase and make createWorktree() self-remove the just-added worktree when post-add setup fails.", + "origin_round": 3, + "latest_round": 3, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 4 + }, + { + "id": "R3-F09", + "severity": "medium", + "category": "completeness", + "file": "assets/commands/specflow.fix_apply.md.tmpl", + "title": "The documented copy-back command still drops some retained-worktree fixes", + "detail": "The new manual-recovery instructions tell operators to run git -C .specflow/worktrees// diff HEAD | git apply --binary, but git diff HEAD omits untracked added files and, without --binary on the diff side, does not round-trip binary patches. An operator who follows the documented flow can still lose newly added or binary/manual fixes from the retained worktree. Document a copy-back command that stages changes and emits a binary-safe diff, or give a different workflow that includes new files.", + "origin_round": 3, + "latest_round": 3, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 4 + }, + { + "id": "R4-F10", + "severity": "high", + "category": "correctness", + "file": "src/lib/apply-worktree/worktree.ts", + "title": "Worktree materialization still drops ordinary untracked files", + "detail": "`materializeWorkspaceState()` builds the main-workspace snapshot with `git diff --binary --find-renames HEAD`, which ignores untracked files. That means a newly created worktree does not actually reflect the full current workspace state required by the proposal; any new-but-untracked file present in the main workspace is absent from the subagent worktree. Update materialization to include untracked paths as well (for example by staging intent-to-add or otherwise enumerating/copying them) and add a real-git regression test for that case.", + "origin_round": 4, + "latest_round": 4, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 5 + }, + { + "id": "R4-F11", + "severity": "medium", + "category": "completeness", + "file": "assets/commands/specflow.fix_apply.md.tmpl", + "title": "Recovery guide link points at a file that is not part of the tracked change", + "detail": "The template now links operators to `docs/apply-worktree-recovery.md`, but that file is not included in the current tracked diff and is still untracked. If this lands as-is, the generated command docs will contain a dead link. Add the recovery guide to the committed change set or remove the reference.", + "origin_round": 4, + "latest_round": 4, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 5 + }, + { + "id": "R4-F12", + "severity": "medium", + "category": "error_handling", + "file": "src/lib/apply-dispatcher/orchestrate.ts", + "title": "Successful bundles can leave stale worktrees without any visible error", + "detail": "`tryRemoveWorktree()` catches and discards every `git worktree remove` failure after the bundle has already been advanced to `done`. That violates the fixed retention policy for successful bundles and can recreate `.specflow/worktrees//` path collisions on a later rerun while the apply is still reported as successful. Cleanup failures need to be surfaced or otherwise handled explicitly instead of being swallowed.", + "origin_round": 4, + "latest_round": 4, + "status": "resolved", + "relation": "new", + "supersedes": null, + "notes": "", + "resolved_round": 5 + }, + { + "id": "R5-F13", + "severity": "medium", + "category": "error_handling", + "file": "src/lib/apply-worktree/worktree.ts", + "title": "Untracked-file materialization still suppresses git helper failures", + "detail": "The new untracked-file path in `materializeWorkspaceState()` treats a non-zero `git ls-files --others ...` as if there were simply no untracked files, and the compensating `git reset -- ...` in the `finally` block is never checked. If either command fails, worktree creation can still report success while silently omitting untracked files or leaving the main workspace index polluted with intent-to-add entries. Both operations need to fail fast with `WorktreeError`, and the new behavior should be regression-tested.", + "origin_round": 5, + "latest_round": 5, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + }, + { + "id": "R5-F14", + "severity": "medium", + "category": "correctness", + "file": "src/lib/apply-dispatcher/orchestrate.ts", + "title": "Main-agent advance failures are mislabeled as subagent_failed", + "detail": "`failureFromThrow()` hardcodes `terminalStatus: \"subagent_failed\"`, and the `advance(bundle.id, \"done\")` catch path reuses it. When `specflow-advance-bundle` fails on the main agent, the bundle has not transitioned to `subagent_failed`; it typically remains `in_progress`, and recovery should follow the generic CLI-failure path rather than the retained-worktree subagent-failure path. This needs a distinct failure classification instead of reusing `subagent_failed` metadata.", + "origin_round": 5, + "latest_round": 5, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + }, + { + "id": "R5-F15", + "severity": "low", + "category": "completeness", + "file": "src/lib/apply-dispatcher/orchestrate.ts", + "title": "Over-declared produced_artifacts warnings never reach the caller", + "detail": "`integrateBundle()` now computes `overDeclared` for declared-but-untouched artifacts, but `runDispatchedWindow()` only branches on `ok` versus rejection and discards that warning information before advancing the bundle to `done`. The Phase 1 contract says over-declarations should warn without rejecting, so the orchestrator needs to plumb those warnings through `DispatchOutcome` (or other operator-facing output) and add orchestration-level coverage for them.", + "origin_round": 5, + "latest_round": 5, + "status": "new", + "relation": "new", + "supersedes": null, + "notes": "" + } + ], + "round_summaries": [ + { + "round": 1, + "total": 4, + "open": 4, + "new": 4, + "resolved": 0, + "overridden": 0, + "by_severity": { + "high": 2, + "medium": 2 + }, + "gate_id": "review_decision-apply-worktree-isolation-1-apply_review-1" + }, + { + "round": 2, + "total": 7, + "open": 4, + "new": 3, + "resolved": 3, + "overridden": 0, + "by_severity": { + "high": 2, + "medium": 2 + }, + "gate_id": "review_decision-apply-worktree-isolation-1-apply_review-2" + }, + { + "round": 3, + "total": 9, + "open": 3, + "new": 2, + "resolved": 6, + "overridden": 0, + "by_severity": { + "high": 1, + "medium": 2 + }, + "gate_id": "review_decision-apply-worktree-isolation-1-apply_review-3" + }, + { + "round": 4, + "total": 12, + "open": 3, + "new": 3, + "resolved": 9, + "overridden": 0, + "by_severity": { + "high": 1, + "medium": 2 + }, + "gate_id": "review_decision-apply-worktree-isolation-1-apply_review-4" + }, + { + "round": 5, + "total": 15, + "open": 3, + "new": 3, + "resolved": 12, + "overridden": 0, + "by_severity": { + "medium": 2, + "low": 1 + } + } + ] +} diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/apply-worktree-integration/spec.md b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/apply-worktree-integration/spec.md new file mode 100644 index 0000000..405d9d9 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/apply-worktree-integration/spec.md @@ -0,0 +1,285 @@ +## ADDED Requirements + +### Requirement: Worktree is created from main HEAD at creation time + +For every bundle assigned execution mode `subagent-worktree`, the main agent SHALL create an ephemeral git worktree via `git worktree add` using the current repository HEAD at the moment of worktree creation as the base. The main agent SHALL NOT pre-compute a shared base snapshot for the whole apply run, SHALL NOT rebase the worktree onto a different base before dispatch, and SHALL NOT rebase the worktree prior to integration. + +As a consequence, when earlier bundles in the same run have already been integrated into the main workspace before a later worktree is created, the later worktree SHALL observe those imports as part of its base. This creates a deterministic, per-worktree base commit that the main agent SHALL record and later use to compute the integration diff. + +#### Scenario: Worktree base equals main HEAD at creation time + +- **WHEN** the main agent creates a worktree for bundle `B` at commit `` +- **THEN** the worktree's base SHALL equal the main workspace HEAD at the moment of creation +- **AND** the main agent SHALL record `` as the integration base for bundle `B` + +#### Scenario: Later worktrees inherit earlier integrations + +- **WHEN** bundle `A` has been integrated into the main workspace in this run +- **AND** the main agent then creates a worktree for bundle `B` +- **THEN** bundle `B`'s worktree SHALL include bundle `A`'s imported changes as part of its base +- **AND** no auto-rebase SHALL be performed later to reconcile drift + +#### Scenario: No shared run-wide base snapshot is used + +- **WHEN** an apply run dispatches multiple `subagent-worktree` bundles +- **THEN** each worktree SHALL record its own base commit at creation time +- **AND** worktrees created at different points in the run MAY have different base commits + +### Requirement: Worktree path convention + +Every ephemeral worktree for a `subagent-worktree` bundle SHALL be created at the path `.specflow/worktrees///` relative to the repository root. This path is fixed in Phase 1 and SHALL NOT be configurable. The main agent SHALL ensure the parent directory `.specflow/worktrees//` exists before invoking `git worktree add`. + +If the path already exists and the previous worktree cannot be reclaimed (e.g., it is registered in `git worktree list` and removal fails, or it is a non-worktree directory), the main agent SHALL trigger the worktree-unavailable fail-fast behavior defined below. + +#### Scenario: Worktree is created at the conventional path + +- **WHEN** the main agent creates a worktree for bundle `B` in run `R` +- **THEN** the worktree SHALL be located at `.specflow/worktrees///` + +#### Scenario: Existing stale worktree path triggers fail-fast + +- **WHEN** `.specflow/worktrees///` already exists +- **AND** `git worktree remove` on that path fails +- **THEN** the main agent SHALL trigger worktree-unavailable fail-fast +- **AND** the worktree path SHALL NOT be silently overwritten + +### Requirement: Worktree-unavailable fail-fast + +When the main agent cannot create a usable worktree for a subagent-eligible bundle, the main agent SHALL fail-fast the entire apply. Triggering conditions include, but are not limited to: + +- `git worktree` is unavailable (git version too old or binary missing), +- the target path cannot be created due to filesystem or permission errors, +- the target path already exists and cannot be reclaimed, +- `git worktree add` exits non-zero for any other reason. + +On any such trigger, the main agent SHALL: + +1. Surface an actionable error message that names the attempted worktree path and the underlying git/OS error. +2. Leave the run in `apply_draft`. +3. NOT silently fall back to `inline-main` for the failing bundle or for any other bundle. +4. NOT dispatch any further subagent in the current window, chunk, or subsequent windows. + +#### Scenario: git worktree add failure aborts the apply + +- **WHEN** the main agent invokes `git worktree add` for bundle `B` and the command exits non-zero +- **THEN** the main agent SHALL surface the git error with the attempted worktree path +- **AND** the run SHALL remain in `apply_draft` +- **AND** no subsequent subagent SHALL be dispatched + +#### Scenario: No silent degradation to inline-main + +- **WHEN** worktree creation fails for a `subagent-worktree` bundle +- **THEN** the main agent SHALL NOT run that bundle inline +- **AND** the main agent SHALL NOT warn-then-continue + +### Requirement: Subagent returns structured result with produced_artifacts + +The subagent SHALL return a structured result containing at minimum: + +- `status`: `"success"` | `"failure"` +- `produced_artifacts`: a set of repo-relative file paths that the subagent intended to create, modify, delete, rename (with new path), or change mode on. Over-declared entries (paths listed that do not appear in the diff) are permitted. +- `error`: human-readable message plus any structured diagnostic fields, required when `status = "failure"`; forbidden when `status = "success"`. + +The main agent SHALL parse this result and use it as the sole source of truth for integration validation on `status = "success"`. On `status = "failure"`, the main agent SHALL skip integration and record the failure per the bundle-status contract below. + +#### Scenario: Successful subagent result includes produced_artifacts + +- **WHEN** a subagent returns `status: "success"` +- **THEN** the result SHALL include a `produced_artifacts` field +- **AND** `produced_artifacts` SHALL be a set of repo-relative paths + +#### Scenario: Failed subagent result includes error + +- **WHEN** a subagent returns `status: "failure"` +- **THEN** the result SHALL include a non-empty `error` field +- **AND** the main agent SHALL NOT compute a diff or attempt integration for that bundle + +### Requirement: Main-agent integration authority — diff inspection and artifact cross-check + +Before a `subagent-worktree` bundle can reach `done`, the main agent SHALL perform integration validation on the worktree. Integration validation in Phase 1 is limited to **diff inspection + produced-artifact cross-check**; running lint, tests, or other side effects is explicitly OUT of scope for this requirement. + +The main agent SHALL: + +1. Compute the worktree diff via `git -C diff --binary ..HEAD` where `` is the base commit recorded at worktree-creation time. +2. Extract the set of touched paths from the diff, where: + - an added file contributes its path, + - a deleted file contributes its deleted path, + - a modified file contributes its path, + - a renamed file contributes the **new** path (not the old path), + - a mode-only change contributes its path (counts as a modification). +3. Compare the set of touched paths to `produced_artifacts`: + - Every touched path SHALL appear in `produced_artifacts`. + - Entries in `produced_artifacts` not present in the touched-path set (over-declaration) SHALL NOT cause rejection; the main agent MAY emit a warning. +4. Check the touched paths against the protected-path list (see below). +5. Check for the empty-diff-on-success condition (see below). + +If all checks pass, the main agent SHALL proceed to patch import. If any check fails, the main agent SHALL record the bundle status as `integration_rejected` per the bundle-status contract below. + +#### Scenario: Every diff path must be declared + +- **WHEN** the worktree diff touches paths `{a, b, c}` and `produced_artifacts = {a, b, c}` +- **THEN** the undeclared-paths check SHALL pass + +#### Scenario: Undeclared path causes integration rejection + +- **WHEN** the worktree diff touches `{a, b, c}` but `produced_artifacts = {a, b}` +- **THEN** the main agent SHALL reject integration +- **AND** the bundle status SHALL become `integration_rejected` + +#### Scenario: Over-declared artifact does not reject + +- **WHEN** the worktree diff touches `{a, b}` but `produced_artifacts = {a, b, c}` +- **THEN** the main agent MAY emit a warning about `c` +- **AND** the main agent SHALL NOT reject integration on this condition alone + +#### Scenario: Renamed file is matched by the new path + +- **WHEN** the worktree diff contains a rename from `old/p.ts` to `new/p.ts` +- **AND** `produced_artifacts` contains `new/p.ts` but not `old/p.ts` +- **THEN** the undeclared-paths check SHALL pass + +#### Scenario: Deletion is matched by the deleted path + +- **WHEN** the worktree diff deletes `x.ts` +- **AND** `produced_artifacts` contains `x.ts` +- **THEN** the undeclared-paths check SHALL pass + +#### Scenario: Mode-only change counts as modification + +- **WHEN** the worktree diff contains a mode-only change on `bin/run.sh` +- **AND** `produced_artifacts` contains `bin/run.sh` +- **THEN** the undeclared-paths check SHALL pass + +### Requirement: Protected-path touch causes integration rejection + +The main agent SHALL reject integration when the worktree diff touches any of the following paths: + +- `openspec/changes//task-graph.json` +- `openspec/changes//tasks.md` +- Any path under `.specflow/` + +These paths are reserved for main-agent mutation. Protected-path rejection takes precedence over the undeclared-paths check: even if a subagent declares a protected path in `produced_artifacts`, touching that path SHALL reject integration. + +#### Scenario: Touching task-graph.json rejects + +- **WHEN** the worktree diff modifies `openspec/changes//task-graph.json` +- **THEN** the main agent SHALL reject integration +- **AND** the bundle status SHALL become `integration_rejected` + +#### Scenario: Touching tasks.md rejects + +- **WHEN** the worktree diff modifies `openspec/changes//tasks.md` +- **THEN** the main agent SHALL reject integration + +#### Scenario: Touching any path under .specflow/ rejects + +- **WHEN** the worktree diff adds, modifies, or deletes a path under `.specflow/` +- **THEN** the main agent SHALL reject integration + +#### Scenario: Declaring a protected path in produced_artifacts does not bypass the check + +- **WHEN** the worktree diff modifies a protected path +- **AND** that path is listed in `produced_artifacts` +- **THEN** the main agent SHALL still reject integration + +### Requirement: Empty-diff-on-success causes integration rejection + +If a subagent returns `status: "success"` but the worktree diff is empty (no path is touched), the main agent SHALL reject integration. An empty diff paired with success indicates the subagent did not produce the work the bundle claims to have done. + +#### Scenario: Empty diff with success rejects + +- **WHEN** a subagent returns `status: "success"` +- **AND** `git -C diff --binary ..HEAD` produces no changes +- **THEN** the main agent SHALL reject integration +- **AND** the bundle status SHALL become `integration_rejected` + +### Requirement: Patch import via git apply covers all standard change types + +When all integration-validation checks pass, the main agent SHALL import the subagent's changes into the main workspace via `git -C diff --binary ..HEAD | git apply --binary` executed at the repository root. The patch-import mechanism SHALL support the full set of change types that `git diff --binary` and `git apply --binary` themselves cover: + +- file creation +- file deletion +- file modification (text content) +- file mode change +- file rename +- binary file content change + +The main agent SHALL NOT use `--3way` fallback in Phase 1. If `git apply --binary` exits non-zero at the repo root, the main agent SHALL reject integration. + +#### Scenario: Text modification applies cleanly + +- **WHEN** integration validation passes and the patch modifies a text file +- **THEN** `git apply --binary` SHALL be invoked at the repo root +- **AND** the bundle SHALL progress toward `done` on successful apply + +#### Scenario: Binary change is included in the patch + +- **WHEN** the worktree diff includes a binary file change +- **THEN** the diff SHALL be extracted with `git diff --binary` +- **AND** `git apply --binary` SHALL be invoked at the repo root + +#### Scenario: Patch-apply failure rejects integration + +- **WHEN** `git apply --binary` exits non-zero +- **THEN** the main agent SHALL reject integration +- **AND** the bundle status SHALL become `integration_rejected` +- **AND** no `--3way` retry SHALL be attempted + +### Requirement: Bundle status transition after integration + +A `subagent-worktree` bundle SHALL reach `done` only after integration validation passes and `git apply --binary` succeeds at the repo root. The main agent SHALL call `specflow-advance-bundle done` ONLY after successful patch import. + +If the subagent returned `status: "failure"`, the main agent SHALL call `specflow-advance-bundle subagent_failed`. + +If the subagent returned `status: "success"` but integration validation (any reason: undeclared path, protected-path touch, empty diff, patch-apply failure) rejected, the main agent SHALL call `specflow-advance-bundle integration_rejected`. + +#### Scenario: Successful path advances to done only after integration + +- **WHEN** a subagent returns `status: "success"` for bundle `B` +- **AND** all integration checks pass and `git apply --binary` succeeds +- **THEN** the main agent SHALL invoke `specflow-advance-bundle B done` + +#### Scenario: Subagent failure advances to subagent_failed + +- **WHEN** a subagent returns `status: "failure"` for bundle `B` +- **THEN** the main agent SHALL invoke `specflow-advance-bundle B subagent_failed` +- **AND** no integration, diff inspection, or patch apply SHALL be performed for `B` + +#### Scenario: Integration rejection advances to integration_rejected + +- **WHEN** a subagent returns `status: "success"` for bundle `B` +- **AND** any integration check rejects (undeclared path, protected-path touch, empty-diff-on-success, or patch-apply failure) +- **THEN** the main agent SHALL invoke `specflow-advance-bundle B integration_rejected` + +### Requirement: Worktree retention policy + +The main agent SHALL clean up worktrees based on the bundle's final status in the current apply invocation: + +- On `done`: the worktree SHALL be removed immediately via `git worktree remove `. If `git worktree remove` fails (e.g., due to uncommitted subagent changes that should already have been imported), the main agent SHALL surface the error but SHALL NOT revert the bundle's `done` status. +- On `subagent_failed`: the worktree SHALL be retained at its path at `.specflow/worktrees///`. +- On `integration_rejected`: the worktree SHALL be retained at its path. + +Retention behavior in Phase 1 is fixed and SHALL NOT be configurable. `/specflow.fix_apply` and manual inspection SHALL use the retained worktree path to diagnose failures. + +#### Scenario: Successful bundle removes its worktree + +- **WHEN** bundle `B` reaches `done` +- **THEN** `git worktree remove .specflow/worktrees///` SHALL be invoked +- **AND** the worktree SHALL no longer appear in `git worktree list` + +#### Scenario: Failed subagent retains worktree + +- **WHEN** bundle `B` reaches `subagent_failed` +- **THEN** the worktree at `.specflow/worktrees///` SHALL remain +- **AND** the worktree SHALL still appear in `git worktree list` + +#### Scenario: Integration rejection retains worktree + +- **WHEN** bundle `B` reaches `integration_rejected` +- **THEN** the worktree at `.specflow/worktrees///` SHALL remain + +#### Scenario: Retention policy is not configurable in Phase 1 + +- **WHEN** an operator attempts to override retention via config in Phase 1 +- **THEN** the retention behavior SHALL be the fixed rule above +- **AND** no config key SHALL alter cleanup on success or retention on failure diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/bundle-subagent-execution/spec.md b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/bundle-subagent-execution/spec.md new file mode 100644 index 0000000..bc1f3c5 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/bundle-subagent-execution/spec.md @@ -0,0 +1,125 @@ +## ADDED Requirements + +### Requirement: Bundle execution mode is derived from subagent-eligibility + +The dispatcher SHALL assign each bundle exactly one of two execution modes when `/specflow.apply` begins a window: + +- `inline-main`: the bundle is implemented directly by the main agent in the primary workspace. +- `subagent-worktree`: the bundle is dispatched to a subagent running inside a dedicated ephemeral git worktree, as defined by the `apply-worktree-integration` capability. + +The mode assignment rule SHALL be: + +- A bundle classified as **subagent-eligible** (per the existing "Bundle subagent-eligibility is derived from size_score" requirement) SHALL be assigned `subagent-worktree`. +- A bundle classified as **inline-only** SHALL be assigned `inline-main`. + +No third execution mode SHALL be introduced in this phase. In particular, a dispatched subagent without an isolated worktree (historically `subagent-shared`) SHALL NOT be a supported mode. + +Dispatch signals SHALL remain limited to the existing eligibility rule (`apply.subagent_dispatch.enabled = true` and `size_score > threshold`). No additional signals such as side-effect risk, lockfile/codegen touches, or changed-path count SHALL influence mode assignment in this phase. + +#### Scenario: Eligible bundle routes to subagent-worktree + +- **WHEN** dispatch is enabled and a bundle has `size_score = 8` and `threshold = 5` +- **THEN** the bundle SHALL be assigned execution mode `subagent-worktree` + +#### Scenario: Ineligible bundle routes to inline-main + +- **WHEN** a bundle is classified as inline-only (for any reason: dispatch disabled, `size_score <= threshold`, missing `size_score`, or task-graph.json absent) +- **THEN** the bundle SHALL be assigned execution mode `inline-main` + +#### Scenario: Subagent-shared is not a supported mode + +- **WHEN** the dispatcher assigns execution mode +- **THEN** the set of possible modes SHALL be exactly `{"inline-main", "subagent-worktree"}` +- **AND** a dispatched subagent executing without a dedicated worktree SHALL NOT occur + +#### Scenario: No extra dispatch signals influence mode + +- **WHEN** the dispatcher assigns execution mode for a bundle +- **THEN** the decision SHALL depend only on the existing subagent-eligibility rule +- **AND** signals such as side-effect risk, lockfile touches, or changed-path count SHALL NOT be consulted in this phase + +### Requirement: Bundle `done` requires main-agent integration success for subagent-worktree mode + +For every bundle assigned execution mode `subagent-worktree`, the main agent SHALL NOT invoke `specflow-advance-bundle done` on a subagent `status: "success"` alone. The main agent SHALL first run the integration authority contract defined in `apply-worktree-integration` (diff inspection, artifact cross-check, protected-path check, empty-diff-on-success check, and patch-apply). + +A `subagent-worktree` bundle SHALL reach `done` if and only if: + +1. The subagent returned `status: "success"`, AND +2. Integration validation passed, AND +3. `git apply --binary` at the repo root exited zero. + +If any of 1–3 fails, the bundle SHALL transition to one of the new terminal-for-this-invocation statuses defined in `task-planner` (`subagent_failed` for failed 1; `integration_rejected` for failed 2 or 3), per `apply-worktree-integration`. The main agent SHALL NOT silently record `done` on an unverified or unimported subagent success. + +For `inline-main` bundles, this requirement does NOT apply; inline bundles reach `done` under the existing completion rules. + +#### Scenario: Subagent success alone does not reach done + +- **WHEN** a `subagent-worktree` bundle's subagent returns `status: "success"` +- **AND** integration validation or patch-apply has not yet been executed +- **THEN** the main agent SHALL NOT invoke `specflow-advance-bundle ... done` + +#### Scenario: Done is reached only after successful integration + +- **WHEN** a `subagent-worktree` bundle's subagent returns `status: "success"` +- **AND** integration validation passes and `git apply --binary` succeeds +- **THEN** the main agent SHALL invoke `specflow-advance-bundle ... done` + +#### Scenario: Inline-main completion rules are unchanged + +- **WHEN** a bundle is assigned `inline-main` +- **THEN** this requirement SHALL NOT apply +- **AND** the bundle SHALL reach `done` under the existing inline completion rules + +## MODIFIED Requirements + +### Requirement: Fail-fast on subagent failure settles chunk then stops + +The dispatcher SHALL fail fast on any subagent failure or integration rejection in the current chunk, but SHALL first drain the chunk so partial successes are recorded before stopping. When any subagent in the current chunk returns `status: "failure"`, or any `subagent-worktree` bundle in the chunk is rejected during main-agent integration: + +1. The main agent SHALL wait for every other subagent in the same chunk to settle (return `"success"` or `"failure"`) and for each success to be processed through integration. This ensures partial successes in the chunk are recorded and produced artifacts are not lost. +2. For each sibling subagent whose integration succeeded, the main agent SHALL invoke `specflow-advance-bundle done`. +3. The failing bundle SHALL be transitioned via `specflow-advance-bundle` to the appropriate terminal-for-this-invocation status, per `apply-worktree-integration`: + - `subagent_failed` when the subagent returned `status: "failure"`. + - `integration_rejected` when the subagent returned `status: "success"` but main-agent integration validation or patch-apply rejected. +4. After all siblings have settled and their status transitions have been applied, the main agent SHALL STOP the apply immediately. It SHALL NOT begin the next chunk or the next window. +5. The run SHALL remain in `apply_draft`. The main agent SHALL surface the failure reason to the user (the subagent's `error` for `subagent_failed`; the specific integration-rejection cause for `integration_rejected`) and document recovery paths (`/specflow.fix_apply` using the retained worktree, or manual intervention). + +This behavior is consistent with the existing `specflow-advance-bundle` fail-fast contract: CLI-mandatory status transitions remain serial through the main agent, no auto-retry is introduced, and no un-dispatched bundles are silently promoted. + +#### Scenario: One failure in a chunk records siblings then stops + +- **WHEN** a chunk contains subagents X, Y, Z where X returns `"failure"`, Y returns `"success"` (integration ok), Z returns `"success"` (integration ok) +- **THEN** the main agent SHALL invoke `specflow-advance-bundle Y done` and `specflow-advance-bundle Z done` +- **AND** the main agent SHALL invoke `specflow-advance-bundle X subagent_failed` +- **AND** the main agent SHALL STOP the apply after recording Y, Z, and X +- **AND** subsequent chunks and windows SHALL NOT be dispatched + +#### Scenario: Integration rejection in chunk settles siblings then stops + +- **WHEN** a chunk contains subagents X, Y where X returns `"success"` but integration is rejected, and Y returns `"success"` with integration ok +- **THEN** the main agent SHALL invoke `specflow-advance-bundle Y done` +- **AND** the main agent SHALL invoke `specflow-advance-bundle X integration_rejected` +- **AND** the main agent SHALL STOP the apply after recording both transitions +- **AND** subsequent chunks and windows SHALL NOT be dispatched + +#### Scenario: Failing bundle settles to a terminal-for-invocation status + +- **WHEN** subagent for bundle X returns `"failure"` +- **THEN** bundle X SHALL have status `"subagent_failed"` in `task-graph.json` after the apply stops +- **AND** the run SHALL remain in `apply_draft` + +#### Scenario: Integration-rejected bundle settles to integration_rejected + +- **WHEN** subagent for bundle X returns `"success"` but main-agent integration rejects +- **THEN** bundle X SHALL have status `"integration_rejected"` in `task-graph.json` after the apply stops +- **AND** the run SHALL remain in `apply_draft` + +#### Scenario: Fail-fast surfaces the failure reason to the user + +- **WHEN** a subagent returns `"failure"` with `error: ""` +- **THEN** the main agent SHALL surface `` to the user +- **AND** the guide SHALL cite `/specflow.fix_apply` (using the retained worktree) and manual intervention as recovery paths + +- **WHEN** an integration is rejected for a specific reason (undeclared path, protected-path touch, empty-diff-on-success, or patch-apply failure) +- **THEN** the main agent SHALL surface the specific reason to the user +- **AND** the guide SHALL cite `/specflow.fix_apply` and manual worktree inspection as recovery paths diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/task-planner/spec.md b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/task-planner/spec.md new file mode 100644 index 0000000..492cf28 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/specs/task-planner/spec.md @@ -0,0 +1,223 @@ +## MODIFIED Requirements + +### Requirement: Task graph schema defines bundle-based structure + +The system SHALL define a `TaskGraph` JSON schema with the following top-level fields: +- `version`: schema version string (initial value `"1.0"`) +- `change_id`: the change identifier this graph belongs to +- `bundles`: an ordered array of `Bundle` objects +- `generated_at`: ISO 8601 timestamp of generation +- `generated_from`: identifier of the source artifact (e.g. `"design.md"`) + +Each `Bundle` object SHALL have the following fields: +- `id`: unique kebab-case identifier within the graph +- `title`: human-readable bundle name +- `goal`: one-sentence description of what the bundle achieves +- `depends_on`: array of bundle IDs representing soft dependencies (dependent bundle MAY start when dependency's output artifacts are available, not necessarily when dependency is fully complete) +- `inputs`: array of artifact references the bundle consumes +- `outputs`: array of artifact references the bundle produces +- `status`: enum of `"pending"` | `"in_progress"` | `"done"` | `"skipped"` | `"subagent_failed"` | `"integration_rejected"` +- `tasks`: array of `Task` objects within the bundle +- `owner_capabilities`: array of baseline spec names (from `openspec/specs/`) indicating which spec domain the bundle belongs to +- `size_score`: **optional** non-negative integer. When present, it SHALL equal `bundle.tasks.length` at the time of graph generation. When absent, downstream consumers (notably `bundle-subagent-execution`) SHALL treat the bundle as having an undefined `size_score` rather than substituting a default value. + +The two values `"subagent_failed"` and `"integration_rejected"` are introduced for the apply-class workflow when a subagent-worktree dispatched bundle cannot reach `done` (see `apply-worktree-integration`). They are **terminal within a single apply invocation** — the apply SHALL fail-fast and stop — but are **not terminal at the run level**; `/specflow.fix_apply` or an operator-initiated reset MAY transition the bundle back to `"pending"` in a subsequent invocation. They SHALL NOT be generated by the task-graph generator (`generateTaskGraph`); they exist only as writable apply-phase statuses. + +Each `Task` object SHALL have at minimum: +- `id`: unique identifier within the bundle +- `title`: task description +- `status`: enum of `"pending"` | `"in_progress"` | `"done"` | `"skipped"` + +Child-task statuses SHALL remain the original four values. The new bundle-only statuses (`"subagent_failed"`, `"integration_rejected"`) SHALL NOT appear on child tasks. + +#### Scenario: Valid task graph conforms to schema + +- **WHEN** a task graph JSON document is validated against the `TaskGraph` schema +- **THEN** it SHALL pass validation if and only if all required fields are present with correct types + +#### Scenario: Bundle IDs are unique within a graph + +- **WHEN** a task graph is validated +- **THEN** all bundle `id` values SHALL be unique within the `bundles` array + +#### Scenario: depends_on references are valid bundle IDs + +- **WHEN** a task graph is validated +- **THEN** every `id` in every bundle's `depends_on` array SHALL reference an existing bundle `id` in the same graph + +#### Scenario: No circular dependencies in depends_on + +- **WHEN** a task graph is validated +- **THEN** the dependency graph formed by `depends_on` SHALL be a directed acyclic graph (DAG) + +#### Scenario: owner_capabilities references valid spec names + +- **WHEN** a task graph is generated +- **THEN** every entry in `owner_capabilities` SHALL correspond to a directory name in `openspec/specs/` + +#### Scenario: size_score is present and matches task count for newly generated graphs + +- **WHEN** a task graph is generated via `generateTaskGraph` on the post-feature code path +- **THEN** every `Bundle` SHALL have a `size_score` field equal to `bundle.tasks.length` + +#### Scenario: size_score is optional — graphs without the field remain valid + +- **WHEN** a pre-feature `task-graph.json` without `size_score` fields is validated against the current schema +- **THEN** validation SHALL pass +- **AND** the graph SHALL be usable by the apply phase + +#### Scenario: subagent_failed is a valid bundle status + +- **WHEN** a task graph has a bundle with `status = "subagent_failed"` +- **THEN** validation SHALL pass + +#### Scenario: integration_rejected is a valid bundle status + +- **WHEN** a task graph has a bundle with `status = "integration_rejected"` +- **THEN** validation SHALL pass + +#### Scenario: Child task cannot use new bundle-only statuses + +- **WHEN** a task graph is validated +- **AND** any `tasks[*].status` is `"subagent_failed"` or `"integration_rejected"` +- **THEN** validation SHALL fail + +#### Scenario: generateTaskGraph does not emit new statuses + +- **WHEN** `generateTaskGraph` produces a new task graph +- **THEN** every bundle SHALL have `status = "pending"` +- **AND** no bundle SHALL be emitted with `"subagent_failed"` or `"integration_rejected"` + +### Requirement: Apply phase writes back bundle status to task graph + +The apply phase SHALL update bundle and task status in `task-graph.json` after execution. The status transitions SHALL be: +- `"pending"` → `"in_progress"`: when bundle execution begins +- `"in_progress"` → `"done"`: when bundle completion check passes +- `"pending"` → `"skipped"`: when explicitly skipped by user or system +- `"in_progress"` → `"subagent_failed"`: when a `subagent-worktree` bundle's subagent returned `status: "failure"` +- `"in_progress"` → `"integration_rejected"`: when a `subagent-worktree` bundle's subagent returned `status: "success"` but main-agent integration validation or patch-apply rejected the result +- `"subagent_failed"` → `"pending"`: allowed only via `/specflow.fix_apply` or an explicit operator reset; not permitted during normal apply-class transitions +- `"integration_rejected"` → `"pending"`: allowed only via `/specflow.fix_apply` or an explicit operator reset; not permitted during normal apply-class transitions + +All other transitions SHALL be rejected as invalid. In particular, a bundle in `"subagent_failed"` or `"integration_rejected"` SHALL NOT transition directly to `"done"`, `"in_progress"`, or `"skipped"` without first returning to `"pending"` via a reset. + +The system SHALL provide `updateBundleStatus(taskGraph, bundleId, newStatus)` that returns a new `TaskGraph` with the specified bundle's status updated. The original task graph SHALL NOT be mutated. + +When the new bundle status is a **terminal** status (`"done"` or `"skipped"`), `updateBundleStatus` SHALL also **normalize all child task statuses** within that bundle so they match the bundle's terminal status in the returned `TaskGraph`: +- `bundle.status = "done"` → every `tasks[*].status` in that bundle SHALL be `"done"` +- `bundle.status = "skipped"` → every `tasks[*].status` in that bundle SHALL be `"skipped"` + +Normalization applies unconditionally regardless of the prior child status (including children that already hold a different terminal status). The bundle is the authoritative execution unit; per-task status is informational. A bundle with an empty `tasks` array is a no-op with respect to child coercion; the terminal bundle status is still applied. + +Normalization applies **only on terminal transitions to `"done"` or `"skipped"`**. Non-terminal transitions (`pending → in_progress`, `pending → pending`) SHALL NOT modify child task statuses. Transitions to the new statuses `"subagent_failed"` and `"integration_rejected"` SHALL also NOT modify child task statuses; child statuses at the time of failure are informational and SHALL be preserved as-is so that `/specflow.fix_apply` can inspect what was in flight. + +The returned `TaskGraph` SHALL contain both the bundle status change and all child coercions (when applicable) as a single in-memory update. When the caller persists `task-graph.json`, it SHALL be written atomically (e.g., write-to-temp + rename) so that the persisted graph is either the pre-update state or the fully normalized post-update state — never a mismatched intermediate state. + +Whenever normalization actually changes a child task's status (from a value different from the target terminal status), the apply path SHALL emit a structured audit log entry containing at minimum: `bundle_id`, `task_id`, `from_status`, and `to_status`. Coercions that do not change a child's status (the child already matched the bundle's terminal status) SHALL NOT emit a log entry. + +After status update, `tasks.md` SHALL be re-rendered from the normalized `TaskGraph` returned by `updateBundleStatus` (never from an unnormalized intermediate graph) to keep the human-readable view in sync. Bundles in the new statuses `"subagent_failed"` and `"integration_rejected"` SHALL be rendered with a distinct visual marker so readers can identify failed bundles at a glance; the exact rendering is implementation-defined but SHALL differ from `"pending"`, `"in_progress"`, `"done"`, and `"skipped"`. + +#### Scenario: Status transitions from pending to in_progress + +- **WHEN** `updateBundleStatus` is called with `("pending" bundle, "in_progress")` +- **THEN** the returned task graph SHALL have the bundle's status as `"in_progress"` +- **AND** the original task graph SHALL be unchanged +- **AND** every child task's status in that bundle SHALL be unchanged + +#### Scenario: Status transitions from in_progress to done + +- **WHEN** `updateBundleStatus` is called with `("in_progress" bundle, "done")` +- **THEN** the returned task graph SHALL have the bundle's status as `"done"` +- **AND** every child task's status in that bundle SHALL be `"done"` in the returned graph + +#### Scenario: Status transitions from in_progress to subagent_failed + +- **WHEN** `updateBundleStatus` is called with `("in_progress" bundle, "subagent_failed")` +- **THEN** the returned task graph SHALL have the bundle's status as `"subagent_failed"` +- **AND** every child task's status in that bundle SHALL be preserved as-is +- **AND** no child-coercion audit log entry SHALL be emitted + +#### Scenario: Status transitions from in_progress to integration_rejected + +- **WHEN** `updateBundleStatus` is called with `("in_progress" bundle, "integration_rejected")` +- **THEN** the returned task graph SHALL have the bundle's status as `"integration_rejected"` +- **AND** every child task's status in that bundle SHALL be preserved as-is +- **AND** no child-coercion audit log entry SHALL be emitted + +#### Scenario: Status transitions from subagent_failed back to pending via reset + +- **WHEN** `updateBundleStatus` is called with `("subagent_failed" bundle, "pending")` via `/specflow.fix_apply` or an explicit operator reset path +- **THEN** the returned task graph SHALL have the bundle's status as `"pending"` + +#### Scenario: Status transitions from integration_rejected back to pending via reset + +- **WHEN** `updateBundleStatus` is called with `("integration_rejected" bundle, "pending")` via `/specflow.fix_apply` or an explicit operator reset path +- **THEN** the returned task graph SHALL have the bundle's status as `"pending"` + +#### Scenario: Invalid status transition is rejected + +- **WHEN** `updateBundleStatus` is called with `("done" bundle, "pending")` +- **THEN** it SHALL return a typed error indicating an invalid status transition + +#### Scenario: Direct transition from subagent_failed to done is rejected + +- **WHEN** `updateBundleStatus` is called with `("subagent_failed" bundle, "done")` +- **THEN** it SHALL return a typed error indicating an invalid status transition + +#### Scenario: Direct transition from integration_rejected to in_progress is rejected + +- **WHEN** `updateBundleStatus` is called with `("integration_rejected" bundle, "in_progress")` +- **THEN** it SHALL return a typed error indicating an invalid status transition + +#### Scenario: Bundle transition to done normalizes pending child tasks + +- **WHEN** `updateBundleStatus` is called with `("in_progress" bundle, "done")` and that bundle's `tasks` contains at least one task with `status = "pending"` +- **THEN** the returned task graph SHALL have every `tasks[*].status` in that bundle equal to `"done"` +- **AND** the original task graph SHALL be unchanged + +#### Scenario: Bundle transition to skipped normalizes child tasks + +- **WHEN** `updateBundleStatus` is called with `("pending" bundle, "skipped")` +- **THEN** the returned task graph SHALL have every `tasks[*].status` in that bundle equal to `"skipped"` + +#### Scenario: Normalization force-coerces conflicting prior terminal child status + +- **WHEN** `updateBundleStatus` is called with a terminal target status (`"done"` or `"skipped"`) and at least one child task already holds a different terminal status (e.g., child is `"done"` and the new bundle status is `"skipped"`) +- **THEN** the returned task graph SHALL have that child's status rewritten to match the bundle's new terminal status +- **AND** a structured audit log entry SHALL be emitted containing `bundle_id`, `task_id`, `from_status`, and `to_status` + +#### Scenario: Terminal transition on empty bundle is a no-op for children + +- **WHEN** `updateBundleStatus` is called with a terminal target status on a bundle whose `tasks` array is empty +- **THEN** the returned task graph SHALL have the bundle's status set to the terminal value +- **AND** no audit log entry for child coercion SHALL be emitted + +#### Scenario: Audit log suppressed when coercion does not change status + +- **WHEN** `updateBundleStatus` is called with a terminal target status and every child task already holds the same status as the bundle's new terminal status +- **THEN** the returned task graph SHALL reflect the bundle terminal status +- **AND** no audit log entry for child coercion SHALL be emitted + +#### Scenario: updateBundleStatus does not mutate input graph + +- **WHEN** `updateBundleStatus` is called with any valid arguments +- **THEN** the original `TaskGraph` argument SHALL be structurally unchanged (bundle statuses and every `tasks[*].status` preserved) +- **AND** any mutations SHALL only appear in the returned `TaskGraph` + +#### Scenario: tasks.md is re-rendered from normalized graph after terminal transition + +- **WHEN** a bundle status is updated to a terminal value (`"done"` or `"skipped"`) and `task-graph.json` is persisted +- **THEN** `tasks.md` SHALL be re-rendered from the normalized `TaskGraph` returned by `updateBundleStatus` +- **AND** the rendered checklist for that bundle SHALL show every task's checkbox state as matching the bundle's terminal status + +#### Scenario: tasks.md renders subagent_failed and integration_rejected with a distinct marker + +- **WHEN** `tasks.md` is re-rendered for a graph containing a bundle with `status = "subagent_failed"` or `status = "integration_rejected"` +- **THEN** the rendered bundle section SHALL carry a visible marker that differs from `"pending"`, `"in_progress"`, `"done"`, and `"skipped"` +- **AND** each child task's rendered checkbox SHALL reflect its preserved informational status (no coercion) + +#### Scenario: Atomic persistence avoids mismatched intermediate state + +- **WHEN** `task-graph.json` is written after a bundle transition (including transitions to `"subagent_failed"` or `"integration_rejected"`) +- **THEN** the persistence path SHALL use an atomic write (e.g., write-to-temp + rename) so that a concurrent reader observes either the pre-update graph or the fully normalized post-update graph +- **AND** the persisted file SHALL NOT contain a bundle whose status is terminal (`"done"` or `"skipped"`) while any of its child task statuses disagree diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/task-graph.json b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/task-graph.json new file mode 100644 index 0000000..bebd2be --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/task-graph.json @@ -0,0 +1,296 @@ +{ + "version": "1.0", + "change_id": "apply-worktree-isolation", + "bundles": [ + { + "id": "extend-bundle-status-model", + "title": "Extend Bundle Status Model", + "goal": "Add the new terminal bundle statuses and transition rules so apply, recovery, and rendering can distinguish subagent execution failure from integration rejection.", + "depends_on": [], + "inputs": [ + "design.md", + "proposal.md", + "specs/task-planner/spec.md", + "openspec/changes//task-graph.json", + "openspec/changes//tasks.md" + ], + "outputs": [ + "task-planner status enum supporting subagent_failed and integration_rejected", + "updateBundleStatus transition rules for pending/in_progress/done/subagent_failed/integration_rejected", + "tasks.md renderer markers for the new statuses", + "schema and reducer tests for new status transitions" + ], + "status": "done", + "tasks": [ + { + "id": "1", + "title": "Extend the task-planner bundle status enum to include subagent_failed and integration_rejected", + "status": "done" + }, + { + "id": "2", + "title": "Update bundle transition validation to allow the new terminal states and pending resets only from fix-apply/operator reset flows", + "status": "done" + }, + { + "id": "3", + "title": "Render the new statuses distinctly in tasks.md output", + "status": "done" + }, + { + "id": "4", + "title": "Add schema, reducer, and rendering tests covering valid and invalid transitions", + "status": "done" + } + ], + "owner_capabilities": [ + "task-planner", + "canonical-workflow-state", + "workflow-run-state" + ], + "size_score": 4 + }, + { + "id": "build-worktree-helper-primitives", + "title": "Build Worktree Helper Primitives", + "goal": "Provide the git-backed worktree lifecycle and diff/import helpers needed to isolate subagent execution per bundle.", + "depends_on": [], + "inputs": [ + "design.md", + "specs/apply-worktree-integration/spec.md", + "git worktree add/remove/list", + "git diff --binary", + "git apply --binary" + ], + "outputs": [ + "createWorktree helper returning worktree path and base SHA", + "computeDiff helper producing binary-safe patches", + "importPatch helper applying binary patches at repo root", + "removeWorktree helper for cleanup", + "listTouchedPaths helper for patch path inspection", + "unit tests for worktree helper behavior" + ], + "status": "done", + "tasks": [ + { + "id": "1", + "title": "Implement createWorktree to create .specflow/worktrees/// from current HEAD and capture the base SHA", + "status": "done" + }, + { + "id": "2", + "title": "Implement computeDiff, importPatch, and removeWorktree using binary-safe git commands", + "status": "done" + }, + { + "id": "3", + "title": "Implement listTouchedPaths to extract all touched repo-relative paths from patch content", + "status": "done" + }, + { + "id": "4", + "title": "Add unit tests for success and failure behavior of the helper primitives", + "status": "done" + } + ], + "owner_capabilities": [ + "utility-cli-suite", + "workspace-context", + "run-artifact-store-conformance" + ], + "size_score": 4 + }, + { + "id": "enforce-integration-authority", + "title": "Enforce Integration Authority", + "goal": "Validate subagent worktree output before import so only declared, non-protected, non-empty changes can advance a bundle to done.", + "depends_on": [ + "build-worktree-helper-primitives", + "extend-bundle-status-model" + ], + "inputs": [ + "design.md", + "specs/apply-worktree-integration/spec.md", + "SubagentResult contract", + "worktree helper primitives", + "protected path rules for .specflow/ and workflow state artifacts" + ], + "outputs": [ + "main-agent integration step for worktree diffs", + "integration rejection causes surfaced to user output", + "bundle transitions to done or integration_rejected based on validation", + "positive and negative integration tests covering all rejection causes" + ], + "status": "done", + "tasks": [ + { + "id": "1", + "title": "Add an integration validation step that computes the worktree diff and rejects undeclared or protected-path changes", + "status": "done" + }, + { + "id": "2", + "title": "Reject successful subagent results that produce an empty diff and surface the empty_diff_on_success cause", + "status": "done" + }, + { + "id": "3", + "title": "Import accepted patches with git apply --binary and transition bundles to done only after successful import", + "status": "done" + }, + { + "id": "4", + "title": "Add tests covering undeclared_path, protected_path, empty_diff_on_success, patch_apply_failure, and the success path", + "status": "done" + } + ], + "owner_capabilities": [ + "bundle-subagent-execution", + "artifact-ownership-model", + "repo-responsibility" + ], + "size_score": 4 + }, + { + "id": "route-bundles-by-execution-mode", + "title": "Route Bundles By Execution Mode", + "goal": "Assign each bundle to inline-main or subagent-worktree deterministically from the existing subagent-eligibility rule.", + "depends_on": [ + "enforce-integration-authority" + ], + "inputs": [ + "design.md", + "specs/bundle-subagent-execution/spec.md", + "assignExecutionMode contract", + "bundle size_score and subagent configuration" + ], + "outputs": [ + "dispatcher execution-mode assignment logic", + "tests covering enabled true/false, size_score above/below threshold, and undefined size_score", + "execution-mode routing from dispatcher to inline or worktree path" + ], + "status": "done", + "tasks": [ + { + "id": "1", + "title": "Implement assignExecutionMode to return subagent-worktree only for bundles that are subagent-eligible under the existing rule", + "status": "done" + }, + { + "id": "2", + "title": "Wire dispatcher routing so eligible bundles go through the worktree integration path and others stay inline-main", + "status": "done" + }, + { + "id": "3", + "title": "Add dispatch tests for all required configuration and size-score combinations", + "status": "done" + } + ], + "owner_capabilities": [ + "bundle-subagent-execution", + "phase-router", + "task-planner" + ], + "size_score": 3 + }, + { + "id": "wire-fail-fast-and-retention", + "title": "Wire Fail-Fast And Retention", + "goal": "Integrate worktree mode into apply orchestration with fail-fast semantics on worktree creation errors and retention of failed worktrees for diagnosis.", + "depends_on": [ + "build-worktree-helper-primitives", + "enforce-integration-authority", + "route-bundles-by-execution-mode", + "extend-bundle-status-model" + ], + "inputs": [ + "design.md", + ".specflow/runs// state handling", + "chunk-drain-then-stop apply flow", + "worktree lifecycle and integration outputs" + ], + "outputs": [ + "apply orchestration using worktree mode for eligible bundles", + "fail-fast stop on git worktree add failure without inline fallback", + "retention policy removing successful worktrees and preserving failed/rejected ones", + "integration tests for worktree-unavailable stop behavior and retention outcomes" + ], + "status": "done", + "tasks": [ + { + "id": "1", + "title": "Create and prepare per-bundle worktrees during apply orchestration and run subagents inside them", + "status": "done" + }, + { + "id": "2", + "title": "Stop the apply immediately when createWorktree fails and leave the run in apply_draft with no further subagents spawned", + "status": "done" + }, + { + "id": "3", + "title": "Retain worktrees for subagent_failed and integration_rejected bundles and remove them after successful integration", + "status": "done" + }, + { + "id": "4", + "title": "Add end-to-end tests covering successful cleanup, retained failure worktrees, and fail-fast behavior on worktree add errors", + "status": "done" + } + ], + "owner_capabilities": [ + "bundle-subagent-execution", + "workflow-run-state", + "workspace-context" + ], + "size_score": 4 + }, + { + "id": "document-fix-apply-recovery", + "title": "Document Fix Apply Recovery", + "goal": "Document how operators and /specflow.fix_apply recover from subagent_failed and integration_rejected bundles using retained worktrees.", + "depends_on": [ + "wire-fail-fast-and-retention", + "extend-bundle-status-model" + ], + "inputs": [ + "design.md", + "specs/task-planner/spec.md", + "specs/bundle-subagent-execution/spec.md", + "retained worktree paths", + "new bundle status semantics" + ], + "outputs": [ + "recovery documentation for /specflow.fix_apply covering retained worktrees and reset-to-pending rules", + "operator guidance for diagnosing subagent_failed versus integration_rejected" + ], + "status": "done", + "tasks": [ + { + "id": "1", + "title": "Document the new terminal bundle statuses and the allowed reset-to-pending recovery path", + "status": "done" + }, + { + "id": "2", + "title": "Document how retained worktrees are located and used during fix-apply recovery", + "status": "done" + }, + { + "id": "3", + "title": "Add operator-facing notes distinguishing subagent execution failure from integration rejection", + "status": "done" + } + ], + "owner_capabilities": [ + "review-autofix-progress-observability", + "task-planner", + "bundle-subagent-execution" + ], + "size_score": 3 + } + ], + "generated_at": "2026-04-22T04:57:18.048Z", + "generated_from": "design.md" +} diff --git a/openspec/changes/archive/2026-04-23-apply-worktree-isolation/tasks.md b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/tasks.md new file mode 100644 index 0000000..17915e3 --- /dev/null +++ b/openspec/changes/archive/2026-04-23-apply-worktree-isolation/tasks.md @@ -0,0 +1,59 @@ +## 1. Extend Bundle Status Model ✓ + +> Add the new terminal bundle statuses and transition rules so apply, recovery, and rendering can distinguish subagent execution failure from integration rejection. + +- [x] 1.1 Extend the task-planner bundle status enum to include subagent_failed and integration_rejected +- [x] 1.2 Update bundle transition validation to allow the new terminal states and pending resets only from fix-apply/operator reset flows +- [x] 1.3 Render the new statuses distinctly in tasks.md output +- [x] 1.4 Add schema, reducer, and rendering tests covering valid and invalid transitions + +## 2. Build Worktree Helper Primitives ✓ + +> Provide the git-backed worktree lifecycle and diff/import helpers needed to isolate subagent execution per bundle. + +- [x] 2.1 Implement createWorktree to create .specflow/worktrees/// from current HEAD and capture the base SHA +- [x] 2.2 Implement computeDiff, importPatch, and removeWorktree using binary-safe git commands +- [x] 2.3 Implement listTouchedPaths to extract all touched repo-relative paths from patch content +- [x] 2.4 Add unit tests for success and failure behavior of the helper primitives + +## 3. Enforce Integration Authority ✓ + +> Validate subagent worktree output before import so only declared, non-protected, non-empty changes can advance a bundle to done. + +> Depends on: build-worktree-helper-primitives, extend-bundle-status-model + +- [x] 3.1 Add an integration validation step that computes the worktree diff and rejects undeclared or protected-path changes +- [x] 3.2 Reject successful subagent results that produce an empty diff and surface the empty_diff_on_success cause +- [x] 3.3 Import accepted patches with git apply --binary and transition bundles to done only after successful import +- [x] 3.4 Add tests covering undeclared_path, protected_path, empty_diff_on_success, patch_apply_failure, and the success path + +## 4. Route Bundles By Execution Mode ✓ + +> Assign each bundle to inline-main or subagent-worktree deterministically from the existing subagent-eligibility rule. + +> Depends on: enforce-integration-authority + +- [x] 4.1 Implement assignExecutionMode to return subagent-worktree only for bundles that are subagent-eligible under the existing rule +- [x] 4.2 Wire dispatcher routing so eligible bundles go through the worktree integration path and others stay inline-main +- [x] 4.3 Add dispatch tests for all required configuration and size-score combinations + +## 5. Wire Fail-Fast And Retention ✓ + +> Integrate worktree mode into apply orchestration with fail-fast semantics on worktree creation errors and retention of failed worktrees for diagnosis. + +> Depends on: build-worktree-helper-primitives, enforce-integration-authority, route-bundles-by-execution-mode, extend-bundle-status-model + +- [x] 5.1 Create and prepare per-bundle worktrees during apply orchestration and run subagents inside them +- [x] 5.2 Stop the apply immediately when createWorktree fails and leave the run in apply_draft with no further subagents spawned +- [x] 5.3 Retain worktrees for subagent_failed and integration_rejected bundles and remove them after successful integration +- [x] 5.4 Add end-to-end tests covering successful cleanup, retained failure worktrees, and fail-fast behavior on worktree add errors + +## 6. Document Fix Apply Recovery ✓ + +> Document how operators and /specflow.fix_apply recover from subagent_failed and integration_rejected bundles using retained worktrees. + +> Depends on: wire-fail-fast-and-retention, extend-bundle-status-model + +- [x] 6.1 Document the new terminal bundle statuses and the allowed reset-to-pending recovery path +- [x] 6.2 Document how retained worktrees are located and used during fix-apply recovery +- [x] 6.3 Add operator-facing notes distinguishing subagent execution failure from integration rejection diff --git a/openspec/specs/apply-worktree-integration/spec.md b/openspec/specs/apply-worktree-integration/spec.md new file mode 100644 index 0000000..91621d7 --- /dev/null +++ b/openspec/specs/apply-worktree-integration/spec.md @@ -0,0 +1,289 @@ +# apply-worktree-integration Specification + +## Purpose +TBD - created by archiving change apply-worktree-isolation. Update Purpose after archive. +## Requirements +### Requirement: Worktree is created from main HEAD at creation time + +For every bundle assigned execution mode `subagent-worktree`, the main agent SHALL create an ephemeral git worktree via `git worktree add` using the current repository HEAD at the moment of worktree creation as the base. The main agent SHALL NOT pre-compute a shared base snapshot for the whole apply run, SHALL NOT rebase the worktree onto a different base before dispatch, and SHALL NOT rebase the worktree prior to integration. + +As a consequence, when earlier bundles in the same run have already been integrated into the main workspace before a later worktree is created, the later worktree SHALL observe those imports as part of its base. This creates a deterministic, per-worktree base commit that the main agent SHALL record and later use to compute the integration diff. + +#### Scenario: Worktree base equals main HEAD at creation time + +- **WHEN** the main agent creates a worktree for bundle `B` at commit `` +- **THEN** the worktree's base SHALL equal the main workspace HEAD at the moment of creation +- **AND** the main agent SHALL record `` as the integration base for bundle `B` + +#### Scenario: Later worktrees inherit earlier integrations + +- **WHEN** bundle `A` has been integrated into the main workspace in this run +- **AND** the main agent then creates a worktree for bundle `B` +- **THEN** bundle `B`'s worktree SHALL include bundle `A`'s imported changes as part of its base +- **AND** no auto-rebase SHALL be performed later to reconcile drift + +#### Scenario: No shared run-wide base snapshot is used + +- **WHEN** an apply run dispatches multiple `subagent-worktree` bundles +- **THEN** each worktree SHALL record its own base commit at creation time +- **AND** worktrees created at different points in the run MAY have different base commits + +### Requirement: Worktree path convention + +Every ephemeral worktree for a `subagent-worktree` bundle SHALL be created at the path `.specflow/worktrees///` relative to the repository root. This path is fixed in Phase 1 and SHALL NOT be configurable. The main agent SHALL ensure the parent directory `.specflow/worktrees//` exists before invoking `git worktree add`. + +If the path already exists and the previous worktree cannot be reclaimed (e.g., it is registered in `git worktree list` and removal fails, or it is a non-worktree directory), the main agent SHALL trigger the worktree-unavailable fail-fast behavior defined below. + +#### Scenario: Worktree is created at the conventional path + +- **WHEN** the main agent creates a worktree for bundle `B` in run `R` +- **THEN** the worktree SHALL be located at `.specflow/worktrees///` + +#### Scenario: Existing stale worktree path triggers fail-fast + +- **WHEN** `.specflow/worktrees///` already exists +- **AND** `git worktree remove` on that path fails +- **THEN** the main agent SHALL trigger worktree-unavailable fail-fast +- **AND** the worktree path SHALL NOT be silently overwritten + +### Requirement: Worktree-unavailable fail-fast + +When the main agent cannot create a usable worktree for a subagent-eligible bundle, the main agent SHALL fail-fast the entire apply. Triggering conditions include, but are not limited to: + +- `git worktree` is unavailable (git version too old or binary missing), +- the target path cannot be created due to filesystem or permission errors, +- the target path already exists and cannot be reclaimed, +- `git worktree add` exits non-zero for any other reason. + +On any such trigger, the main agent SHALL: + +1. Surface an actionable error message that names the attempted worktree path and the underlying git/OS error. +2. Leave the run in `apply_draft`. +3. NOT silently fall back to `inline-main` for the failing bundle or for any other bundle. +4. NOT dispatch any further subagent in the current window, chunk, or subsequent windows. + +#### Scenario: git worktree add failure aborts the apply + +- **WHEN** the main agent invokes `git worktree add` for bundle `B` and the command exits non-zero +- **THEN** the main agent SHALL surface the git error with the attempted worktree path +- **AND** the run SHALL remain in `apply_draft` +- **AND** no subsequent subagent SHALL be dispatched + +#### Scenario: No silent degradation to inline-main + +- **WHEN** worktree creation fails for a `subagent-worktree` bundle +- **THEN** the main agent SHALL NOT run that bundle inline +- **AND** the main agent SHALL NOT warn-then-continue + +### Requirement: Subagent returns structured result with produced_artifacts + +The subagent SHALL return a structured result containing at minimum: + +- `status`: `"success"` | `"failure"` +- `produced_artifacts`: a set of repo-relative file paths that the subagent intended to create, modify, delete, rename (with new path), or change mode on. Over-declared entries (paths listed that do not appear in the diff) are permitted. +- `error`: human-readable message plus any structured diagnostic fields, required when `status = "failure"`; forbidden when `status = "success"`. + +The main agent SHALL parse this result and use it as the sole source of truth for integration validation on `status = "success"`. On `status = "failure"`, the main agent SHALL skip integration and record the failure per the bundle-status contract below. + +#### Scenario: Successful subagent result includes produced_artifacts + +- **WHEN** a subagent returns `status: "success"` +- **THEN** the result SHALL include a `produced_artifacts` field +- **AND** `produced_artifacts` SHALL be a set of repo-relative paths + +#### Scenario: Failed subagent result includes error + +- **WHEN** a subagent returns `status: "failure"` +- **THEN** the result SHALL include a non-empty `error` field +- **AND** the main agent SHALL NOT compute a diff or attempt integration for that bundle + +### Requirement: Main-agent integration authority — diff inspection and artifact cross-check + +Before a `subagent-worktree` bundle can reach `done`, the main agent SHALL perform integration validation on the worktree. Integration validation in Phase 1 is limited to **diff inspection + produced-artifact cross-check**; running lint, tests, or other side effects is explicitly OUT of scope for this requirement. + +The main agent SHALL: + +1. Compute the worktree diff via `git -C diff --binary ..HEAD` where `` is the base commit recorded at worktree-creation time. +2. Extract the set of touched paths from the diff, where: + - an added file contributes its path, + - a deleted file contributes its deleted path, + - a modified file contributes its path, + - a renamed file contributes the **new** path (not the old path), + - a mode-only change contributes its path (counts as a modification). +3. Compare the set of touched paths to `produced_artifacts`: + - Every touched path SHALL appear in `produced_artifacts`. + - Entries in `produced_artifacts` not present in the touched-path set (over-declaration) SHALL NOT cause rejection; the main agent MAY emit a warning. +4. Check the touched paths against the protected-path list (see below). +5. Check for the empty-diff-on-success condition (see below). + +If all checks pass, the main agent SHALL proceed to patch import. If any check fails, the main agent SHALL record the bundle status as `integration_rejected` per the bundle-status contract below. + +#### Scenario: Every diff path must be declared + +- **WHEN** the worktree diff touches paths `{a, b, c}` and `produced_artifacts = {a, b, c}` +- **THEN** the undeclared-paths check SHALL pass + +#### Scenario: Undeclared path causes integration rejection + +- **WHEN** the worktree diff touches `{a, b, c}` but `produced_artifacts = {a, b}` +- **THEN** the main agent SHALL reject integration +- **AND** the bundle status SHALL become `integration_rejected` + +#### Scenario: Over-declared artifact does not reject + +- **WHEN** the worktree diff touches `{a, b}` but `produced_artifacts = {a, b, c}` +- **THEN** the main agent MAY emit a warning about `c` +- **AND** the main agent SHALL NOT reject integration on this condition alone + +#### Scenario: Renamed file is matched by the new path + +- **WHEN** the worktree diff contains a rename from `old/p.ts` to `new/p.ts` +- **AND** `produced_artifacts` contains `new/p.ts` but not `old/p.ts` +- **THEN** the undeclared-paths check SHALL pass + +#### Scenario: Deletion is matched by the deleted path + +- **WHEN** the worktree diff deletes `x.ts` +- **AND** `produced_artifacts` contains `x.ts` +- **THEN** the undeclared-paths check SHALL pass + +#### Scenario: Mode-only change counts as modification + +- **WHEN** the worktree diff contains a mode-only change on `bin/run.sh` +- **AND** `produced_artifacts` contains `bin/run.sh` +- **THEN** the undeclared-paths check SHALL pass + +### Requirement: Protected-path touch causes integration rejection + +The main agent SHALL reject integration when the worktree diff touches any of the following paths: + +- `openspec/changes//task-graph.json` +- `openspec/changes//tasks.md` +- Any path under `.specflow/` + +These paths are reserved for main-agent mutation. Protected-path rejection takes precedence over the undeclared-paths check: even if a subagent declares a protected path in `produced_artifacts`, touching that path SHALL reject integration. + +#### Scenario: Touching task-graph.json rejects + +- **WHEN** the worktree diff modifies `openspec/changes//task-graph.json` +- **THEN** the main agent SHALL reject integration +- **AND** the bundle status SHALL become `integration_rejected` + +#### Scenario: Touching tasks.md rejects + +- **WHEN** the worktree diff modifies `openspec/changes//tasks.md` +- **THEN** the main agent SHALL reject integration + +#### Scenario: Touching any path under .specflow/ rejects + +- **WHEN** the worktree diff adds, modifies, or deletes a path under `.specflow/` +- **THEN** the main agent SHALL reject integration + +#### Scenario: Declaring a protected path in produced_artifacts does not bypass the check + +- **WHEN** the worktree diff modifies a protected path +- **AND** that path is listed in `produced_artifacts` +- **THEN** the main agent SHALL still reject integration + +### Requirement: Empty-diff-on-success causes integration rejection + +If a subagent returns `status: "success"` but the worktree diff is empty (no path is touched), the main agent SHALL reject integration. An empty diff paired with success indicates the subagent did not produce the work the bundle claims to have done. + +#### Scenario: Empty diff with success rejects + +- **WHEN** a subagent returns `status: "success"` +- **AND** `git -C diff --binary ..HEAD` produces no changes +- **THEN** the main agent SHALL reject integration +- **AND** the bundle status SHALL become `integration_rejected` + +### Requirement: Patch import via git apply covers all standard change types + +When all integration-validation checks pass, the main agent SHALL import the subagent's changes into the main workspace via `git -C diff --binary ..HEAD | git apply --binary` executed at the repository root. The patch-import mechanism SHALL support the full set of change types that `git diff --binary` and `git apply --binary` themselves cover: + +- file creation +- file deletion +- file modification (text content) +- file mode change +- file rename +- binary file content change + +The main agent SHALL NOT use `--3way` fallback in Phase 1. If `git apply --binary` exits non-zero at the repo root, the main agent SHALL reject integration. + +#### Scenario: Text modification applies cleanly + +- **WHEN** integration validation passes and the patch modifies a text file +- **THEN** `git apply --binary` SHALL be invoked at the repo root +- **AND** the bundle SHALL progress toward `done` on successful apply + +#### Scenario: Binary change is included in the patch + +- **WHEN** the worktree diff includes a binary file change +- **THEN** the diff SHALL be extracted with `git diff --binary` +- **AND** `git apply --binary` SHALL be invoked at the repo root + +#### Scenario: Patch-apply failure rejects integration + +- **WHEN** `git apply --binary` exits non-zero +- **THEN** the main agent SHALL reject integration +- **AND** the bundle status SHALL become `integration_rejected` +- **AND** no `--3way` retry SHALL be attempted + +### Requirement: Bundle status transition after integration + +A `subagent-worktree` bundle SHALL reach `done` only after integration validation passes and `git apply --binary` succeeds at the repo root. The main agent SHALL call `specflow-advance-bundle done` ONLY after successful patch import. + +If the subagent returned `status: "failure"`, the main agent SHALL call `specflow-advance-bundle subagent_failed`. + +If the subagent returned `status: "success"` but integration validation (any reason: undeclared path, protected-path touch, empty diff, patch-apply failure) rejected, the main agent SHALL call `specflow-advance-bundle integration_rejected`. + +#### Scenario: Successful path advances to done only after integration + +- **WHEN** a subagent returns `status: "success"` for bundle `B` +- **AND** all integration checks pass and `git apply --binary` succeeds +- **THEN** the main agent SHALL invoke `specflow-advance-bundle B done` + +#### Scenario: Subagent failure advances to subagent_failed + +- **WHEN** a subagent returns `status: "failure"` for bundle `B` +- **THEN** the main agent SHALL invoke `specflow-advance-bundle B subagent_failed` +- **AND** no integration, diff inspection, or patch apply SHALL be performed for `B` + +#### Scenario: Integration rejection advances to integration_rejected + +- **WHEN** a subagent returns `status: "success"` for bundle `B` +- **AND** any integration check rejects (undeclared path, protected-path touch, empty-diff-on-success, or patch-apply failure) +- **THEN** the main agent SHALL invoke `specflow-advance-bundle B integration_rejected` + +### Requirement: Worktree retention policy + +The main agent SHALL clean up worktrees based on the bundle's final status in the current apply invocation: + +- On `done`: the worktree SHALL be removed immediately via `git worktree remove `. If `git worktree remove` fails (e.g., due to uncommitted subagent changes that should already have been imported), the main agent SHALL surface the error but SHALL NOT revert the bundle's `done` status. +- On `subagent_failed`: the worktree SHALL be retained at its path at `.specflow/worktrees///`. +- On `integration_rejected`: the worktree SHALL be retained at its path. + +Retention behavior in Phase 1 is fixed and SHALL NOT be configurable. `/specflow.fix_apply` and manual inspection SHALL use the retained worktree path to diagnose failures. + +#### Scenario: Successful bundle removes its worktree + +- **WHEN** bundle `B` reaches `done` +- **THEN** `git worktree remove .specflow/worktrees///` SHALL be invoked +- **AND** the worktree SHALL no longer appear in `git worktree list` + +#### Scenario: Failed subagent retains worktree + +- **WHEN** bundle `B` reaches `subagent_failed` +- **THEN** the worktree at `.specflow/worktrees///` SHALL remain +- **AND** the worktree SHALL still appear in `git worktree list` + +#### Scenario: Integration rejection retains worktree + +- **WHEN** bundle `B` reaches `integration_rejected` +- **THEN** the worktree at `.specflow/worktrees///` SHALL remain + +#### Scenario: Retention policy is not configurable in Phase 1 + +- **WHEN** an operator attempts to override retention via config in Phase 1 +- **THEN** the retention behavior SHALL be the fixed rule above +- **AND** no config key SHALL alter cleanup on success or retention on failure + diff --git a/openspec/specs/bundle-subagent-execution/spec.md b/openspec/specs/bundle-subagent-execution/spec.md index 3738e07..2a1b856 100644 --- a/openspec/specs/bundle-subagent-execution/spec.md +++ b/openspec/specs/bundle-subagent-execution/spec.md @@ -183,33 +183,123 @@ This preserves the existing `task-planner` contract that `specflow-advance-bundl ### Requirement: Fail-fast on subagent failure settles chunk then stops -The dispatcher SHALL fail fast on any subagent failure in the current chunk, but SHALL first drain the chunk so partial successes are recorded before stopping. When any subagent in the current chunk returns `status: "failure"`: +The dispatcher SHALL fail fast on any subagent failure or integration rejection in the current chunk, but SHALL first drain the chunk so partial successes are recorded before stopping. When any subagent in the current chunk returns `status: "failure"`, or any `subagent-worktree` bundle in the chunk is rejected during main-agent integration: -1. The main agent SHALL wait for every other subagent in the same chunk to settle (return `"success"` or `"failure"`). This ensures partial successes in the chunk are recorded and produced artifacts are not lost. -2. For each sibling subagent that returned `"success"`, the main agent SHALL invoke `specflow-advance-bundle done`. -3. The failed bundle SHALL remain in `in_progress`. The main agent SHALL NOT attempt to transition it to `done`, `pending`, or any other status as part of the fail-fast handling. +1. The main agent SHALL wait for every other subagent in the same chunk to settle (return `"success"` or `"failure"`) and for each success to be processed through integration. This ensures partial successes in the chunk are recorded and produced artifacts are not lost. +2. For each sibling subagent whose integration succeeded, the main agent SHALL invoke `specflow-advance-bundle done`. +3. The failing bundle SHALL be transitioned via `specflow-advance-bundle` to the appropriate terminal-for-this-invocation status, per `apply-worktree-integration`: + - `subagent_failed` when the subagent returned `status: "failure"`. + - `integration_rejected` when the subagent returned `status: "success"` but main-agent integration validation or patch-apply rejected. 4. After all siblings have settled and their status transitions have been applied, the main agent SHALL STOP the apply immediately. It SHALL NOT begin the next chunk or the next window. -5. The run SHALL remain in `apply_draft`. The main agent SHALL surface the failing subagent's `error` field to the user and document recovery paths (`/specflow.fix_apply` or manual intervention). +5. The run SHALL remain in `apply_draft`. The main agent SHALL surface the failure reason to the user (the subagent's `error` for `subagent_failed`; the specific integration-rejection cause for `integration_rejected`) and document recovery paths (`/specflow.fix_apply` using the retained worktree, or manual intervention). This behavior is consistent with the existing `specflow-advance-bundle` fail-fast contract: CLI-mandatory status transitions remain serial through the main agent, no auto-retry is introduced, and no un-dispatched bundles are silently promoted. #### Scenario: One failure in a chunk records siblings then stops -- **WHEN** a chunk contains subagents X, Y, Z where X returns `"failure"`, Y returns `"success"`, Z returns `"success"` (order-independent) +- **WHEN** a chunk contains subagents X, Y, Z where X returns `"failure"`, Y returns `"success"` (integration ok), Z returns `"success"` (integration ok) - **THEN** the main agent SHALL invoke `specflow-advance-bundle Y done` and `specflow-advance-bundle Z done` -- **AND** the main agent SHALL NOT invoke `specflow-advance-bundle` for X beyond the earlier `in_progress` transition -- **AND** the main agent SHALL STOP the apply after recording Y and Z +- **AND** the main agent SHALL invoke `specflow-advance-bundle X subagent_failed` +- **AND** the main agent SHALL STOP the apply after recording Y, Z, and X - **AND** subsequent chunks and windows SHALL NOT be dispatched -#### Scenario: Failing bundle remains in_progress after fail-fast +#### Scenario: Integration rejection in chunk settles siblings then stops + +- **WHEN** a chunk contains subagents X, Y where X returns `"success"` but integration is rejected, and Y returns `"success"` with integration ok +- **THEN** the main agent SHALL invoke `specflow-advance-bundle Y done` +- **AND** the main agent SHALL invoke `specflow-advance-bundle X integration_rejected` +- **AND** the main agent SHALL STOP the apply after recording both transitions +- **AND** subsequent chunks and windows SHALL NOT be dispatched + +#### Scenario: Failing bundle settles to a terminal-for-invocation status - **WHEN** subagent for bundle X returns `"failure"` -- **THEN** bundle X SHALL remain in `in_progress` in `task-graph.json` after the apply stops +- **THEN** bundle X SHALL have status `"subagent_failed"` in `task-graph.json` after the apply stops - **AND** the run SHALL remain in `apply_draft` -#### Scenario: Fail-fast surfaces subagent error to the user +#### Scenario: Integration-rejected bundle settles to integration_rejected + +- **WHEN** subagent for bundle X returns `"success"` but main-agent integration rejects +- **THEN** bundle X SHALL have status `"integration_rejected"` in `task-graph.json` after the apply stops +- **AND** the run SHALL remain in `apply_draft` + +#### Scenario: Fail-fast surfaces the failure reason to the user - **WHEN** a subagent returns `"failure"` with `error: ""` - **THEN** the main agent SHALL surface `` to the user -- **AND** the guide SHALL cite `/specflow.fix_apply` and manual intervention as recovery paths +- **AND** the guide SHALL cite `/specflow.fix_apply` (using the retained worktree) and manual intervention as recovery paths + +- **WHEN** an integration is rejected for a specific reason (undeclared path, protected-path touch, empty-diff-on-success, or patch-apply failure) +- **THEN** the main agent SHALL surface the specific reason to the user +- **AND** the guide SHALL cite `/specflow.fix_apply` and manual worktree inspection as recovery paths + +### Requirement: Bundle execution mode is derived from subagent-eligibility + +The dispatcher SHALL assign each bundle exactly one of two execution modes when `/specflow.apply` begins a window: + +- `inline-main`: the bundle is implemented directly by the main agent in the primary workspace. +- `subagent-worktree`: the bundle is dispatched to a subagent running inside a dedicated ephemeral git worktree, as defined by the `apply-worktree-integration` capability. + +The mode assignment rule SHALL be: + +- A bundle classified as **subagent-eligible** (per the existing "Bundle subagent-eligibility is derived from size_score" requirement) SHALL be assigned `subagent-worktree`. +- A bundle classified as **inline-only** SHALL be assigned `inline-main`. + +No third execution mode SHALL be introduced in this phase. In particular, a dispatched subagent without an isolated worktree (historically `subagent-shared`) SHALL NOT be a supported mode. + +Dispatch signals SHALL remain limited to the existing eligibility rule (`apply.subagent_dispatch.enabled = true` and `size_score > threshold`). No additional signals such as side-effect risk, lockfile/codegen touches, or changed-path count SHALL influence mode assignment in this phase. + +#### Scenario: Eligible bundle routes to subagent-worktree + +- **WHEN** dispatch is enabled and a bundle has `size_score = 8` and `threshold = 5` +- **THEN** the bundle SHALL be assigned execution mode `subagent-worktree` + +#### Scenario: Ineligible bundle routes to inline-main + +- **WHEN** a bundle is classified as inline-only (for any reason: dispatch disabled, `size_score <= threshold`, missing `size_score`, or task-graph.json absent) +- **THEN** the bundle SHALL be assigned execution mode `inline-main` + +#### Scenario: Subagent-shared is not a supported mode + +- **WHEN** the dispatcher assigns execution mode +- **THEN** the set of possible modes SHALL be exactly `{"inline-main", "subagent-worktree"}` +- **AND** a dispatched subagent executing without a dedicated worktree SHALL NOT occur + +#### Scenario: No extra dispatch signals influence mode + +- **WHEN** the dispatcher assigns execution mode for a bundle +- **THEN** the decision SHALL depend only on the existing subagent-eligibility rule +- **AND** signals such as side-effect risk, lockfile touches, or changed-path count SHALL NOT be consulted in this phase + +### Requirement: Bundle `done` requires main-agent integration success for subagent-worktree mode + +For every bundle assigned execution mode `subagent-worktree`, the main agent SHALL NOT invoke `specflow-advance-bundle done` on a subagent `status: "success"` alone. The main agent SHALL first run the integration authority contract defined in `apply-worktree-integration` (diff inspection, artifact cross-check, protected-path check, empty-diff-on-success check, and patch-apply). + +A `subagent-worktree` bundle SHALL reach `done` if and only if: + +1. The subagent returned `status: "success"`, AND +2. Integration validation passed, AND +3. `git apply --binary` at the repo root exited zero. + +If any of 1–3 fails, the bundle SHALL transition to one of the new terminal-for-this-invocation statuses defined in `task-planner` (`subagent_failed` for failed 1; `integration_rejected` for failed 2 or 3), per `apply-worktree-integration`. The main agent SHALL NOT silently record `done` on an unverified or unimported subagent success. + +For `inline-main` bundles, this requirement does NOT apply; inline bundles reach `done` under the existing completion rules. + +#### Scenario: Subagent success alone does not reach done + +- **WHEN** a `subagent-worktree` bundle's subagent returns `status: "success"` +- **AND** integration validation or patch-apply has not yet been executed +- **THEN** the main agent SHALL NOT invoke `specflow-advance-bundle ... done` + +#### Scenario: Done is reached only after successful integration + +- **WHEN** a `subagent-worktree` bundle's subagent returns `status: "success"` +- **AND** integration validation passes and `git apply --binary` succeeds +- **THEN** the main agent SHALL invoke `specflow-advance-bundle ... done` + +#### Scenario: Inline-main completion rules are unchanged + +- **WHEN** a bundle is assigned `inline-main` +- **THEN** this requirement SHALL NOT apply +- **AND** the bundle SHALL reach `done` under the existing inline completion rules diff --git a/openspec/specs/task-planner/spec.md b/openspec/specs/task-planner/spec.md index db282a0..1eedaf0 100644 --- a/openspec/specs/task-planner/spec.md +++ b/openspec/specs/task-planner/spec.md @@ -19,16 +19,20 @@ Each `Bundle` object SHALL have the following fields: - `depends_on`: array of bundle IDs representing soft dependencies (dependent bundle MAY start when dependency's output artifacts are available, not necessarily when dependency is fully complete) - `inputs`: array of artifact references the bundle consumes - `outputs`: array of artifact references the bundle produces -- `status`: enum of `"pending"` | `"in_progress"` | `"done"` | `"skipped"` +- `status`: enum of `"pending"` | `"in_progress"` | `"done"` | `"skipped"` | `"subagent_failed"` | `"integration_rejected"` - `tasks`: array of `Task` objects within the bundle - `owner_capabilities`: array of baseline spec names (from `openspec/specs/`) indicating which spec domain the bundle belongs to - `size_score`: **optional** non-negative integer. When present, it SHALL equal `bundle.tasks.length` at the time of graph generation. When absent, downstream consumers (notably `bundle-subagent-execution`) SHALL treat the bundle as having an undefined `size_score` rather than substituting a default value. +The two values `"subagent_failed"` and `"integration_rejected"` are introduced for the apply-class workflow when a subagent-worktree dispatched bundle cannot reach `done` (see `apply-worktree-integration`). They are **terminal within a single apply invocation** — the apply SHALL fail-fast and stop — but are **not terminal at the run level**; `/specflow.fix_apply` or an operator-initiated reset MAY transition the bundle back to `"pending"` in a subsequent invocation. They SHALL NOT be generated by the task-graph generator (`generateTaskGraph`); they exist only as writable apply-phase statuses. + Each `Task` object SHALL have at minimum: - `id`: unique identifier within the bundle - `title`: task description - `status`: enum of `"pending"` | `"in_progress"` | `"done"` | `"skipped"` +Child-task statuses SHALL remain the original four values. The new bundle-only statuses (`"subagent_failed"`, `"integration_rejected"`) SHALL NOT appear on child tasks. + #### Scenario: Valid task graph conforms to schema - **WHEN** a task graph JSON document is validated against the `TaskGraph` schema @@ -65,6 +69,28 @@ Each `Task` object SHALL have at minimum: - **THEN** validation SHALL pass - **AND** the graph SHALL be usable by the apply phase +#### Scenario: subagent_failed is a valid bundle status + +- **WHEN** a task graph has a bundle with `status = "subagent_failed"` +- **THEN** validation SHALL pass + +#### Scenario: integration_rejected is a valid bundle status + +- **WHEN** a task graph has a bundle with `status = "integration_rejected"` +- **THEN** validation SHALL pass + +#### Scenario: Child task cannot use new bundle-only statuses + +- **WHEN** a task graph is validated +- **AND** any `tasks[*].status` is `"subagent_failed"` or `"integration_rejected"` +- **THEN** validation SHALL fail + +#### Scenario: generateTaskGraph does not emit new statuses + +- **WHEN** `generateTaskGraph` produces a new task graph +- **THEN** every bundle SHALL have `status = "pending"` +- **AND** no bundle SHALL be emitted with `"subagent_failed"` or `"integration_rejected"` + ### Requirement: Task graph is generated from design.md via LLM-based inference The system SHALL provide a `generateTaskGraph(designContent, changeId, specNames)` function that uses LLM-based inference to analyze `design.md` content and produce a `TaskGraph` JSON document. @@ -174,6 +200,12 @@ The apply phase SHALL update bundle and task status in `task-graph.json` after e - `"pending"` → `"in_progress"`: when bundle execution begins - `"in_progress"` → `"done"`: when bundle completion check passes - `"pending"` → `"skipped"`: when explicitly skipped by user or system +- `"in_progress"` → `"subagent_failed"`: when a `subagent-worktree` bundle's subagent returned `status: "failure"` +- `"in_progress"` → `"integration_rejected"`: when a `subagent-worktree` bundle's subagent returned `status: "success"` but main-agent integration validation or patch-apply rejected the result +- `"subagent_failed"` → `"pending"`: allowed only via `/specflow.fix_apply` or an explicit operator reset; not permitted during normal apply-class transitions +- `"integration_rejected"` → `"pending"`: allowed only via `/specflow.fix_apply` or an explicit operator reset; not permitted during normal apply-class transitions + +All other transitions SHALL be rejected as invalid. In particular, a bundle in `"subagent_failed"` or `"integration_rejected"` SHALL NOT transition directly to `"done"`, `"in_progress"`, or `"skipped"` without first returning to `"pending"` via a reset. The system SHALL provide `updateBundleStatus(taskGraph, bundleId, newStatus)` that returns a new `TaskGraph` with the specified bundle's status updated. The original task graph SHALL NOT be mutated. @@ -183,13 +215,13 @@ When the new bundle status is a **terminal** status (`"done"` or `"skipped"`), ` Normalization applies unconditionally regardless of the prior child status (including children that already hold a different terminal status). The bundle is the authoritative execution unit; per-task status is informational. A bundle with an empty `tasks` array is a no-op with respect to child coercion; the terminal bundle status is still applied. -Normalization applies **only on terminal transitions**. Non-terminal transitions (`pending → in_progress`, and any other transition whose target is `pending` or `in_progress`) SHALL NOT modify child task statuses. +Normalization applies **only on terminal transitions to `"done"` or `"skipped"`**. Non-terminal transitions (`pending → in_progress`, `pending → pending`) SHALL NOT modify child task statuses. Transitions to the new statuses `"subagent_failed"` and `"integration_rejected"` SHALL also NOT modify child task statuses; child statuses at the time of failure are informational and SHALL be preserved as-is so that `/specflow.fix_apply` can inspect what was in flight. -The returned `TaskGraph` SHALL contain both the bundle status change and all child coercions as a single in-memory update. When the caller persists `task-graph.json`, it SHALL be written atomically (e.g., write-to-temp + rename) so that the persisted graph is either the pre-update state or the fully normalized post-update state — never a mismatched intermediate state. +The returned `TaskGraph` SHALL contain both the bundle status change and all child coercions (when applicable) as a single in-memory update. When the caller persists `task-graph.json`, it SHALL be written atomically (e.g., write-to-temp + rename) so that the persisted graph is either the pre-update state or the fully normalized post-update state — never a mismatched intermediate state. Whenever normalization actually changes a child task's status (from a value different from the target terminal status), the apply path SHALL emit a structured audit log entry containing at minimum: `bundle_id`, `task_id`, `from_status`, and `to_status`. Coercions that do not change a child's status (the child already matched the bundle's terminal status) SHALL NOT emit a log entry. -After status update, `tasks.md` SHALL be re-rendered from the normalized `TaskGraph` returned by `updateBundleStatus` (never from an unnormalized intermediate graph) to keep the human-readable view in sync. +After status update, `tasks.md` SHALL be re-rendered from the normalized `TaskGraph` returned by `updateBundleStatus` (never from an unnormalized intermediate graph) to keep the human-readable view in sync. Bundles in the new statuses `"subagent_failed"` and `"integration_rejected"` SHALL be rendered with a distinct visual marker so readers can identify failed bundles at a glance; the exact rendering is implementation-defined but SHALL differ from `"pending"`, `"in_progress"`, `"done"`, and `"skipped"`. #### Scenario: Status transitions from pending to in_progress @@ -204,11 +236,45 @@ After status update, `tasks.md` SHALL be re-rendered from the normalized `TaskGr - **THEN** the returned task graph SHALL have the bundle's status as `"done"` - **AND** every child task's status in that bundle SHALL be `"done"` in the returned graph +#### Scenario: Status transitions from in_progress to subagent_failed + +- **WHEN** `updateBundleStatus` is called with `("in_progress" bundle, "subagent_failed")` +- **THEN** the returned task graph SHALL have the bundle's status as `"subagent_failed"` +- **AND** every child task's status in that bundle SHALL be preserved as-is +- **AND** no child-coercion audit log entry SHALL be emitted + +#### Scenario: Status transitions from in_progress to integration_rejected + +- **WHEN** `updateBundleStatus` is called with `("in_progress" bundle, "integration_rejected")` +- **THEN** the returned task graph SHALL have the bundle's status as `"integration_rejected"` +- **AND** every child task's status in that bundle SHALL be preserved as-is +- **AND** no child-coercion audit log entry SHALL be emitted + +#### Scenario: Status transitions from subagent_failed back to pending via reset + +- **WHEN** `updateBundleStatus` is called with `("subagent_failed" bundle, "pending")` via `/specflow.fix_apply` or an explicit operator reset path +- **THEN** the returned task graph SHALL have the bundle's status as `"pending"` + +#### Scenario: Status transitions from integration_rejected back to pending via reset + +- **WHEN** `updateBundleStatus` is called with `("integration_rejected" bundle, "pending")` via `/specflow.fix_apply` or an explicit operator reset path +- **THEN** the returned task graph SHALL have the bundle's status as `"pending"` + #### Scenario: Invalid status transition is rejected - **WHEN** `updateBundleStatus` is called with `("done" bundle, "pending")` - **THEN** it SHALL return a typed error indicating an invalid status transition +#### Scenario: Direct transition from subagent_failed to done is rejected + +- **WHEN** `updateBundleStatus` is called with `("subagent_failed" bundle, "done")` +- **THEN** it SHALL return a typed error indicating an invalid status transition + +#### Scenario: Direct transition from integration_rejected to in_progress is rejected + +- **WHEN** `updateBundleStatus` is called with `("integration_rejected" bundle, "in_progress")` +- **THEN** it SHALL return a typed error indicating an invalid status transition + #### Scenario: Bundle transition to done normalizes pending child tasks - **WHEN** `updateBundleStatus` is called with `("in_progress" bundle, "done")` and that bundle's `tasks` contains at least one task with `status = "pending"` @@ -222,7 +288,7 @@ After status update, `tasks.md` SHALL be re-rendered from the normalized `TaskGr #### Scenario: Normalization force-coerces conflicting prior terminal child status -- **WHEN** `updateBundleStatus` is called with a terminal target status and at least one child task already holds a different terminal status (e.g., child is `"done"` and the new bundle status is `"skipped"`) +- **WHEN** `updateBundleStatus` is called with a terminal target status (`"done"` or `"skipped"`) and at least one child task already holds a different terminal status (e.g., child is `"done"` and the new bundle status is `"skipped"`) - **THEN** the returned task graph SHALL have that child's status rewritten to match the bundle's new terminal status - **AND** a structured audit log entry SHALL be emitted containing `bundle_id`, `task_id`, `from_status`, and `to_status` @@ -246,15 +312,21 @@ After status update, `tasks.md` SHALL be re-rendered from the normalized `TaskGr #### Scenario: tasks.md is re-rendered from normalized graph after terminal transition -- **WHEN** a bundle status is updated to a terminal value and `task-graph.json` is persisted +- **WHEN** a bundle status is updated to a terminal value (`"done"` or `"skipped"`) and `task-graph.json` is persisted - **THEN** `tasks.md` SHALL be re-rendered from the normalized `TaskGraph` returned by `updateBundleStatus` - **AND** the rendered checklist for that bundle SHALL show every task's checkbox state as matching the bundle's terminal status +#### Scenario: tasks.md renders subagent_failed and integration_rejected with a distinct marker + +- **WHEN** `tasks.md` is re-rendered for a graph containing a bundle with `status = "subagent_failed"` or `status = "integration_rejected"` +- **THEN** the rendered bundle section SHALL carry a visible marker that differs from `"pending"`, `"in_progress"`, `"done"`, and `"skipped"` +- **AND** each child task's rendered checkbox SHALL reflect its preserved informational status (no coercion) + #### Scenario: Atomic persistence avoids mismatched intermediate state -- **WHEN** `task-graph.json` is written after a terminal bundle transition +- **WHEN** `task-graph.json` is written after a bundle transition (including transitions to `"subagent_failed"` or `"integration_rejected"`) - **THEN** the persistence path SHALL use an atomic write (e.g., write-to-temp + rename) so that a concurrent reader observes either the pre-update graph or the fully normalized post-update graph -- **AND** the persisted file SHALL NOT contain a bundle whose status is terminal while any of its child task statuses disagree +- **AND** the persisted file SHALL NOT contain a bundle whose status is terminal (`"done"` or `"skipped"`) while any of its child task statuses disagree ### Requirement: Legacy fallback supports changes without task graph diff --git a/src/bin/specflow-advance-bundle.ts b/src/bin/specflow-advance-bundle.ts index f7eb764..2e14687 100644 --- a/src/bin/specflow-advance-bundle.ts +++ b/src/bin/specflow-advance-bundle.ts @@ -18,6 +18,8 @@ const VALID_STATUSES: readonly BundleStatus[] = [ "in_progress", "done", "skipped", + "subagent_failed", + "integration_rejected", ]; interface DieContext { @@ -56,11 +58,16 @@ function isBundleStatus(value: string): value is BundleStatus { } async function main(): Promise { - const args = process.argv.slice(2); + const allArgs = process.argv.slice(2); + // Extract --allow-reset flag; remaining positional args are CHANGE_ID BUNDLE_ID NEW_STATUS. + // Only /specflow.fix_apply or an explicit operator reset flow should pass this flag. + const allowReset = allArgs.includes("--allow-reset"); + const args = allArgs.filter((a) => a !== "--allow-reset"); if (args.length < 3) { die( - "Usage: specflow-advance-bundle . " + - `NEW_STATUS must be one of: ${VALID_STATUSES.join(" | ")}`, + "Usage: specflow-advance-bundle [--allow-reset]. " + + `NEW_STATUS must be one of: ${VALID_STATUSES.join(" | ")}. ` + + "--allow-reset is required for subagent_failed|integration_rejected → pending transitions.", ); } @@ -106,6 +113,7 @@ async function main(): Promise { taskGraph, bundleId, newStatus: rawStatus, + allowReset, writer: { writeTaskGraph(content) { void store.write(taskGraphRef, content); diff --git a/src/lib/apply-dispatcher/execution-mode.ts b/src/lib/apply-dispatcher/execution-mode.ts new file mode 100644 index 0000000..871cb8d --- /dev/null +++ b/src/lib/apply-dispatcher/execution-mode.ts @@ -0,0 +1,42 @@ +// Per-bundle execution mode assignment for apply-worktree-isolation. +// +// Assigns one of two modes to each bundle: +// - `inline-main`: implemented directly by the main agent in the primary +// workspace. +// - `subagent-worktree`: dispatched to a subagent running inside a +// dedicated ephemeral git worktree. +// +// The rule is intentionally the same as `bundle-subagent-execution`'s +// subagent-eligibility test: enabled + `size_score > threshold`. No +// additional signals (side-effect risk, lockfile touches, changed-path +// count) influence the decision in Phase 1. +// +// `subagent-shared` (a dispatched subagent without an isolated worktree) is +// NOT a supported mode. The enum is deliberately a two-value union. + +import type { Bundle } from "../task-planner/types.js"; +import type { DispatchConfig } from "./config.js"; + +export type BundleExecutionMode = "inline-main" | "subagent-worktree"; + +/** + * Decide which execution mode a single bundle is assigned based solely on + * its own subagent-eligibility. Returns `"subagent-worktree"` iff ALL of + * these hold: + * + * 1. `config.enabled` is true + * 2. `bundle.size_score` is defined + * 3. `bundle.size_score > config.threshold` + * + * Otherwise returns `"inline-main"`. The rule is deterministic and has no + * side effects — callers can safely evaluate it repeatedly. + */ +export function assignExecutionMode( + bundle: Bundle, + config: DispatchConfig, +): BundleExecutionMode { + if (!config.enabled) return "inline-main"; + if (bundle.size_score === undefined) return "inline-main"; + if (bundle.size_score <= config.threshold) return "inline-main"; + return "subagent-worktree"; +} diff --git a/src/lib/apply-dispatcher/index.ts b/src/lib/apply-dispatcher/index.ts index 211f8c7..e126822 100644 --- a/src/lib/apply-dispatcher/index.ts +++ b/src/lib/apply-dispatcher/index.ts @@ -30,6 +30,7 @@ export type { ChunkFailure, DispatchOutcome, RunDispatchedWindowArgs, + WorktreeCleanupWarning, } from "./orchestrate.js"; export { runDispatchedWindow } from "./orchestrate.js"; diff --git a/src/lib/apply-dispatcher/orchestrate.ts b/src/lib/apply-dispatcher/orchestrate.ts index 942693e..989d69f 100644 --- a/src/lib/apply-dispatcher/orchestrate.ts +++ b/src/lib/apply-dispatcher/orchestrate.ts @@ -1,23 +1,44 @@ // Chunked subagent orchestration — the "what the main agent does" half of -// the dispatcher. +// the dispatcher, extended by apply-worktree-isolation to create ephemeral +// per-bundle worktrees, run main-agent integration, and advance bundles to +// the new subagent_failed / integration_rejected terminal statuses. // -// Contract (D2–D4, review P1): +// Contract: // 1. Classify the window. If inline, return `{ outcome: "inline" }` without // touching any state; the caller falls back to the legacy main-agent path. // 2. For subagent-dispatched windows: preflight the ENTIRE window. If any // bundle has an unresolvable `owner_capability`, throw -// `MissingCapabilityError` before ANY `advance()` call. This is the -// P1/review-fix invariant: no bundle is ever left in `in_progress` due to -// a later bundle's missing capability. -// 3. For each chunk of size ≤ maxConcurrency (in bundle order): +// `MissingCapabilityError` before ANY `advance()` call. +// 3. If worktreeRuntime is provided, CREATE WORKTREES for every bundle in the +// current chunk BEFORE any advance() call. If any worktree creation fails, +// throw `WorktreeError` upward — the caller fails the apply fast. +// 4. For each chunk (in bundle order): // a. Assemble per-bundle ContextPackages (pure read). -// b. Advance every bundle in the chunk to `in_progress` via `advance()`. +// b. Advance every bundle in the chunk to `in_progress`. // c. Invoke subagents in parallel via `Promise.allSettled`. -// d. On each success result, advance the corresponding bundle to `done`. -// e. If ANY subagent returned failure OR threw, collect failures, STOP -// after the chunk drains. Subsequent chunks are NOT dispatched. Failed -// bundles remain in `in_progress`; successful siblings are `done`. +// d. For each result: +// - throw → advance to `subagent_failed`, retain worktree, record failure. +// - failure → advance to `subagent_failed`, retain worktree, record failure. +// - success → run main-agent integration: +// - accepted → advance to `done`, remove worktree. +// - rejected → advance to `integration_rejected`, retain worktree, +// record failure with the specific rejection cause. +// e. If ANY bundle in the chunk ended in a non-done terminal status, STOP +// after the chunk drains. Subsequent chunks are NOT dispatched. +import { + formatRejectionCause, + type IntegrationRejectionCause, + integrateBundle, + type SubagentSuccessResult, +} from "../apply-worktree/integrate.js"; +import { + createWorktree, + removeWorktree, + WorktreeError, + type WorktreeHandle, + type WorktreeRuntime, +} from "../apply-worktree/worktree.js"; import type { Bundle, BundleStatus, TaskGraph } from "../task-planner/types.js"; import { classifyWindow } from "./classify.js"; import type { DispatchConfig } from "./config.js"; @@ -30,14 +51,30 @@ export interface ChunkFailure { readonly message: string; readonly details?: unknown; }; + /** + * Set to `"subagent_failed"` when the subagent itself reported failure or + * threw, and `"integration_rejected"` when main-agent integration rejected + * a subagent success. Useful for /specflow.fix_apply to branch on the + * recovery path. + */ + readonly terminalStatus: "subagent_failed" | "integration_rejected"; + /** + * Structured rejection cause for `integration_rejected` failures; undefined + * for `subagent_failed`. + */ + readonly integrationCause?: IntegrationRejectionCause; } export type DispatchOutcome = | { readonly outcome: "inline" } - | { readonly outcome: "ok" } + | { + readonly outcome: "ok"; + readonly cleanupWarnings?: readonly WorktreeCleanupWarning[]; + } | { readonly outcome: "failed"; readonly failures: readonly ChunkFailure[]; + readonly cleanupWarnings?: readonly WorktreeCleanupWarning[]; }; export type AdvanceBundleFn = ( @@ -53,6 +90,22 @@ export interface RunDispatchedWindowArgs { readonly repoRoot: string; readonly invoke: SubagentInvoker; readonly advance: AdvanceBundleFn; + /** + * Enables subagent-worktree execution: each bundle in a subagent-dispatched + * chunk gets an ephemeral worktree, and successful subagent results go + * through main-agent integration before reaching `done`. REQUIRED for all + * subagent-dispatched windows — omitting it when the window classifies as + * subagent mode triggers a fail-fast error (`subagent-shared` is NOT a + * supported execution mode). Only inline-classified windows may proceed + * without it, since they return `{ outcome: "inline" }` before reaching + * the worktree code path. + */ + readonly worktreeRuntime?: WorktreeRuntime; + /** + * Required whenever `worktreeRuntime` is provided. Used as the parent + * directory segment under `.specflow/worktrees///`. + */ + readonly runId?: string; } function failureFromResult( @@ -64,6 +117,7 @@ function failureFromResult( error: result.error ?? { message: `Subagent for bundle '${bundleId}' returned failure without an error payload.`, }, + terminalStatus: "subagent_failed", }; } @@ -72,84 +126,256 @@ function failureFromThrow(bundleId: string, err: unknown): ChunkFailure { return { bundleId, error: { message: err.message, details: err.stack }, + terminalStatus: "subagent_failed", }; } return { bundleId, error: { message: String(err) }, + terminalStatus: "subagent_failed", + }; +} + +function failureFromIntegration( + bundleId: string, + cause: IntegrationRejectionCause, +): ChunkFailure { + return { + bundleId, + error: { message: formatRejectionCause(cause) }, + terminalStatus: "integration_rejected", + integrationCause: cause, }; } +/** + * Best-effort worktree cleanup on the success path. We only invoke this after + * the patch has been imported at main and the bundle advanced to `done`. + * Failing hard here would regress a bundle that is already complete — so we + * do NOT throw. Instead, the caller receives a warning describing the stale + * worktree so it can be surfaced to the operator (R4-F12: cleanup failures + * must not be silently swallowed, as they violate the fixed retention policy + * and can cause path collisions on rerun). + */ +async function tryRemoveWorktree( + runtime: WorktreeRuntime | undefined, + handle: WorktreeHandle, +): Promise { + if (!runtime) return undefined; + try { + removeWorktree(runtime, handle); + return undefined; + } catch (err) { + return { + bundleId: handle.bundleId, + worktreePath: handle.path, + message: + err instanceof Error + ? err.message + : `Unknown error removing worktree at ${handle.path}`, + }; + } +} + +/** + * Describes a non-fatal worktree cleanup failure on the success path. + * Callers should surface these to the operator so stale worktrees are not + * silently left behind. + */ +export interface WorktreeCleanupWarning { + readonly bundleId: string; + readonly worktreePath: string; + readonly message: string; +} + export async function runDispatchedWindow( args: RunDispatchedWindowArgs, ): Promise { - const { window, config, changeId, taskGraph, repoRoot, invoke, advance } = - args; + const { + window, + config, + changeId, + taskGraph, + repoRoot, + invoke, + advance, + worktreeRuntime, + runId, + } = args; const decision = classifyWindow(window, config); if (decision.mode === "inline") { return { outcome: "inline" }; } + // The proposal defines exactly two execution modes: `inline-main` and + // `subagent-worktree`. `subagent-shared` (dispatched subagent without an + // isolated worktree) is NOT a supported mode. If the window was classified + // as subagent-dispatched but the caller did not provide a worktreeRuntime, + // fail fast — do NOT silently fall back to the legacy shared-workspace path. + if (!worktreeRuntime) { + throw new Error( + "runDispatchedWindow: window classified as subagent-dispatched but worktreeRuntime was not provided. " + + "subagent-shared execution is not a supported mode — callers MUST provide worktreeRuntime (and runId) for all dispatched windows.", + ); + } + + if (!runId) { + throw new Error( + "runDispatchedWindow: worktreeRuntime provided without runId; runId is required for .specflow/worktrees// layout.", + ); + } + // P1: validate every bundle in the window BEFORE any mutation. This call // throws `MissingCapabilityError` on the first unresolvable capability; the // error propagates to the caller with no bundle state changed. preflightWindow(window, changeId, repoRoot); + const allCleanupWarnings: WorktreeCleanupWarning[] = []; + for (const chunk of decision.chunks) { - // Assemble context packages up-front (pure reads). If assembly throws - // despite preflight (e.g., race with a concurrent filesystem edit), the - // error propagates and no `in_progress` transition occurs for this chunk. - const packages = chunk.map((b) => - assembleContextPackage(b, changeId, taskGraph, repoRoot), - ); + // Create worktrees up-front (BEFORE any advance) so worktree-add errors + // are surfaced as apply fail-fast without leaving any bundle in + // `in_progress` with no worktree. If worktreeRuntime is absent, this + // loop degenerates to no-op and the legacy direct-subagent path applies. + const handles = new Map(); + if (worktreeRuntime && runId) { + for (const b of chunk) { + try { + handles.set(b.id, createWorktree(worktreeRuntime, runId, b.id)); + } catch (err) { + // Roll back any worktrees we just created in this chunk so a + // half-initialized chunk isn't left behind for /specflow.fix_apply + // to untangle. Cleanup is best-effort — if it fails, the caller + // still sees the fail-fast error that triggered rollback. + for (const handle of handles.values()) { + try { + removeWorktree(worktreeRuntime, handle); + } catch { + // ignore + } + } + if (err instanceof WorktreeError) { + throw err; + } + throw new WorktreeError( + `Unexpected error creating worktree for bundle '${b.id}': ${err instanceof Error ? err.message : String(err)}`, + { operation: "create-unexpected" }, + ); + } + } + } - // Advance each bundle in the chunk to `in_progress`, serialized through - // the main agent. Any advance failure aborts the chunk immediately. - for (let i = 0; i < chunk.length; i++) { - await advance(chunk[i]!.id, "in_progress"); + // R3-F08: wrap the pre-subagent phase (context assembly + in_progress + // transitions) in a try-catch so that worktrees created above are cleaned + // up if any of these steps throw. Without this, a race in + // `assembleContextPackage` or a failing `advance(in_progress)` would + // leave orphan worktrees on disk — causing path collisions on retry. + let packages: ReturnType[]; + try { + // Assemble context packages up-front (pure reads). If assembly throws + // despite preflight (e.g., race with a concurrent filesystem edit), + // the error propagates and no `in_progress` transition occurs. + packages = chunk.map((b) => + assembleContextPackage(b, changeId, taskGraph, repoRoot), + ); + + // Advance each bundle in the chunk to `in_progress`, serialized + // through the main agent. Any advance failure aborts the chunk. + for (let i = 0; i < chunk.length; i++) { + await advance(chunk[i]!.id, "in_progress"); + } + } catch (err) { + // Best-effort cleanup of all worktrees created for this chunk. + for (const handle of handles.values()) { + try { + removeWorktree(worktreeRuntime, handle); + } catch { + // ignore — the original error is more important + } + } + throw err; } // Fire all subagents in the chunk in parallel. `allSettled` drains the // chunk even if some subagents reject. const settled = await Promise.allSettled( - packages.map((pkg) => invoke(pkg)), + packages.map((pkg, i) => { + const b = chunk[i]!; + const handle = handles.get(b.id); + return invoke(pkg, handle); + }), ); const failures: ChunkFailure[] = []; for (let i = 0; i < chunk.length; i++) { const bundle = chunk[i]!; const outcome = settled[i]!; + const handle = handles.get(bundle.id); + if (outcome.status === "rejected") { + await advance(bundle.id, "subagent_failed"); failures.push(failureFromThrow(bundle.id, outcome.reason)); continue; } const result = outcome.value; if (result.status === "failure") { + await advance(bundle.id, "subagent_failed"); failures.push(failureFromResult(bundle.id, result)); continue; } - // Success: record `done`. A thrown `advance(done)` means the CLI - // mutation failed (e.g., `specflow-advance-bundle` exited non-zero). - // The `/specflow.apply` fail-fast contract requires STOP-immediately - // on any non-zero CLI exit (R4-F08) — so we: - // 1. Record the bundle as failed. - // 2. STOP processing the rest of the chunk immediately. We do NOT - // attempt `advance(done)` for later siblings, because that would - // mask the first CLI failure behind additional mutations and - // could leave task-graph.json in a state beyond the failure. + + // Success — run the main-agent integration step. worktreeRuntime is + // guaranteed non-null by the fail-fast check at the top of this + // function (subagent-shared is not a supported mode). The handle + // MUST exist for every bundle in the chunk because worktrees are + // created up-front before any advance. + const successResult: SubagentSuccessResult = { + status: "success", + produced_artifacts: result.produced_artifacts, + }; + const integration = integrateBundle({ + runtime: worktreeRuntime, + handle: handle!, + changeId, + subagentResult: successResult, + }); + if (!integration.ok) { + await advance(bundle.id, "integration_rejected"); + failures.push(failureFromIntegration(bundle.id, integration.cause)); + // Worktree retained for diagnosis — do NOT call removeWorktree. + continue; + } try { await advance(bundle.id, "done"); } catch (err) { failures.push(failureFromThrow(bundle.id, err)); - return { outcome: "failed", failures }; + return { + outcome: "failed", + failures, + cleanupWarnings: + allCleanupWarnings.length > 0 ? allCleanupWarnings : undefined, + }; + } + const cleanupWarning = await tryRemoveWorktree(worktreeRuntime, handle!); + if (cleanupWarning) { + allCleanupWarnings.push(cleanupWarning); } } if (failures.length > 0) { - return { outcome: "failed", failures }; + return { + outcome: "failed", + failures, + cleanupWarnings: + allCleanupWarnings.length > 0 ? allCleanupWarnings : undefined, + }; } } - return { outcome: "ok" }; + return { + outcome: "ok", + cleanupWarnings: + allCleanupWarnings.length > 0 ? allCleanupWarnings : undefined, + }; } diff --git a/src/lib/apply-dispatcher/types.ts b/src/lib/apply-dispatcher/types.ts index 6c71ac8..b5e3f01 100644 --- a/src/lib/apply-dispatcher/types.ts +++ b/src/lib/apply-dispatcher/types.ts @@ -46,7 +46,24 @@ export interface SubagentResult { }; } -export type SubagentInvoker = (pkg: ContextPackage) => Promise; +/** + * apply-worktree-isolation: when a bundle is dispatched in subagent-worktree + * mode, the main agent passes the ephemeral worktree handle so the subagent + * knows where to write its implementation. The handle is `undefined` for + * test fixtures and for any non-worktree dispatch path (none supported in + * production post-feature; `subagent-shared` is NOT a supported mode). + */ +export interface SubagentWorktreeHandle { + readonly path: string; + readonly baseSha: string; + readonly runId: string; + readonly bundleId: string; +} + +export type SubagentInvoker = ( + pkg: ContextPackage, + worktree?: SubagentWorktreeHandle, +) => Promise; /** * Window-level dispatch decision. `subagent` mode is used when at least one diff --git a/src/lib/apply-worktree/integrate.ts b/src/lib/apply-worktree/integrate.ts new file mode 100644 index 0000000..60d3c3c --- /dev/null +++ b/src/lib/apply-worktree/integrate.ts @@ -0,0 +1,177 @@ +// Main-agent integration authority for apply-worktree-isolation. +// +// Takes a subagent's success result plus a worktree handle and decides +// whether to import the worktree's changes into the main workspace or reject +// them. The only observable side effect on the accept path is a successful +// `git apply --binary` at the repo root. The reject path surfaces a +// structured cause and leaves the main workspace untouched so the caller can +// transition the bundle to `integration_rejected`. +// +// Phase 1 rejection causes (all exhaustively enumerated): +// - empty_diff_on_success (subagent claimed success but produced no diff) +// - protected_path (diff touches task-graph.json / tasks.md / .specflow/**) +// - undeclared_path (diff touches a path not in produced_artifacts) +// - patch_apply_failure (git apply --binary exited non-zero) +// +// Checks are ordered so that the MOST SEVERE cause is surfaced first: +// empty-diff (subagent contract violation) → protected-path (main-agent-only +// invariant) → undeclared-path (subagent contract violation) → +// patch-apply-failure (last, since it requires actually invoking apply). + +import { + computeDiff, + importPatch, + isProtectedPath, + listTouchedPaths, + WorktreeError, + type WorktreeHandle, + type WorktreeRuntime, +} from "./worktree.js"; + +export type IntegrationRejectionCause = + | { + readonly kind: "empty_diff_on_success"; + } + | { + readonly kind: "protected_path"; + readonly path: string; + } + | { + readonly kind: "undeclared_path"; + readonly path: string; + } + | { + readonly kind: "patch_apply_failure"; + readonly stderr: string; + }; + +export type IntegrationOutcome = + | { + readonly ok: true; + readonly touched: readonly string[]; + readonly overDeclared: readonly string[]; + } + | { + readonly ok: false; + readonly cause: IntegrationRejectionCause; + readonly touched: readonly string[]; + }; + +export interface SubagentSuccessResult { + readonly status: "success"; + readonly produced_artifacts: readonly string[]; +} + +export interface IntegrateBundleOptions { + readonly runtime: WorktreeRuntime; + readonly handle: WorktreeHandle; + readonly changeId: string; + readonly subagentResult: SubagentSuccessResult; +} + +/** + * Run the main-agent integration authority contract for a single bundle. + * + * This function does NOT handle subagent failures — the caller SHALL NOT + * invoke `integrateBundle` when the subagent returned `status: "failure"`; + * the bundle transitions straight to `subagent_failed` instead. This + * function ONLY decides accept-vs-reject for subagent successes. + * + * On accept (`ok: true`), the patch has been applied at the repo root and + * the caller SHOULD advance the bundle to `done`. On reject (`ok: false`), + * the main workspace is untouched and the caller SHOULD advance the bundle + * to `integration_rejected` while retaining the worktree. + */ +export function integrateBundle( + options: IntegrateBundleOptions, +): IntegrationOutcome { + const { runtime, handle, changeId, subagentResult } = options; + + // Step 1: compute diff. + const patch = computeDiff(runtime, handle); + + // Step 2: if the subagent returned success but the diff is empty, that + // means the subagent claimed to produce work but the worktree shows no + // changes. Contract violation — reject with empty_diff_on_success. + if (patch.length === 0) { + return { + ok: false, + cause: { kind: "empty_diff_on_success" }, + touched: [], + }; + } + + // Step 3: enumerate touched paths and the declared-artifacts set. + const touched = listTouchedPaths(patch); + const touchedArr = [...touched].sort(); + const declared = new Set(subagentResult.produced_artifacts); + + // Step 4: protected-path check. Runs BEFORE the undeclared-paths check so + // a subagent that declares a protected path in `produced_artifacts` still + // gets rejected (declaration does not bypass the invariant). + for (const path of touchedArr) { + if (isProtectedPath(path, changeId)) { + return { + ok: false, + cause: { kind: "protected_path", path }, + touched: touchedArr, + }; + } + } + + // Step 5: undeclared-paths check. Every touched path SHALL be in the + // declared set. Over-declaration (declared but not touched) is a warning, + // not a rejection, per spec. + for (const path of touchedArr) { + if (!declared.has(path)) { + return { + ok: false, + cause: { kind: "undeclared_path", path }, + touched: touchedArr, + }; + } + } + + const overDeclared = [...declared].filter((p) => !touched.has(p)).sort(); + + // Step 6: apply the patch at the repo root. Any non-zero exit from + // `git apply --binary` is a patch-apply failure — we reject rather than + // attempt a --3way retry, per the Phase 1 spec. + try { + importPatch(runtime, patch); + } catch (err) { + if (err instanceof WorktreeError && err.cause.operation === "apply") { + return { + ok: false, + cause: { + kind: "patch_apply_failure", + stderr: err.cause.stderr?.trim() ?? err.message, + }, + touched: touchedArr, + }; + } + // Unexpected error class — rethrow so the caller's top-level handler + // can surface it. This is distinct from a recognized patch-apply + // failure; rethrowing preserves the stack trace for debugging. + throw err; + } + + return { ok: true, touched: touchedArr, overDeclared }; +} + +/** + * Human-readable one-liner for an integration rejection cause. Used by the + * apply orchestrator when surfacing the failure to the user. + */ +export function formatRejectionCause(cause: IntegrationRejectionCause): string { + switch (cause.kind) { + case "empty_diff_on_success": + return "empty_diff_on_success: subagent returned status: success but the worktree diff is empty."; + case "protected_path": + return `protected_path: diff touches ${cause.path}, which is reserved for main-agent mutation.`; + case "undeclared_path": + return `undeclared_path: diff touches ${cause.path}, which is not in produced_artifacts.`; + case "patch_apply_failure": + return `patch_apply_failure: git apply --binary rejected the patch — ${cause.stderr}`; + } +} diff --git a/src/lib/apply-worktree/worktree.ts b/src/lib/apply-worktree/worktree.ts new file mode 100644 index 0000000..f9608ae --- /dev/null +++ b/src/lib/apply-worktree/worktree.ts @@ -0,0 +1,761 @@ +// Git worktree helper primitives for apply-worktree-isolation. +// +// Owns the ephemeral lifecycle of a per-bundle git worktree: +// create from HEAD → (subagent runs inside) → compute diff → import patch → +// remove on success OR retain on failure. +// +// Every function is pure with respect to state outside its arguments — the +// only observable side effects are `git` invocations and filesystem changes +// under `.specflow/worktrees///`. Designed for injected +// command runners so tests can stub git without spawning it. + +import { spawnSync } from "node:child_process"; +import * as fs from "node:fs"; +import * as path from "node:path"; + +export interface WorktreeHandle { + readonly path: string; + readonly baseSha: string; + readonly runId: string; + readonly bundleId: string; +} + +export interface GitCommandResult { + readonly status: number; + readonly stdout: Buffer; + readonly stderr: string; +} + +export type GitRunner = ( + args: readonly string[], + cwd: string, +) => GitCommandResult; + +/** + * Specialized runner for commands that take a patch on stdin (currently + * only `git apply --binary`). Separated from `GitRunner` because the default + * `spawnSync` path for stdin-driven git commands differs from the stdout-only + * path, and tests need an independent injection point. + */ +export type GitApplier = (patch: Buffer, cwd: string) => GitCommandResult; + +/** + * Minimal filesystem surface the worktree helper uses. Narrower than + * `node:fs` so tests can substitute an in-memory implementation without + * having to satisfy the full PathLike-accepting signatures. + */ +export interface WorktreeFs { + existsSync(path: string): boolean; + mkdirSync(path: string, opts?: { readonly recursive?: boolean }): void; + rmSync(path: string): void; +} + +export interface WorktreeRuntime { + readonly repoRoot: string; + readonly git?: GitRunner; + readonly applyPatch?: GitApplier; + readonly fs?: WorktreeFs; +} + +const defaultFs: WorktreeFs = { + existsSync: (p) => fs.existsSync(p), + mkdirSync: (p, opts) => { + fs.mkdirSync(p, opts); + }, + rmSync: (p) => { + fs.rmSync(p, { force: true, recursive: true }); + }, +}; + +export class WorktreeError extends Error { + constructor( + message: string, + readonly cause: { + readonly operation: string; + readonly args?: readonly string[]; + readonly stderr?: string; + readonly status?: number; + }, + ) { + super(message); + this.name = "WorktreeError"; + } +} + +function defaultGit(args: readonly string[], cwd: string): GitCommandResult { + // No `encoding` so stdout comes back as a Buffer — required for binary-safe + // `git diff --binary` output. stderr is captured as text for error reporting. + const result = spawnSync("git", [...args], { + cwd, + stdio: ["ignore", "pipe", "pipe"], + }); + return { + status: result.status ?? 1, + stdout: result.stdout ?? Buffer.alloc(0), + stderr: (result.stderr ?? Buffer.alloc(0)).toString("utf8"), + }; +} + +function runGit( + runtime: WorktreeRuntime, + args: readonly string[], + cwd: string, + operation: string, +): GitCommandResult { + const runner = runtime.git ?? defaultGit; + const result = runner(args, cwd); + if (result.status !== 0) { + throw new WorktreeError( + `git ${operation} failed: ${result.stderr.trim() || `exit ${result.status}`}`, + { + operation, + args, + stderr: result.stderr, + status: result.status, + }, + ); + } + return result; +} + +export function worktreePath( + repoRoot: string, + runId: string, + bundleId: string, +): string { + return path.join(repoRoot, ".specflow", "worktrees", runId, bundleId); +} + +/** + * Create an ephemeral git worktree at `.specflow/worktrees///` + * using the main workspace's CURRENT HEAD as the base. The returned handle + * records the base SHA so later integration can compute the exact diff. + * + * Throws WorktreeError on any failure (git unavailable, path collision, + * filesystem error). Callers (dispatcher) should catch this and trigger the + * worktree-unavailable fail-fast behavior. + */ +export function createWorktree( + runtime: WorktreeRuntime, + runId: string, + bundleId: string, +): WorktreeHandle { + const wtPath = worktreePath(runtime.repoRoot, runId, bundleId); + const fsApi: WorktreeFs = runtime.fs ?? defaultFs; + + if (fsApi.existsSync(wtPath)) { + throw new WorktreeError( + `Worktree path already exists: ${wtPath}. Remove it or pick a different run/bundle id.`, + { operation: "create-precheck" }, + ); + } + + const parent = path.dirname(wtPath); + try { + fsApi.mkdirSync(parent, { recursive: true }); + } catch (err) { + throw new WorktreeError( + `Failed to create parent directory ${parent}: ${err instanceof Error ? err.message : String(err)}`, + { operation: "create-mkdir" }, + ); + } + + // Capture HEAD SHA BEFORE creating the worktree so a concurrent main-workspace + // commit between `rev-parse` and `worktree add` is surfaced rather than silently + // recorded as a stale base. `git worktree add HEAD` resolves HEAD at invocation + // time, so the worktree's base will match this SHA. + const headResult = runGit( + runtime, + ["rev-parse", "HEAD"], + runtime.repoRoot, + "rev-parse HEAD", + ); + const baseSha = headResult.stdout.toString("utf8").trim(); + + // Use --detach so the worktree doesn't pin a branch. We never want the + // ephemeral worktree to show up in `git branch` or block branch deletion. + try { + runGit( + runtime, + ["worktree", "add", "--detach", wtPath, baseSha], + runtime.repoRoot, + "worktree add", + ); + } catch (err) { + // R2-F06: wrap the error so it identifies the target worktree path. + // The proposal requires worktree-unavailable failures to surface the + // `.specflow/worktrees///` path so operators know + // which worktree collided or needs cleanup. + if (err instanceof WorktreeError) { + throw new WorktreeError( + `Failed to create worktree at ${wtPath}: ${err.message}`, + { + operation: err.cause.operation, + args: err.cause.args, + stderr: err.cause.stderr, + status: err.cause.status, + }, + ); + } + throw err; + } + + // R3-F08: wrap post-add setup (materialize + snapshot) in a try-catch so + // that a failure here self-cleans the just-added worktree. Without this, + // a materialize or snapshot error leaves an orphan worktree on disk — the + // orchestrator's rollback only cleans handles that createWorktree() + // returned, so a throw here would leak the worktree and cause a path + // collision on retry. + try { + // Materialize the main workspace's uncommitted changes (if any) into the + // new worktree. Earlier bundle imports in the same apply run land via + // `git apply` at the repo root WITHOUT a commit, so `HEAD` is stale. + // Without this step, later worktrees would start from the old committed + // tree and miss earlier-imported bundle changes — violating the proposal's + // requirement that later worktrees naturally observe earlier imports. + materializeWorkspaceState(runtime, wtPath); + + // Snapshot the materialized state as a commit in the worktree so that + // `computeDiff(baseSha)` later captures ONLY the subagent's delta — not + // the pre-existing workspace changes that were just materialized. Without + // this, `importPatch` at the repo root would double-apply the materialized + // changes (which already exist there), causing patch conflicts. + const effectiveBase = snapshotMaterializedState(runtime, wtPath, baseSha); + + return { path: wtPath, baseSha: effectiveBase, runId, bundleId }; + } catch (err) { + // Best-effort removal of the just-added worktree. If removal itself + // fails (e.g., filesystem issue), the original setup error is still + // more important — swallow the cleanup error and propagate the original. + try { + const runner = runtime.git ?? defaultGit; + runner(["worktree", "remove", "--force", wtPath], runtime.repoRoot); + } catch { + // Cleanup failed — the worktree is leaked, but the original error + // is more actionable for the caller. + } + throw err; + } +} + +/** + * Copy uncommitted changes from the main workspace into a freshly-created + * worktree. Uses `git diff HEAD` (staged + unstaged) at the repo root to + * capture all pending changes — including patches imported from earlier + * bundles in the same apply run — and applies them into the worktree via + * `git apply --binary`. + * + * R4-F10: Also captures ordinary untracked files. Plain `git diff HEAD` + * ignores untracked files entirely, so a new-but-untracked file present in + * the main workspace would be absent from the subagent worktree. We mark + * untracked files as intent-to-add (`git add -N`) before diffing, then + * reset them afterward so the main workspace index is undisturbed. + * + * If the main workspace has no uncommitted changes (empty diff), this is a + * no-op. If the apply fails, the worktree is left in its HEAD state and the + * error propagates as a `WorktreeError`. + */ +function materializeWorkspaceState( + runtime: WorktreeRuntime, + wtPath: string, +): void { + const runner = runtime.git ?? defaultGit; + + // R4-F10: Enumerate untracked files BEFORE diffing so they can be included + // in the workspace snapshot via intent-to-add. Exclude `.specflow/` since + // that directory contains worktree infrastructure (ephemeral worktrees, + // config, etc.) — it is never user content and should not be materialized. + const untrackedResult = runner( + ["ls-files", "--others", "--exclude-standard", "--exclude=.specflow/"], + runtime.repoRoot, + ); + const untrackedFiles = + untrackedResult.status === 0 + ? untrackedResult.stdout + .toString("utf8") + .split("\n") + .filter((f) => f.length > 0) + : []; + + // If there are untracked files, mark them intent-to-add so `git diff HEAD` + // includes them as new-file diffs. We reset them after diffing. + if (untrackedFiles.length > 0) { + const addResult = runner( + ["add", "-N", "--", ...untrackedFiles], + runtime.repoRoot, + ); + if (addResult.status !== 0) { + throw new WorktreeError( + `git add -N (intent-to-add) failed while materializing workspace state: ${addResult.stderr.trim() || `exit ${addResult.status}`}`, + { + operation: "materialize-intent-add", + stderr: addResult.stderr, + status: addResult.status, + }, + ); + } + } + + let diffResult: GitCommandResult; + try { + diffResult = runner( + ["diff", "--binary", "--find-renames", "HEAD"], + runtime.repoRoot, + ); + } finally { + // Always reset intent-to-add files so the main workspace index is + // undisturbed, even if `git diff` throws or fails. + if (untrackedFiles.length > 0) { + runner(["reset", "--", ...untrackedFiles], runtime.repoRoot); + } + } + + // A non-zero exit from `git diff` is unexpected (broken index, etc.) — + // surface it so the caller's fail-fast logic catches it. + if (diffResult.status !== 0) { + throw new WorktreeError( + `git diff HEAD failed while materializing workspace state: ${diffResult.stderr.trim() || `exit ${diffResult.status}`}`, + { + operation: "materialize-diff", + stderr: diffResult.stderr, + status: diffResult.status, + }, + ); + } + + if (diffResult.stdout.length === 0) { + // No uncommitted changes — worktree already matches the workspace. + return; + } + + const applier = runtime.applyPatch ?? defaultApplier; + const applyResult = applier(diffResult.stdout, wtPath); + if (applyResult.status !== 0) { + throw new WorktreeError( + `Failed to materialize workspace changes into worktree at ${wtPath}: ${applyResult.stderr.trim() || `exit ${applyResult.status}`}`, + { + operation: "materialize-apply", + stderr: applyResult.stderr, + status: applyResult.status, + }, + ); + } +} + +/** + * Commit the materialized workspace state in the worktree so that the returned + * SHA becomes the effective base for `computeDiff`. If no changes were + * materialized (empty workspace diff), returns the original `fallbackSha` + * unchanged — `computeDiff` will correctly capture only the subagent's delta. + * + * When materialized changes ARE present, snapshot failures are FATAL. Silently + * falling back to `fallbackSha` in that case would leave `baseSha` pointing + * at the pre-materialization commit while the worktree already contains the + * materialized changes — so `computeDiff` would include those changes in the + * patch and `importPatch` would double-apply them at the repo root (they + * already exist there). Throwing on snapshot failure prevents this corruption. + */ +function snapshotMaterializedState( + runtime: WorktreeRuntime, + wtPath: string, + fallbackSha: string, +): string { + const runner = runtime.git ?? defaultGit; + + // Check whether there are any materialized changes to snapshot. + const statusResult = runner( + ["diff", "--binary", "--find-renames", "HEAD"], + wtPath, + ); + if (statusResult.status !== 0) { + // diff failed — this is unexpected (broken index, etc.). If materialize + // put changes in, we can't verify, so fail fast. + throw new WorktreeError( + `Failed to check materialized state in worktree at ${wtPath}: git diff exited ${statusResult.status}`, + { + operation: "snapshot-diff-check", + stderr: statusResult.stderr, + status: statusResult.status, + }, + ); + } + if (statusResult.stdout.length === 0) { + // No materialized changes — baseSha stays as HEAD. + return fallbackSha; + } + + // Materialized changes exist — the following steps MUST succeed. If any + // step fails, `baseSha` would be incorrect (pointing at the pre- + // materialization commit while the worktree contains the changes), causing + // double-apply on patch import. Treat failures as fatal. + + // Stage all materialized changes. + const addResult = runner(["add", "-A"], wtPath); + if (addResult.status !== 0) { + throw new WorktreeError( + `Failed to stage materialized changes in worktree at ${wtPath}: git add -A exited ${addResult.status}`, + { + operation: "snapshot-stage", + stderr: addResult.stderr, + status: addResult.status, + }, + ); + } + + // Commit with a deterministic message. This commit is ephemeral — it lives + // only in the detached-HEAD worktree and is removed with it. + const commitResult = runner( + ["commit", "--allow-empty", "-m", "specflow: materialized workspace state"], + wtPath, + ); + if (commitResult.status !== 0) { + throw new WorktreeError( + `Failed to commit materialized changes in worktree at ${wtPath}: git commit exited ${commitResult.status}`, + { + operation: "snapshot-commit", + stderr: commitResult.stderr, + status: commitResult.status, + }, + ); + } + + // Capture the new HEAD as the effective base SHA. + const newHead = runner(["rev-parse", "HEAD"], wtPath); + if (newHead.status !== 0) { + throw new WorktreeError( + `Failed to resolve snapshot HEAD in worktree at ${wtPath}: git rev-parse exited ${newHead.status}`, + { + operation: "snapshot-rev-parse", + stderr: newHead.stderr, + status: newHead.status, + }, + ); + } + return newHead.stdout.toString("utf8").trim(); +} + +/** + * Compute the binary-safe diff from the worktree's base commit to its current + * HEAD (including uncommitted changes). Uses `git diff --binary` with rename + * detection so the patch includes binary content and the header classifies + * renames, deletes, mode changes, and binary blobs consistently with what + * `listTouchedPaths` parses. + * + * Returns a Buffer so NUL bytes survive intact. Feed this directly to + * `importPatch`. + */ +export function computeDiff( + runtime: WorktreeRuntime, + handle: WorktreeHandle, +): Buffer { + // Subagents MAY leave new files untracked in the worktree. Plain + // `git diff ` ignores untracked files entirely — they would be + // silently dropped from the integration patch. Mark them as intent-to-add + // first (`git add -N .`) so they appear in the diff as additions. `-N` + // adds only the path entry to the index, NOT the file content, so this is + // a minimal mutation that does not interfere with a subsequent `git diff + // --binary` (which still reads content from the working tree). + const runner = runtime.git ?? defaultGit; + const intentAdd = runner(["add", "-N", "--", "."], handle.path); + if (intentAdd.status !== 0) { + throw new WorktreeError( + `git add -N failed in worktree ${handle.path}: ${intentAdd.stderr.trim() || `exit ${intentAdd.status}`}`, + { + operation: "diff-intent-add", + stderr: intentAdd.stderr, + status: intentAdd.status, + }, + ); + } + + // `git diff --binary` uses the working tree as the RHS, so uncommitted + // changes are included. `--find-renames` activates rename detection in the + // patch header so listTouchedPaths can resolve the new path. + const result = runGit( + runtime, + ["diff", "--binary", "--find-renames", handle.baseSha], + handle.path, + "diff --binary", + ); + return result.stdout; +} + +/** + * Apply a binary-safe patch at the repository root via `git apply --binary`. + * No `--3way` fallback in Phase 1 — a non-zero exit is a hard rejection + * surfaced to the caller so the bundle can transition to integration_rejected. + * + * Throws WorktreeError on patch-apply failure. The caller distinguishes + * "patch-apply failure" from other integration-rejection causes via the + * `operation: "apply"` on the thrown error. + */ +function defaultApplier(patch: Buffer, cwd: string): GitCommandResult { + // R1-F01: `--index` ensures newly created files are also staged in the + // index. Without it, `git apply` leaves new files untracked. Later calls + // to `git diff HEAD` (in materializeWorkspaceState) only capture tracked + // files, so untracked files from earlier-bundle imports would be missed + // when creating worktrees for later bundles in the same apply run. With + // `--index`, all applied files (creates, modifications, deletes) are + // tracked and visible to `git diff HEAD`. + const result = spawnSync("git", ["apply", "--binary", "--index"], { + cwd, + input: patch, + stdio: ["pipe", "pipe", "pipe"], + }); + return { + status: result.status ?? 1, + stdout: result.stdout ?? Buffer.alloc(0), + stderr: (result.stderr ?? Buffer.alloc(0)).toString("utf8"), + }; +} + +export function importPatch(runtime: WorktreeRuntime, patch: Buffer): void { + if (patch.length === 0) { + // An empty patch means no changes to import. Short-circuit rather than + // invoking `git apply` with no input (a no-op that could otherwise be + // mistaken for success by the caller). + return; + } + const applier = runtime.applyPatch ?? defaultApplier; + const result = applier(patch, runtime.repoRoot); + if (result.status !== 0) { + throw new WorktreeError( + `git apply --binary failed: ${result.stderr.trim() || `exit ${result.status}`}`, + { + operation: "apply", + status: result.status, + stderr: result.stderr, + }, + ); + } +} + +/** + * Remove a worktree via `git worktree remove --force`. The --force flag is + * required because subagents typically leave the worktree dirty (their + * uncommitted implementation changes) — after patch import the changes live + * at the main workspace root, so the worktree's dirty state is no longer load- + * bearing and removing it is safe. Without --force, `git worktree remove` + * refuses dirty worktrees with exit 1. + * + * Callers SHOULD only invoke this after successful integration. For the + * subagent_failed and integration_rejected paths, the worktree SHALL be + * retained; do NOT call this function there. + */ +export function removeWorktree( + runtime: WorktreeRuntime, + handle: WorktreeHandle, +): void { + // --force so post-import dirty worktrees can still be cleaned up. The + // worktree's content is no longer authoritative after patch-import lands + // the changes at main. + runGit( + runtime, + ["worktree", "remove", "--force", handle.path], + runtime.repoRoot, + "worktree remove", + ); +} + +/** + * Parse `diff --git a/ b/` headers from a binary-safe patch and + * return the set of touched repo-relative paths. Rules (matching the + * apply-worktree-integration spec): + * + * - added/modified/mode-only/binary paths → the `b/` path + * - renamed paths → the `b/` path (NEW path, not old) + * - deleted paths → the `a/` path (same as `b/` in git output) + * + * In practice, every non-rename `diff --git` line has `a/X b/X` where X is + * the same path, so emitting `b/` covers add/modify/delete/mode/binary. + * Only rename/copy distinguish old vs. new, and the spec calls for NEW. + * + * Paths that contain unusual characters may be git-quoted (surrounded with + * double quotes, special chars backslash-escaped). This implementation + * recognizes and dequotes those so produced_artifacts matching works against + * "real" repo-relative paths. + */ +export function listTouchedPaths(patch: string | Buffer): ReadonlySet { + const text = typeof patch === "string" ? patch : patch.toString("utf8"); + const touched = new Set(); + + const lines = text.split("\n"); + for (const line of lines) { + if (!line.startsWith("diff --git ")) { + continue; + } + const parsed = parseDiffGitLine(line); + if (parsed) { + touched.add(parsed.newPath); + } + } + return touched; +} + +interface DiffGitPaths { + readonly oldPath: string; + readonly newPath: string; +} + +/** + * Parse a `diff --git a/ b/` header line. Handles the two git- + * supported forms: + * + * unquoted: diff --git a/path/to/file b/path/to/file + * quoted: diff --git "a/path with spaces" "b/path with spaces" + * + * Both sides must use the same form (git never mixes them). Returns the + * `a/` prefix stripped from oldPath and the `b/` prefix stripped from + * newPath, or null if the line does not parse. + */ +function parseDiffGitLine(line: string): DiffGitPaths | null { + const rest = line.slice("diff --git ".length); + + // Quoted form: "a/..." "b/..." + if (rest.startsWith('"')) { + const firstEnd = findClosingQuote(rest, 0); + if (firstEnd === -1) return null; + const firstQuoted = rest.slice(1, firstEnd); + let i = firstEnd + 1; + while (i < rest.length && rest.charCodeAt(i) === 0x20) i++; // skip spaces + if (rest.charAt(i) !== '"') return null; + const secondStart = i; + const secondEnd = findClosingQuote(rest, secondStart); + if (secondEnd === -1) return null; + const secondQuoted = rest.slice(secondStart + 1, secondEnd); + const oldPath = stripPrefix(dequoteGitPath(firstQuoted), "a/"); + const newPath = stripPrefix(dequoteGitPath(secondQuoted), "b/"); + if (oldPath === null || newPath === null) return null; + return { oldPath, newPath }; + } + + // Unquoted form: a/... b/... + // Git uses a single space as the separator. Since `a/` prefix guarantees + // no leading space in the path itself, we can split on ' b/' literally. + const sep = " b/"; + const sepIndex = rest.indexOf(sep); + if (sepIndex === -1) return null; + const aPart = rest.slice(0, sepIndex); + const bPart = rest.slice(sepIndex + 1); // starts with 'b/' + const oldPath = stripPrefix(aPart, "a/"); + const newPath = stripPrefix(bPart, "b/"); + if (oldPath === null || newPath === null) return null; + return { oldPath, newPath }; +} + +function stripPrefix(s: string, prefix: string): string | null { + return s.startsWith(prefix) ? s.slice(prefix.length) : null; +} + +/** + * Find the index of the closing `"` for a git-quoted string starting at + * `start` (which must point at an opening `"`). Handles escaped quotes + * (`\"`) and escaped backslashes (`\\`). Returns -1 if no close found. + */ +function findClosingQuote(s: string, start: number): number { + let i = start + 1; + while (i < s.length) { + const ch = s.charAt(i); + if (ch === "\\") { + i += 2; // skip the backslash AND the following char + continue; + } + if (ch === '"') { + return i; + } + i++; + } + return -1; +} + +/** + * Dequote a git-quoted path. Git uses C-style escapes inside double quotes: + * \a \b \t \n \v \f \r \" \\, plus \ for non-ASCII bytes. For + * Phase 1 we dequote the common printable escapes and pass octal escapes + * through as literal `\nnn` sequences — this is acceptable because the + * spec only requires touched paths to match `produced_artifacts` strings, + * and produced_artifacts is provided by the subagent in git-quoted form + * only if the subagent chose to quote; otherwise they are plain bytes. + */ +function dequoteGitPath(quoted: string): string { + let out = ""; + let i = 0; + while (i < quoted.length) { + const ch = quoted.charAt(i); + if (ch !== "\\") { + out += ch; + i++; + continue; + } + const next = quoted.charAt(i + 1); + switch (next) { + case '"': + case "\\": + case "/": + out += next; + i += 2; + continue; + case "a": + out += "\x07"; + i += 2; + continue; + case "b": + out += "\b"; + i += 2; + continue; + case "f": + out += "\f"; + i += 2; + continue; + case "n": + out += "\n"; + i += 2; + continue; + case "r": + out += "\r"; + i += 2; + continue; + case "t": + out += "\t"; + i += 2; + continue; + case "v": + out += "\v"; + i += 2; + continue; + default: + // Unrecognized escape (likely octal): preserve as literal. + out += ch; + i++; + } + } + return out; +} + +/** + * Check whether a given repo-relative path is on the protected path list + * defined by apply-worktree-integration. Used by the integration step to + * reject diffs that touch main-agent-only artifacts. + * + * Protected paths: + * - openspec/changes//task-graph.json + * - openspec/changes//tasks.md + * - .specflow/** (any path under .specflow) + */ +export function isProtectedPath( + repoRelativePath: string, + changeId: string, +): boolean { + const normalized = repoRelativePath.replace(/\\/g, "/"); + if (normalized === `openspec/changes/${changeId}/task-graph.json`) + return true; + if (normalized === `openspec/changes/${changeId}/tasks.md`) return true; + if (normalized.startsWith(".specflow/")) return true; + return false; +} + +export const __internal_testing = { + parseDiffGitLine, + dequoteGitPath, + findClosingQuote, +} as const; diff --git a/src/lib/task-planner/advance.ts b/src/lib/task-planner/advance.ts index e34bdb8..5bf3f6c 100644 --- a/src/lib/task-planner/advance.ts +++ b/src/lib/task-planner/advance.ts @@ -37,6 +37,12 @@ export interface AdvanceBundleOptions { readonly newStatus: BundleStatus; readonly writer: AdvanceBundleWriter; readonly logger?: AdvanceBundleLogger; + /** + * Permit reset transitions out of `subagent_failed` or `integration_rejected` + * back to `pending`. Only /specflow.fix_apply or an operator reset flow + * should set this. Apply-class workflows MUST NOT. + */ + readonly allowReset?: boolean; } export interface AdvanceBundleSuccess { @@ -70,6 +76,7 @@ export function advanceBundleStatus( options.taskGraph, options.bundleId, options.newStatus, + { allowReset: options.allowReset === true }, ); if (!result.ok) { return result; diff --git a/src/lib/task-planner/render.ts b/src/lib/task-planner/render.ts index d9ece5f..2b542d0 100644 --- a/src/lib/task-planner/render.ts +++ b/src/lib/task-planner/render.ts @@ -23,6 +23,10 @@ function bundleStatusLabel(status: string): string { return " (in progress)"; case "skipped": return " (skipped)"; + case "subagent_failed": + return " ✗ (subagent failed — retained worktree)"; + case "integration_rejected": + return " ⚠ (integration rejected — retained worktree)"; default: return ""; } diff --git a/src/lib/task-planner/schema.ts b/src/lib/task-planner/schema.ts index 08a7902..1d8bc1c 100644 --- a/src/lib/task-planner/schema.ts +++ b/src/lib/task-planner/schema.ts @@ -7,7 +7,22 @@ export interface ValidationResult { readonly errors: readonly string[]; } -const VALID_STATUSES = ["pending", "in_progress", "done", "skipped"] as const; +// Bundles MAY carry the two apply-worktree-specific statuses. Child tasks +// MAY NOT — those values are bundle-only. +const VALID_BUNDLE_STATUSES = [ + "pending", + "in_progress", + "done", + "skipped", + "subagent_failed", + "integration_rejected", +] as const; +const VALID_TASK_STATUSES = [ + "pending", + "in_progress", + "done", + "skipped", +] as const; function isNonEmptyString(v: unknown): v is string { return typeof v === "string" && v.length > 0; @@ -126,10 +141,12 @@ export function validateTaskGraph(input: unknown): ValidationResult { // Status if ( typeof bundle.status !== "string" || - !VALID_STATUSES.includes(bundle.status as (typeof VALID_STATUSES)[number]) + !VALID_BUNDLE_STATUSES.includes( + bundle.status as (typeof VALID_BUNDLE_STATUSES)[number], + ) ) { errors.push( - `${prefix}.status: expected one of ${VALID_STATUSES.join(", ")}`, + `${prefix}.status: expected one of ${VALID_BUNDLE_STATUSES.join(", ")}`, ); } @@ -204,12 +221,12 @@ export function validateTaskGraph(input: unknown): ValidationResult { } if ( typeof task.status !== "string" || - !VALID_STATUSES.includes( - task.status as (typeof VALID_STATUSES)[number], + !VALID_TASK_STATUSES.includes( + task.status as (typeof VALID_TASK_STATUSES)[number], ) ) { errors.push( - `${tPrefix}.status: expected one of ${VALID_STATUSES.join(", ")}`, + `${tPrefix}.status: expected one of ${VALID_TASK_STATUSES.join(", ")}`, ); } } diff --git a/src/lib/task-planner/status.ts b/src/lib/task-planner/status.ts index 8954679..0627895 100644 --- a/src/lib/task-planner/status.ts +++ b/src/lib/task-planner/status.ts @@ -39,23 +39,55 @@ export interface StatusUpdateError { readonly error: string; } +// Default (non-reset) transitions. These are what apply-class workflows are +// allowed to invoke. Transitions OUT of `subagent_failed` / `integration_rejected` +// require `allowReset: true` — see `updateBundleStatus` options. const VALID_TRANSITIONS: ReadonlyMap = new Map([ ["pending", ["in_progress", "skipped"]], - ["in_progress", ["done"]], + ["in_progress", ["done", "subagent_failed", "integration_rejected"]], ["done", []], ["skipped", []], + ["subagent_failed", []], + ["integration_rejected", []], ]); -const TERMINAL_BUNDLE_STATUSES: ReadonlySet = new Set([ - "done", - "skipped", -]); +// Reset-only transitions. These are allowed ONLY when the caller passes +// `allowReset: true` — i.e., from /specflow.fix_apply or an explicit operator +// reset flow. Apply-class workflows SHALL NOT pass this flag. +const RESET_TRANSITIONS: ReadonlyMap = + new Map([ + ["subagent_failed", ["pending"]], + ["integration_rejected", ["pending"]], + ]); + +// Only `done` and `skipped` trigger child-task normalization. The two new +// apply-worktree statuses (`subagent_failed`, `integration_rejected`) preserve +// child statuses as-is so /specflow.fix_apply can inspect what was in flight. +// The type predicate narrows to `TaskStatus` because both terminal bundle +// statuses are also valid task statuses (child tasks never carry the +// apply-worktree-specific bundle statuses). +const TERMINAL_BUNDLE_STATUSES: ReadonlySet = + new Set(["done", "skipped"]); -function isTerminal(status: BundleStatus): boolean { +function isTerminal(status: BundleStatus): status is TaskStatus & BundleStatus { return TERMINAL_BUNDLE_STATUSES.has(status); } +function isTransitionAllowed( + from: BundleStatus, + to: BundleStatus, + allowReset: boolean, +): boolean { + if (VALID_TRANSITIONS.get(from)?.includes(to)) { + return true; + } + if (allowReset && RESET_TRANSITIONS.get(from)?.includes(to)) { + return true; + } + return false; +} + /** * Rebuild a bundle's `tasks` array so every `task.status` equals `target`. * Returns the rebuilt tasks plus the per-task coercion entries (only for @@ -85,10 +117,21 @@ function normalizeChildTasks( return { tasks: rebuilt, coercions }; } +export interface UpdateBundleStatusOptions { + /** + * When true, permits reset transitions out of `subagent_failed` or + * `integration_rejected` back to `pending`. Only /specflow.fix_apply or an + * explicit operator reset flow should set this to true; apply-class + * workflows SHALL NOT enable it. + */ + readonly allowReset?: boolean; +} + export function updateBundleStatus( taskGraph: TaskGraph, bundleId: string, newStatus: BundleStatus, + options: UpdateBundleStatusOptions = {}, ): StatusUpdateResult | StatusUpdateError { const bundleIndex = taskGraph.bundles.findIndex((b) => b.id === bundleId); if (bundleIndex === -1) { @@ -96,8 +139,8 @@ export function updateBundleStatus( } const bundle = taskGraph.bundles[bundleIndex]; - const allowed = VALID_TRANSITIONS.get(bundle.status); - if (!allowed || !allowed.includes(newStatus)) { + const allowReset = options.allowReset === true; + if (!isTransitionAllowed(bundle.status, newStatus, allowReset)) { return { ok: false, error: `Invalid status transition: ${bundle.status} → ${newStatus} for bundle '${bundleId}'`, diff --git a/src/lib/task-planner/types.ts b/src/lib/task-planner/types.ts index a698db5..ea0d07a 100644 --- a/src/lib/task-planner/types.ts +++ b/src/lib/task-planner/types.ts @@ -1,6 +1,17 @@ // Task graph types — bundle-based structure for specflow-owned task planning. -export type BundleStatus = "pending" | "in_progress" | "done" | "skipped"; +// Bundle status superset. `subagent_failed` and `integration_rejected` are +// introduced by apply-worktree-isolation: they are terminal-for-invocation (the +// apply fails fast and stops) but NOT terminal at the run level — only +// /specflow.fix_apply or an operator reset can return the bundle to `pending`. +// They are bundle-only; child tasks never carry these values. +export type BundleStatus = + | "pending" + | "in_progress" + | "done" + | "skipped" + | "subagent_failed" + | "integration_rejected"; export type TaskStatus = "pending" | "in_progress" | "done" | "skipped"; diff --git a/src/tests/__snapshots__/specflow.apply.md.snap b/src/tests/__snapshots__/specflow.apply.md.snap index d60d6e8..f165dd4 100644 --- a/src/tests/__snapshots__/specflow.apply.md.snap +++ b/src/tests/__snapshots__/specflow.apply.md.snap @@ -135,7 +135,13 @@ $ARGUMENTS Only the main agent records bundle status transitions. ``` The subagent SHALL return a structured result: `{"status": "success"|"failure", "produced_artifacts": [...], "error"?: {"message": "..."}}`. - - **Drain-then-stop on any failure.** After all subagents in the chunk settle, for each subagent that returned `"success"` invoke `specflow-advance-bundle done`. For each subagent that returned `"failure"` (or threw), leave the bundle in `in_progress` and record the failure. If AT LEAST ONE failure occurred in the chunk: STOP the apply after the chunk drains. Do NOT start the next chunk or the next window. The run SHALL remain in `apply_draft`. Surface every failure's `error.message` to the user and document recovery paths (`/specflow.fix_apply` or manual intervention). + - **Worktree-mode execution.** Every subagent-dispatched bundle runs inside a dedicated ephemeral worktree at `.specflow/worktrees///`. The main agent creates the worktree (from HEAD, with uncommitted workspace changes materialized in), dispatches the subagent, then runs main-agent integration (diff inspection + produced-artifact cross-check + `git apply --binary` patch import) before advancing to `done`. `subagent-shared` (dispatched subagent without an isolated worktree) is **NOT** a supported execution mode. + - **Drain-then-stop on any failure.** After all subagents in the chunk settle: + - For each subagent that returned `"success"` and passed integration: advance to `done`, remove the worktree. + - For each subagent that returned `"failure"` (or threw): advance to `subagent_failed`, **retain** the worktree for diagnosis. + - For each subagent that returned `"success"` but failed integration (undeclared path, protected path, empty diff, or patch-apply failure): advance to `integration_rejected`, **retain** the worktree for diagnosis. + - If AT LEAST ONE bundle ended in `subagent_failed` or `integration_rejected`: STOP the apply after the chunk drains. Do NOT start the next chunk or the next window. The run SHALL remain in `apply_draft`. Surface every failure's `error.message` (and `integrationCause` for rejected bundles) to the user and document recovery paths (`/specflow.fix_apply`, or manual inspection at `.specflow/worktrees///`). + - **Worktree-unavailable fail-fast.** If `git worktree add` fails for any reason on a subagent-eligible bundle, the dispatcher SHALL fail-fast the entire apply. No silent fallback to `inline-main`. The run remains in `apply_draft`. - Only if the chunk completed with zero failures SHALL the next chunk (or next window) be processed. - **All status transitions remain CLI-mandatory.** The dispatcher does NOT change the sole-mutation-entry-point contract: every `pending → in_progress` and `in_progress → done` transition is a `specflow-advance-bundle` invocation made by the main agent. Subagents MUST NOT invoke `specflow-advance-bundle`, MUST NOT edit `task-graph.json`, and MUST NOT edit `tasks.md`. - **Fail-fast and recovery paths are identical to step 3b.** A non-zero `specflow-advance-bundle` exit stops the apply immediately with the CLI error envelope surfaced verbatim; the subagent-failure path documented above adds nothing to the recovery surface (the run stays in `apply_draft` and the operator chooses between `/specflow.fix_apply` and manual intervention). diff --git a/src/tests/__snapshots__/specflow.fix_apply.md.snap b/src/tests/__snapshots__/specflow.fix_apply.md.snap index c5414b2..08fc422 100644 --- a/src/tests/__snapshots__/specflow.fix_apply.md.snap +++ b/src/tests/__snapshots__/specflow.fix_apply.md.snap @@ -222,6 +222,43 @@ AskUserQuestion: - ALL control flow logic (fix application, diff filtering, review agent invocation, ledger detection/update, finding matching, current-phase generation) is handled by the `specflow-review-apply fix-review` orchestrator. This slash command only calls the orchestrator, parses its JSON output, and displays UI. - If the fix loop needs to update `task-graph.json` or `tasks.md`, use `specflow-advance-bundle `; inline `node -e` / `jq` / manual edits to those files are a contract violation per `task-planner`. +## Recovering subagent_failed / integration_rejected bundles + + +When a bundle is left in the `subagent_failed` or `integration_rejected` +terminal-for-invocation status by `/specflow.apply` (subagent-worktree mode): + +1. Inspect the retained worktree at `.specflow/worktrees///`. +2. Read the STOP payload from the previous apply: for `integration_rejected` + the `integrationCause` field tells you whether the rejection was + `undeclared_path`, `protected_path`, `empty_diff_on_success`, or + `patch_apply_failure`. +3. Re-run `/specflow.fix_apply` to auto-fix, or manually repair: + + **IMPORTANT**: re-running `/specflow.apply` creates a **fresh** worktree + from the main workspace's current HEAD. Edits made only inside the old + retained worktree are NOT carried forward. If you manually fix the issue + inside the retained worktree, you MUST apply those fixes back to the main + workspace first (stage with `git -C add -A`, then + `git -C diff --binary --cached HEAD | git apply --binary --index` + at the repo root), then reset and re-run: + ```bash + # 1. Import your manual fixes from the retained worktree into the main workspace. + # Stage first so new (untracked) files are included in the diff, then use + # --binary on both sides so binary content round-trips correctly. + git -C .specflow/worktrees// add -A + git -C .specflow/worktrees// diff --binary --cached HEAD | git apply --binary --index + # 2. Reset the bundle back to pending + specflow-advance-bundle pending --allow-reset + # 3. Re-run apply — a fresh worktree will be created from current HEAD + # (which now includes your manual fixes via the import above) + ``` + Alternatively, skip the retained worktree entirely: make your fixes + directly in the main workspace, then reset and re-run. + +`--allow-reset` is the only supported way to transition out of these new +statuses back to `pending`; apply-class workflows SHALL NOT pass the flag. + ## Run State Hooks diff --git a/src/tests/apply-dispatcher-execution-mode.test.ts b/src/tests/apply-dispatcher-execution-mode.test.ts new file mode 100644 index 0000000..4db8fce --- /dev/null +++ b/src/tests/apply-dispatcher-execution-mode.test.ts @@ -0,0 +1,136 @@ +import assert from "node:assert/strict"; +import test from "node:test"; +import type { DispatchConfig } from "../lib/apply-dispatcher/config.js"; +import { assignExecutionMode } from "../lib/apply-dispatcher/execution-mode.js"; +import type { Bundle } from "../lib/task-planner/types.js"; + +function bundle(overrides: Partial): Bundle { + return { + id: "b", + title: "Bundle", + goal: "do the thing", + depends_on: [], + inputs: [], + outputs: [], + status: "pending", + tasks: [], + owner_capabilities: ["some-cap"], + ...overrides, + }; +} + +function cfg(overrides: Partial = {}): DispatchConfig { + return { + enabled: true, + threshold: 5, + maxConcurrency: 3, + ...overrides, + }; +} + +// --- enabled: false --- + +test("assignExecutionMode: inline-main when dispatch is disabled regardless of size_score", () => { + for (const size of [0, 1, 5, 6, 10, 100]) { + assert.equal( + assignExecutionMode( + bundle({ size_score: size }), + cfg({ enabled: false }), + ), + "inline-main", + `size_score=${size} should be inline-main when disabled`, + ); + } +}); + +// --- size_score above / at / below threshold --- + +test("assignExecutionMode: subagent-worktree when size_score > threshold and enabled", () => { + assert.equal( + assignExecutionMode(bundle({ size_score: 6 }), cfg({ threshold: 5 })), + "subagent-worktree", + ); + assert.equal( + assignExecutionMode(bundle({ size_score: 100 }), cfg({ threshold: 5 })), + "subagent-worktree", + ); +}); + +test("assignExecutionMode: inline-main when size_score == threshold (strict greater-than)", () => { + assert.equal( + assignExecutionMode(bundle({ size_score: 5 }), cfg({ threshold: 5 })), + "inline-main", + ); +}); + +test("assignExecutionMode: inline-main when size_score < threshold", () => { + assert.equal( + assignExecutionMode(bundle({ size_score: 0 }), cfg({ threshold: 5 })), + "inline-main", + ); + assert.equal( + assignExecutionMode(bundle({ size_score: 4 }), cfg({ threshold: 5 })), + "inline-main", + ); +}); + +// --- missing size_score (pre-feature graph) --- + +test("assignExecutionMode: inline-main when size_score is undefined regardless of threshold", () => { + assert.equal( + assignExecutionMode( + bundle({ size_score: undefined }), + cfg({ threshold: 0 }), + ), + "inline-main", + ); + assert.equal( + assignExecutionMode( + bundle({ size_score: undefined }), + cfg({ threshold: 100 }), + ), + "inline-main", + ); +}); + +// --- threshold = 0 --- + +test("assignExecutionMode: threshold 0 routes every bundle with size_score > 0 to subagent-worktree", () => { + assert.equal( + assignExecutionMode(bundle({ size_score: 1 }), cfg({ threshold: 0 })), + "subagent-worktree", + ); + assert.equal( + assignExecutionMode(bundle({ size_score: 0 }), cfg({ threshold: 0 })), + "inline-main", + ); +}); + +// --- determinism --- + +test("assignExecutionMode: same inputs yield same output on repeated calls (deterministic)", () => { + const b = bundle({ size_score: 7 }); + const c = cfg({ threshold: 5 }); + const a1 = assignExecutionMode(b, c); + const a2 = assignExecutionMode(b, c); + const a3 = assignExecutionMode(b, c); + assert.equal(a1, "subagent-worktree"); + assert.equal(a1, a2); + assert.equal(a2, a3); +}); + +// --- boundary: size_score = threshold + 1 is the smallest subagent-worktree --- + +test("assignExecutionMode: boundary — threshold=5 promotes bundle at size_score=6 only", () => { + for (let s = 0; s <= 5; s++) { + assert.equal( + assignExecutionMode(bundle({ size_score: s }), cfg({ threshold: 5 })), + "inline-main", + `size_score=${s} should be inline-main`, + ); + } + assert.equal( + assignExecutionMode(bundle({ size_score: 6 }), cfg({ threshold: 5 })), + "subagent-worktree", + ); +}); diff --git a/src/tests/apply-dispatcher-orchestrate.test.ts b/src/tests/apply-dispatcher-orchestrate.test.ts index 8ea49ee..8d16811 100644 --- a/src/tests/apply-dispatcher-orchestrate.test.ts +++ b/src/tests/apply-dispatcher-orchestrate.test.ts @@ -90,6 +90,65 @@ function setupRepo(capabilities: readonly string[] = ["alpha"]): { const dispatchAll = { enabled: true, threshold: 0, maxConcurrency: 3 }; +// Minimal pass-through WorktreeRuntime for tests that exercise orchestration +// logic but not worktree-specific behavior. Every git call succeeds with an +// empty diff, so integration accepts (empty_diff_on_success is only triggered +// when the subagent returned "success" — for failure paths it's irrelevant, +// and for success paths we return empty produced_artifacts matching empty diff). +// The WorktreeRuntime and GitCommandResult types are imported further down +// alongside the worktree-mode tests. TypeScript hoists all import statements +// to module scope, so they are available here. + +// Deterministic diff returned by noop runtime for computeDiff. The path +// "src/__noop__.ts" MUST match `NOOP_PRODUCED_ARTIFACTS` so integration +// passes the undeclared-path check. +const NOOP_DIFF = Buffer.from( + "diff --git a/src/__noop__.ts b/src/__noop__.ts\n--- a/src/__noop__.ts\n+++ b/src/__noop__.ts\n@@ -1 +1 @@\n-old\n+new\n", + "utf8", +); +const NOOP_PRODUCED_ARTIFACTS = ["src/__noop__.ts"]; + +function noopWorktreeRuntime(repoRoot: string): WorktreeRuntime { + const noop: GitCommandResult = { + status: 0, + stdout: Buffer.alloc(0), + stderr: "", + }; + const okBuf = (s: string): GitCommandResult => ({ + status: 0, + stdout: Buffer.from(s, "utf8"), + stderr: "", + }); + return { + repoRoot, + git: (args, cwd) => { + if (args[0] === "rev-parse") return okBuf("0000000\n"); + if (args[0] === "worktree") return noop; + // R4-F10: ls-files for untracked enumeration → empty (no untracked files). + if (args[0] === "ls-files") return noop; + // R4-F10: reset after intent-to-add → no-op. + if (args[0] === "reset") return noop; + // Materialize (diff HEAD at repoRoot): empty → no workspace changes. + // Snapshot check (diff HEAD in worktree): also empty → no snapshot commit. + // computeDiff (diff baseSha in worktree): deterministic diff. + if (args[0] === "diff") { + if (args.includes("HEAD")) return noop; // materialize or snapshot check + return { status: 0, stdout: NOOP_DIFF, stderr: "" }; // computeDiff + } + // Snapshot steps (add -A, commit) — no-op since materialize is empty. + if (args[0] === "add") return noop; + if (args[0] === "commit") return noop; + return noop; + }, + applyPatch: () => noop, + fs: { + existsSync: () => false, + mkdirSync: () => {}, + rmSync: () => {}, + }, + }; +} + // --- Inline short-circuit --- test("runDispatchedWindow: returns inline without mutation when classifier picks inline", async () => { @@ -128,7 +187,10 @@ test("runDispatchedWindow: dispatches one chunk, records in_progress→done for const invokedFor: string[] = []; const invoker: SubagentInvoker = async (pkg: ContextPackage) => { invokedFor.push(pkg.bundleId); - return { status: "success", produced_artifacts: [] }; + return { + status: "success", + produced_artifacts: NOOP_PRODUCED_ARTIFACTS, + }; }; const result = await runDispatchedWindow({ window, @@ -138,6 +200,8 @@ test("runDispatchedWindow: dispatches one chunk, records in_progress→done for repoRoot: root, invoke: invoker, advance: recordingAdvance(advances), + worktreeRuntime: noopWorktreeRuntime(root), + runId: "test-run", }); assert.equal(result.outcome, "ok"); assert.deepEqual(invokedFor.sort(), ["a", "b"]); @@ -166,7 +230,7 @@ test("runDispatchedWindow: splits window into chunks of maxConcurrency and dispa const advances: AdvanceCall[] = []; const invoker: SubagentInvoker = async () => ({ status: "success", - produced_artifacts: [], + produced_artifacts: NOOP_PRODUCED_ARTIFACTS, }); const result = await runDispatchedWindow({ window, @@ -176,6 +240,8 @@ test("runDispatchedWindow: splits window into chunks of maxConcurrency and dispa repoRoot: root, invoke: invoker, advance: recordingAdvance(advances), + worktreeRuntime: noopWorktreeRuntime(root), + runId: "test-run", }); assert.equal(result.outcome, "ok"); // Chunk 1: a, b in_progress → a, b done (interleaved). Chunk 2: c, d. @@ -199,7 +265,7 @@ test("runDispatchedWindow: splits window into chunks of maxConcurrency and dispa // --- Drain-then-stop on failure --- -test("runDispatchedWindow: on failure, drains chunk and records done for successful siblings; failed bundle stays in_progress", async () => { +test("runDispatchedWindow: on failure, drains chunk and records done for successful siblings; failed bundle transitions to subagent_failed (apply-worktree-isolation)", async () => { const { root, changeId } = setupRepo(); try { const window = [mkBundle("a", 3), mkBundle("b", 3), mkBundle("c", 3)]; @@ -215,7 +281,10 @@ test("runDispatchedWindow: on failure, drains chunk and records done for success } // Artificial delay to ensure a and c don't race ahead of b's rejection. await new Promise((r) => setTimeout(r, 10)); - return { status: "success", produced_artifacts: [] }; + return { + status: "success", + produced_artifacts: NOOP_PRODUCED_ARTIFACTS, + }; }; const result = await runDispatchedWindow({ window, @@ -225,14 +294,18 @@ test("runDispatchedWindow: on failure, drains chunk and records done for success repoRoot: root, invoke: invoker, advance: recordingAdvance(advances), + worktreeRuntime: noopWorktreeRuntime(root), + runId: "test-run", }); assert.equal(result.outcome, "failed"); if (result.outcome === "failed") { assert.equal(result.failures.length, 1); assert.equal(result.failures[0]?.bundleId, "b"); assert.equal(result.failures[0]?.error.message, "B crashed"); + // New: terminal status is classified at the failure site. + assert.equal(result.failures[0]?.terminalStatus, "subagent_failed"); } - // a, b, c all advanced to in_progress. Only a and c advanced to done. + // a, b, c all advanced to in_progress. a and c to done. b to subagent_failed. const ids = advances.map((e) => `${e.bundleId}:${e.status}`).sort(); assert.deepEqual( ids, @@ -240,6 +313,7 @@ test("runDispatchedWindow: on failure, drains chunk and records done for success "a:done", "a:in_progress", "b:in_progress", + "b:subagent_failed", "c:done", "c:in_progress", ].sort(), @@ -263,7 +337,10 @@ test("runDispatchedWindow: multiple failures in one chunk are all reported", asy error: { message: `${pkg.bundleId} crashed` }, }; } - return { status: "success", produced_artifacts: [] }; + return { + status: "success", + produced_artifacts: NOOP_PRODUCED_ARTIFACTS, + }; }; const result = await runDispatchedWindow({ window, @@ -273,6 +350,8 @@ test("runDispatchedWindow: multiple failures in one chunk are all reported", asy repoRoot: root, invoke: invoker, advance: recordingAdvance(advances), + worktreeRuntime: noopWorktreeRuntime(root), + runId: "test-run", }); assert.equal(result.outcome, "failed"); if (result.outcome === "failed") { @@ -314,7 +393,10 @@ test("runDispatchedWindow: failure in first chunk prevents second chunk from sta error: { message: "a crashed" }, }; } - return { status: "success", produced_artifacts: [] }; + return { + status: "success", + produced_artifacts: NOOP_PRODUCED_ARTIFACTS, + }; }; const result = await runDispatchedWindow({ window, @@ -324,6 +406,8 @@ test("runDispatchedWindow: failure in first chunk prevents second chunk from sta repoRoot: root, invoke: invoker, advance: recordingAdvance(advances), + worktreeRuntime: noopWorktreeRuntime(root), + runId: "test-run", }); assert.equal(result.outcome, "failed"); // c SHALL NOT have been invoked (it's in chunk 2). @@ -364,6 +448,8 @@ test("runDispatchedWindow: preflight failure throws MissingCapabilityError with repoRoot: root, invoke: invoker, advance: recordingAdvance(advances), + worktreeRuntime: noopWorktreeRuntime(root), + runId: "test-run", }), (err: Error) => { assert.ok(err instanceof MissingCapabilityError); @@ -397,7 +483,7 @@ test("runDispatchedWindow: advance(done) throw stops chunk immediately, no furth }; const invoker: SubagentInvoker = async () => ({ status: "success", - produced_artifacts: [], + produced_artifacts: NOOP_PRODUCED_ARTIFACTS, }); const result = await runDispatchedWindow({ window, @@ -407,6 +493,8 @@ test("runDispatchedWindow: advance(done) throw stops chunk immediately, no furth repoRoot: root, invoke: invoker, advance, + worktreeRuntime: noopWorktreeRuntime(root), + runId: "test-run", }); assert.equal(result.outcome, "failed"); if (result.outcome === "failed") { @@ -446,6 +534,8 @@ test("runDispatchedWindow: subagent throw is captured as ChunkFailure with error repoRoot: root, invoke: invoker, advance: async () => {}, + worktreeRuntime: noopWorktreeRuntime(root), + runId: "test-run", }); assert.equal(result.outcome, "failed"); if (result.outcome === "failed") { @@ -456,3 +546,503 @@ test("runDispatchedWindow: subagent throw is captured as ChunkFailure with error rmSync(root, { recursive: true, force: true }); } }); + +// --- R1-F02: subagent-shared mode is NOT supported --- + +test("runDispatchedWindow: throws when window is subagent-dispatched but worktreeRuntime is omitted (subagent-shared forbidden)", async () => { + const { root, changeId } = setupRepo(); + try { + const window = [mkBundle("a", 3)]; + const graph = mkGraph(window); + const invoker: SubagentInvoker = async () => ({ + status: "success", + produced_artifacts: [], + }); + await assert.rejects( + runDispatchedWindow({ + window, + config: dispatchAll, + changeId, + taskGraph: graph, + repoRoot: root, + invoke: invoker, + advance: async () => {}, + // worktreeRuntime intentionally omitted + }), + (err: Error) => { + assert.ok( + err.message.includes("subagent-shared"), + `expected 'subagent-shared' in error, got: ${err.message}`, + ); + return true; + }, + ); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +// --- apply-worktree-isolation: worktree mode end-to-end --- +// These tests inject a fake WorktreeRuntime so we don't have to spawn real +// `git worktree` processes. The runtime records every git call so we can +// assert worktree lifecycle: add → diff → apply → remove (on success), or +// add → diff → apply → retain (on failure/rejection). + +import type { + GitApplier, + GitCommandResult, + GitRunner, + WorktreeRuntime, +} from "../lib/apply-worktree/worktree.js"; +import { WorktreeError } from "../lib/apply-worktree/worktree.js"; + +interface GitCall { + readonly cmd: "run" | "apply"; + readonly args: readonly string[]; + readonly cwd: string; +} + +function makeFakeFs(): { + existsSync: (p: string) => boolean; + mkdirSync: (p: string, opts?: { recursive?: boolean }) => void; + rmSync: (p: string) => void; + _paths: Set; +} { + const paths = new Set(); + return { + _paths: paths, + existsSync: (p) => paths.has(p), + mkdirSync: (p) => { + paths.add(p); + }, + rmSync: (p) => { + paths.delete(p); + }, + }; +} + +function wtOk(stdout: string | Buffer = ""): GitCommandResult { + return { + status: 0, + stdout: Buffer.isBuffer(stdout) ? stdout : Buffer.from(stdout, "utf8"), + stderr: "", + }; +} + +function wtFail(stderr: string, status = 1): GitCommandResult { + return { status, stdout: Buffer.alloc(0), stderr }; +} + +/** + * A WorktreeRuntime that fakes every git call but tracks which worktrees + * currently "exist" via the fake fs. The `diffFor(bundleId)` map decides + * what the worktree diff looks like per bundle. + */ +function makeWorktreeRuntime(opts: { + readonly repoRoot: string; + readonly diffFor: (bundleId: string) => string; + readonly onApply?: (bundleId: string, patch: Buffer) => void; + readonly applyResultFor?: (bundleId: string) => GitCommandResult; + readonly failCreateFor?: (bundleId: string) => string | undefined; + readonly calls?: GitCall[]; +}): { + runtime: WorktreeRuntime; + calls: GitCall[]; + fs: ReturnType; +} { + const fs = makeFakeFs(); + const calls: GitCall[] = opts.calls ?? []; + let currentBundleId: string | null = null; + + const git: GitRunner = (args, cwd) => { + calls.push({ cmd: "run", args, cwd }); + if (args[0] === "rev-parse" && args[1] === "HEAD") { + return wtOk("basesha\n"); + } + if (args[0] === "worktree" && args[1] === "add") { + // args includes --detach . Extract bundle-id from path. + const path = args.find((a) => a.includes(".specflow/worktrees")); + const bundleId = path?.split("/").pop() ?? ""; + currentBundleId = bundleId; + const failReason = opts.failCreateFor?.(bundleId); + if (failReason) { + return wtFail(failReason, 128); + } + if (path) fs._paths.add(path); + return wtOk(); + } + // R4-F10: ls-files for untracked enumeration → empty (no untracked files). + if (args[0] === "ls-files") return wtOk(""); + // R4-F10: reset after intent-to-add → no-op. + if (args[0] === "reset") return wtOk(); + if (args[0] === "diff") { + // Materialize (diff HEAD at repoRoot): empty → no workspace changes. + // Snapshot check (diff HEAD in worktree): also empty → no snapshot. + // computeDiff (diff baseSha in worktree): per-bundle diff. + if (args.includes("HEAD")) { + return wtOk(""); // materialize or snapshot check: no workspace changes + } + const bundleId = cwd.split("/").pop() ?? ""; + return wtOk(opts.diffFor(bundleId)); + } + if (args[0] === "worktree" && args[1] === "remove") { + const path = args[args.length - 1]; + if (path) fs._paths.delete(path); + return wtOk(); + } + // Snapshot steps (add -A, commit) — no-op since materialize is empty. + if (args[0] === "add") return wtOk(); + if (args[0] === "commit") return wtOk(); + void currentBundleId; + return wtFail(`unexpected git call: ${args.join(" ")}`); + }; + + const applyPatch: GitApplier = (patch, cwd) => { + calls.push({ cmd: "apply", args: [], cwd }); + // Recover bundle id from the most recent diff call's cwd — approximate. + // We use the patch's first `diff --git` line to infer a bundle identity. + const header = patch.toString("utf8").split("\n")[0] ?? ""; + const match = header.match(/\.(bundle-\w+)\./); + const bundleId = match?.[1] ?? ""; + if (opts.onApply) opts.onApply(bundleId, patch); + if (opts.applyResultFor) { + return opts.applyResultFor(bundleId); + } + return wtOk(); + }; + + return { + runtime: { repoRoot: opts.repoRoot, git, applyPatch, fs }, + calls, + fs, + }; +} + +test("runDispatchedWindow (worktree): success path creates worktree, applies patch, removes worktree, advances done", async () => { + const { root, changeId } = setupRepo(); + try { + const window = [mkBundle("a", 3)]; + const graph = mkGraph(window); + const advances: AdvanceCall[] = []; + const { runtime, calls, fs } = makeWorktreeRuntime({ + repoRoot: root, + diffFor: () => + "diff --git a/src/a.ts b/src/a.ts\n--- a/src/a.ts\n+++ b/src/a.ts\n", + }); + const invoker: SubagentInvoker = async (_pkg, handle) => { + assert.ok(handle, "worktree handle must be passed in worktree mode"); + return { status: "success", produced_artifacts: ["src/a.ts"] }; + }; + + const result = await runDispatchedWindow({ + window, + config: dispatchAll, + changeId, + taskGraph: graph, + repoRoot: root, + invoke: invoker, + advance: recordingAdvance(advances), + worktreeRuntime: runtime, + runId: "run-1", + }); + assert.equal(result.outcome, "ok"); + assert.deepEqual( + advances.map((a) => `${a.bundleId}:${a.status}`), + ["a:in_progress", "a:done"], + ); + + // Worktree lifecycle: add → diff → apply → remove. Fs reflects removal. + assert.ok( + calls.some((c) => c.args.includes("add")), + "git worktree add must be invoked", + ); + assert.ok( + calls.some((c) => c.cmd === "apply"), + "git apply must be invoked", + ); + assert.ok( + calls.some((c) => c.args.includes("remove")), + "git worktree remove must be invoked on success", + ); + const wtPath = `${root}/.specflow/worktrees/run-1/a`; + assert.ok(!fs._paths.has(wtPath), "worktree path must be cleaned up"); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("runDispatchedWindow (worktree): subagent failure advances to subagent_failed and RETAINS worktree", async () => { + const { root, changeId } = setupRepo(); + try { + const window = [mkBundle("a", 3)]; + const graph = mkGraph(window); + const advances: AdvanceCall[] = []; + const { runtime, calls, fs } = makeWorktreeRuntime({ + repoRoot: root, + diffFor: () => "", // irrelevant on failure path + }); + const invoker: SubagentInvoker = async () => ({ + status: "failure", + produced_artifacts: [], + error: { message: "subagent exploded" }, + }); + + const result = await runDispatchedWindow({ + window, + config: dispatchAll, + changeId, + taskGraph: graph, + repoRoot: root, + invoke: invoker, + advance: recordingAdvance(advances), + worktreeRuntime: runtime, + runId: "run-1", + }); + assert.equal(result.outcome, "failed"); + if (result.outcome === "failed") { + assert.equal(result.failures[0]?.terminalStatus, "subagent_failed"); + } + assert.deepEqual( + advances.map((a) => `${a.bundleId}:${a.status}`), + ["a:in_progress", "a:subagent_failed"], + ); + + // Worktree created but NOT removed (retention on failure). + assert.ok(calls.some((c) => c.args.includes("add"))); + assert.ok( + !calls.some((c) => c.args.includes("remove")), + "worktree must be retained on subagent_failed", + ); + const wtPath = `${root}/.specflow/worktrees/run-1/a`; + assert.ok(fs._paths.has(wtPath), "worktree path must persist on failure"); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("runDispatchedWindow (worktree): integration rejection advances to integration_rejected and RETAINS worktree", async () => { + const { root, changeId } = setupRepo(); + try { + const window = [mkBundle("a", 3)]; + const graph = mkGraph(window); + const advances: AdvanceCall[] = []; + // Diff touches an undeclared path → integration rejection. + const { runtime, calls, fs } = makeWorktreeRuntime({ + repoRoot: root, + diffFor: () => + "diff --git a/undeclared.ts b/undeclared.ts\n--- a/undeclared.ts\n+++ b/undeclared.ts\n", + }); + const invoker: SubagentInvoker = async () => ({ + status: "success", + produced_artifacts: ["declared-only.ts"], // undeclared.ts NOT listed + }); + + const result = await runDispatchedWindow({ + window, + config: dispatchAll, + changeId, + taskGraph: graph, + repoRoot: root, + invoke: invoker, + advance: recordingAdvance(advances), + worktreeRuntime: runtime, + runId: "run-1", + }); + assert.equal(result.outcome, "failed"); + if (result.outcome === "failed") { + assert.equal(result.failures[0]?.terminalStatus, "integration_rejected"); + assert.equal( + result.failures[0]?.integrationCause?.kind, + "undeclared_path", + ); + } + assert.deepEqual( + advances.map((a) => `${a.bundleId}:${a.status}`), + ["a:in_progress", "a:integration_rejected"], + ); + + // Apply must not have run; worktree retained. + assert.ok( + !calls.some((c) => c.cmd === "apply"), + "apply must be skipped on undeclared_path rejection", + ); + assert.ok( + !calls.some((c) => c.args.includes("remove")), + "worktree retained on integration_rejected", + ); + const wtPath = `${root}/.specflow/worktrees/run-1/a`; + assert.ok(fs._paths.has(wtPath)); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("runDispatchedWindow (worktree): fail-fast when createWorktree fails — no advance, no subagent invocation", async () => { + const { root, changeId } = setupRepo(); + try { + const window = [mkBundle("a", 3)]; + const graph = mkGraph(window); + const advances: AdvanceCall[] = []; + const { runtime } = makeWorktreeRuntime({ + repoRoot: root, + diffFor: () => "", + failCreateFor: (id) => + id === "a" ? "fatal: 'path' already exists" : undefined, + }); + let invokerCalled = false; + const invoker: SubagentInvoker = async () => { + invokerCalled = true; + return { status: "success", produced_artifacts: [] }; + }; + + await assert.rejects( + runDispatchedWindow({ + window, + config: dispatchAll, + changeId, + taskGraph: graph, + repoRoot: root, + invoke: invoker, + advance: recordingAdvance(advances), + worktreeRuntime: runtime, + runId: "run-1", + }), + (err: unknown) => err instanceof WorktreeError, + ); + + assert.equal(invokerCalled, false, "subagent must NOT be invoked"); + assert.equal(advances.length, 0, "no advance calls on fail-fast"); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("runDispatchedWindow (worktree): create failure on second bundle rolls back first bundle's worktree", async () => { + const { root, changeId } = setupRepo(); + try { + const window = [mkBundle("a", 3), mkBundle("b", 3)]; + const graph = mkGraph(window); + const advances: AdvanceCall[] = []; + const { runtime, fs } = makeWorktreeRuntime({ + repoRoot: root, + diffFor: () => "", + failCreateFor: (id) => + id === "b" ? "fatal: permission denied" : undefined, + }); + const invoker: SubagentInvoker = async () => ({ + status: "success", + produced_artifacts: [], + }); + + await assert.rejects( + runDispatchedWindow({ + window, + config: dispatchAll, + changeId, + taskGraph: graph, + repoRoot: root, + invoke: invoker, + advance: recordingAdvance(advances), + worktreeRuntime: runtime, + runId: "run-1", + }), + (err: unknown) => err instanceof WorktreeError, + ); + + // Rollback: the worktree for 'a' must not be left on disk after 'b' fails. + const aPath = `${root}/.specflow/worktrees/run-1/a`; + assert.ok( + !fs._paths.has(aPath), + "first bundle's worktree must be cleaned up when a later bundle fails create", + ); + assert.equal(advances.length, 0, "no advance calls before rollback"); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +// --- R4-F12: cleanup warnings are surfaced, not silently swallowed --- + +test("runDispatchedWindow (worktree): worktree remove failure surfaces a cleanupWarning instead of silently swallowing (R4-F12)", async () => { + const { root, changeId } = setupRepo(); + try { + const window = [mkBundle("a", 3)]; + const graph = mkGraph(window); + const advances: AdvanceCall[] = []; + + // Custom runtime where `git worktree remove` fails. + const noop: GitCommandResult = { + status: 0, + stdout: Buffer.alloc(0), + stderr: "", + }; + const okBuf = (s: string): GitCommandResult => ({ + status: 0, + stdout: Buffer.from(s, "utf8"), + stderr: "", + }); + const runtime: WorktreeRuntime = { + repoRoot: root, + git: (args) => { + if (args[0] === "rev-parse") return okBuf("0000000\n"); + if (args[0] === "worktree" && args[1] === "add") return noop; + if (args[0] === "worktree" && args[1] === "remove") { + // Simulate removal failure (e.g., filesystem lock). + return { + status: 1, + stdout: Buffer.alloc(0), + stderr: "fatal: cannot remove", + }; + } + if (args[0] === "ls-files") return noop; + if (args[0] === "reset") return noop; + if (args[0] === "diff") { + if (args.includes("HEAD")) return noop; + return { status: 0, stdout: NOOP_DIFF, stderr: "" }; + } + if (args[0] === "add") return noop; + if (args[0] === "commit") return noop; + return noop; + }, + applyPatch: () => noop, + fs: { + existsSync: () => false, + mkdirSync: () => {}, + rmSync: () => {}, + }, + }; + const invoker: SubagentInvoker = async () => ({ + status: "success", + produced_artifacts: NOOP_PRODUCED_ARTIFACTS, + }); + + const result = await runDispatchedWindow({ + window, + config: dispatchAll, + changeId, + taskGraph: graph, + repoRoot: root, + invoke: invoker, + advance: recordingAdvance(advances), + worktreeRuntime: runtime, + runId: "test-run", + }); + + // The bundle should still succeed (done) — cleanup failure is non-fatal. + assert.equal(result.outcome, "ok"); + // But the cleanup warning must be surfaced, not silently swallowed. + if (result.outcome === "ok") { + assert.ok( + result.cleanupWarnings && result.cleanupWarnings.length > 0, + "cleanup warning must be surfaced when worktree removal fails", + ); + assert.equal(result.cleanupWarnings![0]!.bundleId, "a"); + assert.ok( + result.cleanupWarnings![0]!.message.includes("cannot remove"), + `warning message must include the git error; got: ${result.cleanupWarnings![0]!.message}`, + ); + } + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); diff --git a/src/tests/apply-worktree-helpers.test.ts b/src/tests/apply-worktree-helpers.test.ts new file mode 100644 index 0000000..a290489 --- /dev/null +++ b/src/tests/apply-worktree-helpers.test.ts @@ -0,0 +1,805 @@ +import assert from "node:assert/strict"; +import test from "node:test"; +import { + __internal_testing, + computeDiff, + createWorktree, + type GitApplier, + type GitCommandResult, + type GitRunner, + importPatch, + isProtectedPath, + listTouchedPaths, + removeWorktree, + WorktreeError, + type WorktreeHandle, + type WorktreeRuntime, + worktreePath, +} from "../lib/apply-worktree/worktree.js"; + +// --- Fake git/fs runtime fixtures --- + +interface FakeFs { + readonly existsSync: (path: string) => boolean; + readonly mkdirSync: (path: string, opts?: { recursive?: boolean }) => void; + readonly rmSync: (path: string) => void; + readonly _paths: Set; +} + +function makeFakeFs(preExisting: readonly string[] = []): FakeFs { + const paths = new Set(preExisting); + return { + _paths: paths, + existsSync: (p) => paths.has(p), + mkdirSync: (p) => { + paths.add(p); + }, + rmSync: (p) => { + paths.delete(p); + }, + }; +} + +function ok(stdout: string | Buffer = ""): GitCommandResult { + return { + status: 0, + stdout: Buffer.isBuffer(stdout) ? stdout : Buffer.from(stdout, "utf8"), + stderr: "", + }; +} + +function fail(stderr: string, status = 1): GitCommandResult { + return { status, stdout: Buffer.alloc(0), stderr }; +} + +function makeRuntime(opts: { + readonly git?: GitRunner; + readonly applyPatch?: GitApplier; + readonly fs?: FakeFs; + readonly repoRoot?: string; +}): WorktreeRuntime { + return { + repoRoot: opts.repoRoot ?? "/repo", + git: opts.git, + applyPatch: opts.applyPatch, + fs: opts.fs ?? makeFakeFs(), + }; +} + +// --- createWorktree --- + +test("createWorktree: succeeds and records base SHA from HEAD when no workspace changes exist", () => { + const invocations: Array = []; + const runtime = makeRuntime({ + git: (args) => { + invocations.push(args); + if (args[0] === "rev-parse" && args[1] === "HEAD") { + return ok("abc123\n"); + } + if (args[0] === "worktree" && args[1] === "add") { + return ok(); + } + // Materialize step: `git diff --binary --find-renames HEAD` at repo root. + // Return empty diff — no uncommitted changes to materialize. + // Snapshot step also diffs HEAD in the worktree — also empty. + if (args[0] === "diff" && args.includes("HEAD")) { + return ok(""); + } + return fail(`unexpected git call: ${args.join(" ")}`); + }, + }); + const handle = createWorktree(runtime, "run-1", "bundle-a"); + assert.equal(handle.runId, "run-1"); + assert.equal(handle.bundleId, "bundle-a"); + // No workspace changes → snapshot is a no-op → baseSha stays as HEAD. + assert.equal(handle.baseSha, "abc123"); + assert.equal(handle.path, "/repo/.specflow/worktrees/run-1/bundle-a"); + + // HEAD was resolved BEFORE worktree add — guards against the recorded base + // drifting from the actual worktree base if HEAD moves concurrently. + assert.deepEqual(invocations[0], ["rev-parse", "HEAD"]); + const addCall = invocations[1]; + assert.equal(addCall[0], "worktree"); + assert.equal(addCall[1], "add"); + assert.ok(addCall.includes("--detach")); + assert.ok(addCall.includes("/repo/.specflow/worktrees/run-1/bundle-a")); + assert.ok(addCall.includes("abc123")); +}); + +test("createWorktree: refuses to run if target path already exists", () => { + const runtime = makeRuntime({ + fs: makeFakeFs(["/repo/.specflow/worktrees/run-1/bundle-a"]), + git: () => { + throw new Error("git should not be invoked when pre-check fails"); + }, + }); + assert.throws( + () => createWorktree(runtime, "run-1", "bundle-a"), + (err) => + err instanceof WorktreeError && err.cause.operation === "create-precheck", + ); +}); + +test("createWorktree: propagates git worktree add failure as WorktreeError with worktree path included (R2-F06)", () => { + const runtime = makeRuntime({ + git: (args) => { + if (args[0] === "rev-parse") return ok("abc123\n"); + if (args[0] === "worktree" && args[1] === "add") + return fail("fatal: 'path' already exists", 128); + if (args[0] === "diff") return ok(""); + return fail("unexpected"); + }, + }); + assert.throws( + () => createWorktree(runtime, "run-1", "bundle-a"), + (err) => + err instanceof WorktreeError && + err.message.includes("/repo/.specflow/worktrees/run-1/bundle-a") && + err.message.includes("fatal:"), + ); +}); + +test("createWorktree: propagates rev-parse failure as WorktreeError", () => { + const runtime = makeRuntime({ + git: (args) => { + if (args[0] === "rev-parse") return fail("fatal: bad revision", 128); + return ok(); + }, + }); + assert.throws(() => createWorktree(runtime, "run-1", "bundle-a")); +}); + +test("createWorktree: materializes uncommitted workspace changes and snapshots them as a new baseSha", () => { + const workspaceDiff = Buffer.from( + "diff --git a/earlier-import.ts b/earlier-import.ts\n--- a/earlier-import.ts\n+++ b/earlier-import.ts\n", + "utf8", + ); + const appliedPatches: Array<{ patch: Buffer; cwd: string }> = []; + const gitCalls: Array<{ args: readonly string[]; cwd: string }> = []; + const runtime = makeRuntime({ + repoRoot: "/repo", + git: (args, cwd) => { + gitCalls.push({ args, cwd }); + // Initial rev-parse HEAD at repo root. + if (args[0] === "rev-parse" && args[1] === "HEAD" && cwd === "/repo") + return ok("abc\n"); + if (args[0] === "worktree" && args[1] === "add") return ok(); + // Materialize step (diff at repo root): return non-empty diff. + if (args[0] === "diff" && args.includes("HEAD") && cwd === "/repo") + return ok(workspaceDiff); + // Snapshot step (diff at worktree): also non-empty (changes were applied). + if (args[0] === "diff" && args.includes("HEAD") && cwd !== "/repo") + return ok(workspaceDiff); + // Snapshot: git add -A in worktree. + if (args[0] === "add" && args[1] === "-A") return ok(); + // Snapshot: git commit in worktree. + if (args[0] === "commit") return ok(); + // Snapshot: rev-parse HEAD in worktree → new snapshot SHA. + if (args[0] === "rev-parse" && args[1] === "HEAD" && cwd !== "/repo") + return ok("snapshot-sha\n"); + return fail(`unexpected: ${args.join(" ")} in ${cwd}`); + }, + applyPatch: (patch, cwd) => { + appliedPatches.push({ patch, cwd }); + return ok(); + }, + }); + const handle = createWorktree(runtime, "run-1", "bundle-b"); + // The materialize step should apply the workspace diff INTO the worktree. + assert.equal(appliedPatches.length, 1); + assert.equal(appliedPatches[0].cwd, handle.path); + assert.deepEqual(appliedPatches[0].patch, workspaceDiff); + // baseSha must be the snapshot commit, NOT the original HEAD — so that + // computeDiff later captures only the subagent's delta, not the materialized + // workspace changes that are already present at the repo root. + assert.equal(handle.baseSha, "snapshot-sha"); + // Verify the snapshot sequence: add -A → commit → rev-parse HEAD in worktree. + const snapshotCalls = gitCalls.filter((c) => c.cwd === handle.path); + assert.ok( + snapshotCalls.some((c) => c.args[0] === "add" && c.args[1] === "-A"), + ); + assert.ok(snapshotCalls.some((c) => c.args[0] === "commit")); +}); + +test("createWorktree: snapshot stage failure is fatal when materialized changes exist (R2-F05)", () => { + const workspaceDiff = Buffer.from( + "diff --git a/x.ts b/x.ts\n--- a/x.ts\n+++ b/x.ts\n", + "utf8", + ); + const runtime = makeRuntime({ + repoRoot: "/repo", + git: (args, cwd) => { + if (args[0] === "rev-parse" && cwd === "/repo") return ok("abc\n"); + if (args[0] === "worktree" && args[1] === "add") return ok(); + // Materialize: non-empty diff at repo root. + if (args[0] === "diff" && args.includes("HEAD") && cwd === "/repo") + return ok(workspaceDiff); + // Snapshot diff check in worktree: non-empty (changes were applied). + if (args[0] === "diff" && args.includes("HEAD") && cwd !== "/repo") + return ok(workspaceDiff); + // Snapshot: git add -A FAILS. + if (args[0] === "add" && args[1] === "-A") + return fail("error: unable to index", 1); + return fail("unexpected"); + }, + applyPatch: () => ok(), + }); + assert.throws( + () => createWorktree(runtime, "run-1", "bundle-a"), + (err) => + err instanceof WorktreeError && err.cause.operation === "snapshot-stage", + ); +}); + +test("createWorktree: snapshot commit failure is fatal when materialized changes exist (R2-F05)", () => { + const workspaceDiff = Buffer.from( + "diff --git a/x.ts b/x.ts\n--- a/x.ts\n+++ b/x.ts\n", + "utf8", + ); + const runtime = makeRuntime({ + repoRoot: "/repo", + git: (args, cwd) => { + if (args[0] === "rev-parse" && cwd === "/repo") return ok("abc\n"); + if (args[0] === "worktree" && args[1] === "add") return ok(); + if (args[0] === "diff" && args.includes("HEAD") && cwd === "/repo") + return ok(workspaceDiff); + if (args[0] === "diff" && args.includes("HEAD") && cwd !== "/repo") + return ok(workspaceDiff); + if (args[0] === "add" && args[1] === "-A") return ok(); + // Snapshot: git commit FAILS. + if (args[0] === "commit") return fail("error: unable to commit", 1); + return fail("unexpected"); + }, + applyPatch: () => ok(), + }); + assert.throws( + () => createWorktree(runtime, "run-1", "bundle-a"), + (err) => + err instanceof WorktreeError && err.cause.operation === "snapshot-commit", + ); +}); + +test("createWorktree: snapshot rev-parse failure is fatal when materialized changes exist (R2-F05)", () => { + const workspaceDiff = Buffer.from( + "diff --git a/x.ts b/x.ts\n--- a/x.ts\n+++ b/x.ts\n", + "utf8", + ); + const runtime = makeRuntime({ + repoRoot: "/repo", + git: (args, cwd) => { + if (args[0] === "rev-parse" && cwd === "/repo") return ok("abc\n"); + if (args[0] === "worktree" && args[1] === "add") return ok(); + if (args[0] === "diff" && args.includes("HEAD") && cwd === "/repo") + return ok(workspaceDiff); + if (args[0] === "diff" && args.includes("HEAD") && cwd !== "/repo") + return ok(workspaceDiff); + if (args[0] === "add" && args[1] === "-A") return ok(); + if (args[0] === "commit") return ok(); + // Snapshot: rev-parse HEAD in worktree FAILS. + if (args[0] === "rev-parse" && cwd !== "/repo") + return fail("fatal: bad object", 128); + return fail("unexpected"); + }, + applyPatch: () => ok(), + }); + assert.throws( + () => createWorktree(runtime, "run-1", "bundle-a"), + (err) => + err instanceof WorktreeError && + err.cause.operation === "snapshot-rev-parse", + ); +}); + +test("createWorktree: snapshot diff-check failure is fatal (R2-F05)", () => { + const workspaceDiff = Buffer.from( + "diff --git a/x.ts b/x.ts\n--- a/x.ts\n+++ b/x.ts\n", + "utf8", + ); + const runtime = makeRuntime({ + repoRoot: "/repo", + git: (args, cwd) => { + if (args[0] === "rev-parse" && cwd === "/repo") return ok("abc\n"); + if (args[0] === "worktree" && args[1] === "add") return ok(); + if (args[0] === "diff" && args.includes("HEAD") && cwd === "/repo") + return ok(workspaceDiff); + // Snapshot diff check in worktree FAILS. + if (args[0] === "diff" && args.includes("HEAD") && cwd !== "/repo") + return fail("error: bad index", 1); + return fail("unexpected"); + }, + applyPatch: () => ok(), + }); + assert.throws( + () => createWorktree(runtime, "run-1", "bundle-a"), + (err) => + err instanceof WorktreeError && + err.cause.operation === "snapshot-diff-check", + ); +}); + +test("createWorktree: propagates materialize-apply failure as WorktreeError", () => { + const runtime = makeRuntime({ + git: (args) => { + if (args[0] === "rev-parse") return ok("abc\n"); + if (args[0] === "worktree" && args[1] === "add") return ok(); + if (args[0] === "diff" && args.includes("HEAD")) + return ok("diff --git a/x b/x\n"); + return fail("unexpected"); + }, + applyPatch: () => fail("error: patch does not apply", 1), + }); + assert.throws( + () => createWorktree(runtime, "run-1", "bundle-a"), + (err) => + err instanceof WorktreeError && + err.cause.operation === "materialize-apply", + ); +}); + +test("createWorktree: self-cleans the just-added worktree when post-add setup fails (R3-F08)", () => { + const removeCalls: Array = []; + const runtime = makeRuntime({ + repoRoot: "/repo", + git: (args) => { + if (args[0] === "rev-parse") return ok("abc\n"); + if (args[0] === "worktree" && args[1] === "add") return ok(); + // Materialize diff: return non-empty so apply is attempted. + if (args[0] === "diff" && args.includes("HEAD")) + return ok("diff --git a/x b/x\n"); + // Self-cleanup: git worktree remove --force. + if (args[0] === "worktree" && args[1] === "remove") { + removeCalls.push(args); + return ok(); + } + return fail("unexpected"); + }, + // Materialize-apply FAILS — triggers the self-cleanup path. + applyPatch: () => fail("error: patch does not apply", 1), + }); + assert.throws( + () => createWorktree(runtime, "run-1", "bundle-a"), + (err) => + err instanceof WorktreeError && + err.cause.operation === "materialize-apply", + ); + // The just-added worktree must have been removed via git worktree remove. + assert.equal( + removeCalls.length, + 1, + "self-cleanup must invoke worktree remove", + ); + assert.ok( + removeCalls[0].includes("/repo/.specflow/worktrees/run-1/bundle-a"), + "self-cleanup must target the correct worktree path", + ); +}); + +test("createWorktree: self-cleans when snapshot fails after successful materialize (R3-F08)", () => { + const removeCalls: Array = []; + const workspaceDiff = Buffer.from( + "diff --git a/x.ts b/x.ts\n--- a/x.ts\n+++ b/x.ts\n", + "utf8", + ); + const runtime = makeRuntime({ + repoRoot: "/repo", + git: (args, cwd) => { + if (args[0] === "rev-parse" && cwd === "/repo") return ok("abc\n"); + if (args[0] === "worktree" && args[1] === "add") return ok(); + // Materialize diff at repo root: non-empty. + if (args[0] === "diff" && args.includes("HEAD") && cwd === "/repo") + return ok(workspaceDiff); + // Snapshot diff check in worktree: non-empty (changes were applied). + if (args[0] === "diff" && args.includes("HEAD") && cwd !== "/repo") + return ok(workspaceDiff); + // Snapshot: git add -A succeeds. + if (args[0] === "add" && args[1] === "-A") return ok(); + // Snapshot: git commit FAILS — triggers self-cleanup. + if (args[0] === "commit") return fail("error: unable to commit", 1); + // Self-cleanup: worktree remove. + if (args[0] === "worktree" && args[1] === "remove") { + removeCalls.push(args); + return ok(); + } + return fail("unexpected"); + }, + applyPatch: () => ok(), + }); + assert.throws( + () => createWorktree(runtime, "run-1", "bundle-a"), + (err) => + err instanceof WorktreeError && err.cause.operation === "snapshot-commit", + ); + assert.equal( + removeCalls.length, + 1, + "self-cleanup must invoke worktree remove on snapshot failure", + ); +}); + +test("createWorktree: self-cleanup failure does not mask the original error (R3-F08)", () => { + const runtime = makeRuntime({ + repoRoot: "/repo", + git: (args) => { + if (args[0] === "rev-parse") return ok("abc\n"); + if (args[0] === "worktree" && args[1] === "add") return ok(); + if (args[0] === "diff" && args.includes("HEAD")) + return ok("diff --git a/x b/x\n"); + // Self-cleanup also fails (e.g., filesystem issue). + if (args[0] === "worktree" && args[1] === "remove") + return fail("fatal: cannot remove", 128); + return fail("unexpected"); + }, + // Materialize-apply FAILS — triggers self-cleanup, which also fails. + applyPatch: () => fail("error: patch does not apply", 1), + }); + // The ORIGINAL error (materialize-apply) must propagate, not the cleanup error. + assert.throws( + () => createWorktree(runtime, "run-1", "bundle-a"), + (err) => + err instanceof WorktreeError && + err.cause.operation === "materialize-apply", + ); +}); + +// --- worktreePath --- + +test("worktreePath: produces the .specflow/worktrees// convention", () => { + assert.equal( + worktreePath("/repo", "run-1", "bundle-a"), + "/repo/.specflow/worktrees/run-1/bundle-a", + ); +}); + +// --- computeDiff --- + +test("computeDiff: stages untracked files via add -N then runs git diff --binary --find-renames from the base SHA", () => { + const invocations: Array = []; + const handle: WorktreeHandle = { + path: "/repo/.specflow/worktrees/r/b", + baseSha: "abc123", + runId: "r", + bundleId: "b", + }; + const runtime = makeRuntime({ + git: (args, cwd) => { + invocations.push(args); + assert.equal(cwd, handle.path); + // `git add -N` (intent-to-add) is invoked first so subsequent + // `git diff` includes untracked files. Return zero. + if (args[0] === "add" && args[1] === "-N") return ok(); + return ok(Buffer.from("diff --git a/x b/x\n...\n", "utf8")); + }, + }); + const patch = computeDiff(runtime, handle); + // First call: add -N -- . (intent-to-add, captures untracked) + const addCall = invocations[0]; + assert.equal(addCall[0], "add"); + assert.equal(addCall[1], "-N"); + // Second call: the actual diff. + const diffCall = invocations[1]; + assert.equal(diffCall[0], "diff"); + assert.ok(diffCall.includes("--binary")); + assert.ok(diffCall.includes("--find-renames")); + assert.ok(diffCall.includes("abc123")); + assert.ok(patch.toString("utf8").startsWith("diff --git a/x b/x")); +}); + +test("computeDiff: propagates git add -N failure as WorktreeError", () => { + const handle: WorktreeHandle = { + path: "/repo/wt", + baseSha: "abc123", + runId: "r", + bundleId: "b", + }; + const runtime = makeRuntime({ + git: (args) => { + if (args[0] === "add" && args[1] === "-N") + return fail("error: index lock", 1); + return ok(); + }, + }); + assert.throws( + () => computeDiff(runtime, handle), + (err) => + err instanceof WorktreeError && err.cause.operation === "diff-intent-add", + ); +}); + +test("computeDiff: returns raw binary buffer so NUL bytes survive", () => { + const handle: WorktreeHandle = { + path: "/repo/wt", + baseSha: "abc123", + runId: "r", + bundleId: "b", + }; + const binary = Buffer.concat([ + Buffer.from("diff --git a/img.png b/img.png\n", "utf8"), + Buffer.from([0x00, 0x01, 0x02, 0xff, 0x00, 0xab]), + ]); + const runtime = makeRuntime({ + git: (args) => { + if (args[0] === "add" && args[1] === "-N") return ok(); + return ok(binary); + }, + }); + const patch = computeDiff(runtime, handle); + assert.equal(patch.length, binary.length); + assert.equal(patch[binary.length - 1], 0xab); + assert.equal(patch[binary.length - 2], 0x00); +}); + +// --- importPatch --- + +test("importPatch: short-circuits on empty patch with no git invocation", () => { + let called = false; + const runtime = makeRuntime({ + applyPatch: () => { + called = true; + return ok(); + }, + }); + importPatch(runtime, Buffer.alloc(0)); + assert.equal(called, false); +}); + +test("importPatch: applies non-empty patch at repoRoot and succeeds", () => { + interface Captured { + patch: Buffer; + cwd: string; + } + const captured: Captured[] = []; + const runtime = makeRuntime({ + applyPatch: (patch, cwd) => { + captured.push({ patch, cwd }); + return ok(); + }, + }); + const patch = Buffer.from("diff --git a/x b/x\n--- a/x\n+++ b/x\n", "utf8"); + importPatch(runtime, patch); + assert.equal(captured.length, 1); + assert.equal(captured[0].cwd, "/repo"); + assert.equal(captured[0].patch.toString("utf8"), patch.toString("utf8")); +}); + +test("importPatch: throws WorktreeError with apply operation on failure", () => { + const runtime = makeRuntime({ + applyPatch: () => fail("error: patch does not apply", 1), + }); + const patch = Buffer.from("not-a-valid-patch", "utf8"); + assert.throws( + () => importPatch(runtime, patch), + (err) => + err instanceof WorktreeError && + err.cause.operation === "apply" && + err.message.includes("patch does not apply"), + ); +}); + +// --- removeWorktree --- + +test("removeWorktree: invokes git worktree remove --force to handle dirty trees", () => { + const invocations: Array = []; + const handle: WorktreeHandle = { + path: "/repo/.specflow/worktrees/r/b", + baseSha: "abc123", + runId: "r", + bundleId: "b", + }; + const runtime = makeRuntime({ + git: (args) => { + invocations.push(args); + return ok(); + }, + }); + removeWorktree(runtime, handle); + const call = invocations[0]; + assert.equal(call[0], "worktree"); + assert.equal(call[1], "remove"); + assert.ok(call.includes("--force")); + assert.ok(call.includes(handle.path)); +}); + +test("removeWorktree: propagates git failure", () => { + const handle: WorktreeHandle = { + path: "/repo/wt", + baseSha: "abc123", + runId: "r", + bundleId: "b", + }; + const runtime = makeRuntime({ + git: () => fail("fatal: not a working tree", 128), + }); + assert.throws(() => removeWorktree(runtime, handle)); +}); + +// --- listTouchedPaths --- + +test("listTouchedPaths: extracts paths from simple modify diff", () => { + const patch = [ + "diff --git a/src/a.ts b/src/a.ts", + "index 0000..1111 100644", + "--- a/src/a.ts", + "+++ b/src/a.ts", + "@@ -1 +1 @@", + "-old", + "+new", + "", + ].join("\n"); + const paths = listTouchedPaths(patch); + assert.deepEqual([...paths].sort(), ["src/a.ts"]); +}); + +test("listTouchedPaths: extracts new-path on rename", () => { + const patch = [ + "diff --git a/old/name.ts b/new/name.ts", + "similarity index 95%", + "rename from old/name.ts", + "rename to new/name.ts", + "", + ].join("\n"); + const paths = listTouchedPaths(patch); + assert.deepEqual([...paths], ["new/name.ts"]); + assert.ok(!paths.has("old/name.ts")); +}); + +test("listTouchedPaths: extracts path on delete", () => { + const patch = [ + "diff --git a/gone.ts b/gone.ts", + "deleted file mode 100644", + "--- a/gone.ts", + "+++ /dev/null", + "", + ].join("\n"); + const paths = listTouchedPaths(patch); + assert.deepEqual([...paths], ["gone.ts"]); +}); + +test("listTouchedPaths: extracts path on new file", () => { + const patch = [ + "diff --git a/new.ts b/new.ts", + "new file mode 100644", + "index 0000000..abc1234", + "--- /dev/null", + "+++ b/new.ts", + "", + ].join("\n"); + const paths = listTouchedPaths(patch); + assert.deepEqual([...paths], ["new.ts"]); +}); + +test("listTouchedPaths: extracts path on mode-only change", () => { + const patch = [ + "diff --git a/bin/run.sh b/bin/run.sh", + "old mode 100644", + "new mode 100755", + "", + ].join("\n"); + const paths = listTouchedPaths(patch); + assert.deepEqual([...paths], ["bin/run.sh"]); +}); + +test("listTouchedPaths: extracts path on binary-file change (no text hunks)", () => { + const patch = [ + "diff --git a/img.png b/img.png", + "index 1111..2222 100644", + "GIT binary patch", + "literal 10", + "Hcmeyu...", + "", + ].join("\n"); + const paths = listTouchedPaths(patch); + assert.deepEqual([...paths], ["img.png"]); +}); + +test("listTouchedPaths: extracts multiple paths across a combined patch", () => { + const patch = [ + "diff --git a/a.ts b/a.ts", + "--- a/a.ts", + "+++ b/a.ts", + "diff --git a/old.ts b/new.ts", + "rename from old.ts", + "rename to new.ts", + "diff --git a/bin.sh b/bin.sh", + "old mode 100644", + "new mode 100755", + "diff --git a/gone.ts b/gone.ts", + "deleted file mode 100644", + "", + ].join("\n"); + const paths = listTouchedPaths(patch); + assert.deepEqual([...paths].sort(), ["a.ts", "bin.sh", "gone.ts", "new.ts"]); + assert.ok(!paths.has("old.ts")); +}); + +test("listTouchedPaths: handles git-quoted paths with spaces", () => { + const patch = [ + 'diff --git "a/src/has space.ts" "b/src/has space.ts"', + '--- "a/src/has space.ts"', + '+++ "b/src/has space.ts"', + "", + ].join("\n"); + const paths = listTouchedPaths(patch); + assert.deepEqual([...paths], ["src/has space.ts"]); +}); + +test("listTouchedPaths: handles git-quoted paths with backslash escapes", () => { + const patch = ['diff --git "a/weird\\tpath" "b/weird\\tpath"', ""].join("\n"); + const paths = listTouchedPaths(patch); + assert.deepEqual([...paths], ["weird\tpath"]); +}); + +test("listTouchedPaths: accepts Buffer input (binary patch bytes)", () => { + const patch = Buffer.concat([ + Buffer.from("diff --git a/img.png b/img.png\n", "utf8"), + Buffer.from([0x00, 0x01, 0x02]), + Buffer.from("\nmore\n", "utf8"), + ]); + const paths = listTouchedPaths(patch); + assert.deepEqual([...paths], ["img.png"]); +}); + +test("listTouchedPaths: returns empty set on empty patch", () => { + const paths = listTouchedPaths(""); + assert.equal(paths.size, 0); +}); + +// --- isProtectedPath --- + +test("isProtectedPath: task-graph.json under the change dir is protected", () => { + assert.equal( + isProtectedPath("openspec/changes/my-change/task-graph.json", "my-change"), + true, + ); +}); + +test("isProtectedPath: tasks.md under the change dir is protected", () => { + assert.equal( + isProtectedPath("openspec/changes/my-change/tasks.md", "my-change"), + true, + ); +}); + +test("isProtectedPath: any path under .specflow/ is protected", () => { + assert.equal(isProtectedPath(".specflow/runs/r/state.json", "ch"), true); + assert.equal(isProtectedPath(".specflow/worktrees/r/b/foo.ts", "ch"), true); +}); + +test("isProtectedPath: openspec/specs is not protected (only the change's own graph/tasks are)", () => { + assert.equal( + isProtectedPath("openspec/specs/some-cap/spec.md", "my-change"), + false, + ); +}); + +test("isProtectedPath: another change's task-graph is not protected under this change", () => { + assert.equal( + isProtectedPath( + "openspec/changes/other-change/task-graph.json", + "my-change", + ), + false, + ); +}); + +test("isProtectedPath: unrelated source file is not protected", () => { + assert.equal(isProtectedPath("src/foo.ts", "my-change"), false); +}); + +// --- internal parsing guards --- + +test("parseDiffGitLine: returns null for non-matching input", () => { + assert.equal(__internal_testing.parseDiffGitLine("not a diff header"), null); + assert.equal(__internal_testing.parseDiffGitLine("diff --git "), null); + assert.equal( + __internal_testing.parseDiffGitLine("diff --git onlyone/path"), + null, + ); +}); + +test("dequoteGitPath: decodes common C-style escapes", () => { + assert.equal(__internal_testing.dequoteGitPath("plain"), "plain"); + assert.equal(__internal_testing.dequoteGitPath("a\\tb"), "a\tb"); + assert.equal(__internal_testing.dequoteGitPath("a\\nb"), "a\nb"); + assert.equal(__internal_testing.dequoteGitPath("a\\\\b"), "a\\b"); + assert.equal(__internal_testing.dequoteGitPath('say\\"hi'), 'say"hi'); +}); diff --git a/src/tests/apply-worktree-integrate.test.ts b/src/tests/apply-worktree-integrate.test.ts new file mode 100644 index 0000000..80a6998 --- /dev/null +++ b/src/tests/apply-worktree-integrate.test.ts @@ -0,0 +1,358 @@ +import assert from "node:assert/strict"; +import test from "node:test"; +import { + formatRejectionCause, + integrateBundle, +} from "../lib/apply-worktree/integrate.js"; +import type { + GitApplier, + GitCommandResult, + GitRunner, + WorktreeHandle, + WorktreeRuntime, +} from "../lib/apply-worktree/worktree.js"; + +const CHANGE_ID = "my-change"; + +function handle(overrides: Partial = {}): WorktreeHandle { + return { + path: "/repo/.specflow/worktrees/run-1/bundle-a", + baseSha: "base-sha", + runId: "run-1", + bundleId: "bundle-a", + ...overrides, + }; +} + +function ok(stdout: string | Buffer = ""): GitCommandResult { + return { + status: 0, + stdout: Buffer.isBuffer(stdout) ? stdout : Buffer.from(stdout, "utf8"), + stderr: "", + }; +} + +function failGit(stderr: string, status = 1): GitCommandResult { + return { status, stdout: Buffer.alloc(0), stderr }; +} + +function fakeRuntime(opts: { + readonly diffOutput: string; + readonly applyResult?: GitCommandResult; + readonly onApply?: (patch: Buffer) => void; +}): WorktreeRuntime { + const git: GitRunner = (args) => { + // `git add -N .` is invoked by computeDiff to surface untracked files + // in the subsequent `git diff`. Return success as a no-op. + if (args[0] === "add" && args[1] === "-N") { + return ok(); + } + if (args[0] === "diff") { + return ok(opts.diffOutput); + } + throw new Error( + `unexpected git call in integration test: ${args.join(" ")}`, + ); + }; + const applyPatch: GitApplier = (patch) => { + if (opts.onApply) opts.onApply(patch); + return opts.applyResult ?? ok(); + }; + return { repoRoot: "/repo", git, applyPatch }; +} + +// --- Success path --- + +test("integrateBundle: success when every touched path is declared and apply succeeds", () => { + let appliedPatch: Buffer | null = null; + const diff = [ + "diff --git a/src/a.ts b/src/a.ts", + "--- a/src/a.ts", + "+++ b/src/a.ts", + "", + ].join("\n"); + const runtime = fakeRuntime({ + diffOutput: diff, + onApply: (p) => { + appliedPatch = p; + }, + }); + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: ["src/a.ts"], + }, + }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.deepEqual(result.touched, ["src/a.ts"]); + assert.deepEqual(result.overDeclared, []); + assert.ok(appliedPatch, "patch must be applied on success"); +}); + +test("integrateBundle: success emits overDeclared warning list for non-touched declared paths", () => { + const diff = [ + "diff --git a/a.ts b/a.ts", + "--- a/a.ts", + "+++ b/a.ts", + "", + ].join("\n"); + const runtime = fakeRuntime({ diffOutput: diff }); + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: ["a.ts", "b.ts", "c.ts"], + }, + }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.deepEqual(result.touched, ["a.ts"]); + assert.deepEqual(result.overDeclared, ["b.ts", "c.ts"]); +}); + +test("integrateBundle: success with a rename matches new path against produced_artifacts", () => { + const diff = [ + "diff --git a/old.ts b/new.ts", + "rename from old.ts", + "rename to new.ts", + "", + ].join("\n"); + const runtime = fakeRuntime({ diffOutput: diff }); + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: ["new.ts"], // new path declared; old not declared + }, + }); + assert.equal(result.ok, true, "rename must match by new path"); +}); + +// --- Rejection: empty_diff_on_success --- + +test("integrateBundle: rejects with empty_diff_on_success when diff is empty", () => { + const runtime = fakeRuntime({ diffOutput: "" }); + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: ["a.ts"], + }, + }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.equal(result.cause.kind, "empty_diff_on_success"); + assert.deepEqual(result.touched, []); +}); + +// --- Rejection: protected_path --- + +test("integrateBundle: rejects with protected_path when diff touches task-graph.json", () => { + const diff = [ + `diff --git a/openspec/changes/${CHANGE_ID}/task-graph.json b/openspec/changes/${CHANGE_ID}/task-graph.json`, + "", + ].join("\n"); + const runtime = fakeRuntime({ diffOutput: diff }); + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: [`openspec/changes/${CHANGE_ID}/task-graph.json`], + }, + }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.equal(result.cause.kind, "protected_path"); + if (result.cause.kind === "protected_path") { + assert.equal( + result.cause.path, + `openspec/changes/${CHANGE_ID}/task-graph.json`, + ); + } +}); + +test("integrateBundle: rejects with protected_path when diff touches tasks.md", () => { + const diff = [ + `diff --git a/openspec/changes/${CHANGE_ID}/tasks.md b/openspec/changes/${CHANGE_ID}/tasks.md`, + "", + ].join("\n"); + const runtime = fakeRuntime({ diffOutput: diff }); + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: [`openspec/changes/${CHANGE_ID}/tasks.md`], + }, + }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.equal(result.cause.kind, "protected_path"); +}); + +test("integrateBundle: rejects with protected_path when diff touches any path under .specflow/", () => { + const diff = [ + "diff --git a/.specflow/runs/r/state.json b/.specflow/runs/r/state.json", + "", + ].join("\n"); + const runtime = fakeRuntime({ diffOutput: diff }); + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: [".specflow/runs/r/state.json"], + }, + }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.equal(result.cause.kind, "protected_path"); +}); + +test("integrateBundle: protected-path rejection takes precedence over undeclared-path rejection", () => { + const diff = [ + `diff --git a/openspec/changes/${CHANGE_ID}/task-graph.json b/openspec/changes/${CHANGE_ID}/task-graph.json`, + "diff --git a/other.ts b/other.ts", // also undeclared + "", + ].join("\n"); + const runtime = fakeRuntime({ diffOutput: diff }); + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: [], // both are undeclared, BUT task-graph is protected + }, + }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.equal(result.cause.kind, "protected_path"); +}); + +// --- Rejection: undeclared_path --- + +test("integrateBundle: rejects with undeclared_path on touched path not in produced_artifacts", () => { + const diff = [ + "diff --git a/declared.ts b/declared.ts", + "diff --git a/sneaky.ts b/sneaky.ts", // undeclared + "", + ].join("\n"); + const runtime = fakeRuntime({ diffOutput: diff }); + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: ["declared.ts"], + }, + }); + assert.equal(result.ok, false); + if (result.ok) return; + assert.equal(result.cause.kind, "undeclared_path"); + if (result.cause.kind === "undeclared_path") { + assert.equal(result.cause.path, "sneaky.ts"); + } +}); + +// --- Rejection: patch_apply_failure --- + +test("integrateBundle: rejects with patch_apply_failure when git apply returns non-zero", () => { + let applyCalled = false; + const diff = ["diff --git a/a.ts b/a.ts", ""].join("\n"); + const runtime: WorktreeRuntime = { + repoRoot: "/repo", + git: () => ok(diff), + applyPatch: () => { + applyCalled = true; + return failGit("error: patch failed at line 1", 1); + }, + }; + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: ["a.ts"], + }, + }); + assert.equal(result.ok, false); + assert.equal(applyCalled, true, "git apply must be invoked"); + if (result.ok) return; + assert.equal(result.cause.kind, "patch_apply_failure"); + if (result.cause.kind === "patch_apply_failure") { + assert.ok(result.cause.stderr.includes("patch failed")); + } +}); + +test("integrateBundle: patch_apply_failure is checked LAST (after declaration and protected-path checks)", () => { + // Valid diff, undeclared path. Apply would fail if invoked, but + // undeclared-path check should fire first and short-circuit. + let applyInvoked = false; + const diff = "diff --git a/undeclared.ts b/undeclared.ts\n"; + const runtime: WorktreeRuntime = { + repoRoot: "/repo", + git: () => ok(diff), + applyPatch: () => { + applyInvoked = true; + return failGit("would have failed", 1); + }, + }; + const result = integrateBundle({ + runtime, + handle: handle(), + changeId: CHANGE_ID, + subagentResult: { + status: "success", + produced_artifacts: [], + }, + }); + assert.equal(result.ok, false); + assert.equal(applyInvoked, false, "apply must not run before check passes"); + if (result.ok) return; + assert.equal(result.cause.kind, "undeclared_path"); +}); + +// --- formatRejectionCause --- + +test("formatRejectionCause: includes the cause kind and context for each cause", () => { + assert.ok( + formatRejectionCause({ kind: "empty_diff_on_success" }).includes( + "empty_diff_on_success", + ), + ); + assert.ok( + formatRejectionCause({ + kind: "protected_path", + path: ".specflow/x", + }).includes(".specflow/x"), + ); + assert.ok( + formatRejectionCause({ + kind: "undeclared_path", + path: "sneaky.ts", + }).includes("sneaky.ts"), + ); + assert.ok( + formatRejectionCause({ + kind: "patch_apply_failure", + stderr: "hunk failed", + }).includes("hunk failed"), + ); +}); diff --git a/src/tests/apply-worktree-realgit.test.ts b/src/tests/apply-worktree-realgit.test.ts new file mode 100644 index 0000000..8e2f86e --- /dev/null +++ b/src/tests/apply-worktree-realgit.test.ts @@ -0,0 +1,252 @@ +// Real-git integration tests for apply-worktree-isolation. +// +// The rest of the test suite uses injected WorktreeRuntime fakes. This file +// spawns actual `git` against a temp repository to verify the production +// `defaultGit` / `defaultApplier` paths — the code paths that use +// `git apply --binary --index` (R1-F01) and the full materialize + snapshot +// sequence end-to-end. Without this, a regression in the production-only +// branches (e.g., dropping `--index`) would not be caught by the injected- +// runner tests. + +import assert from "node:assert/strict"; +import { execFileSync } from "node:child_process"; +import { mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import test from "node:test"; +import { + computeDiff, + createWorktree, + importPatch, + removeWorktree, +} from "../lib/apply-worktree/worktree.js"; + +function git(args: readonly string[], cwd: string): string { + return execFileSync("git", [...args], { cwd, encoding: "utf8" }).trim(); +} + +function initRepo(): string { + const root = mkdtempSync(join(tmpdir(), "specflow-realgit-")); + git(["init", "--quiet"], root); + git(["config", "user.email", "test@specflow.local"], root); + git(["config", "user.name", "specflow-test"], root); + git(["config", "commit.gpgsign", "false"], root); + writeFileSync(join(root, "README.md"), "initial\n", "utf8"); + git(["add", "README.md"], root); + git(["commit", "-q", "-m", "initial"], root); + return root; +} + +test("createWorktree (real git): later worktree inherits earlier imported bundle changes (R1-F01)", () => { + const root = initRepo(); + try { + // Simulate the full multi-bundle apply-run flow: + // + // 1. Bundle A "imports" a patch into the main workspace via importPatch. + // Production uses `git apply --binary --index` so the change is staged + // and will be visible to later `git diff HEAD` calls. + // 2. Bundle B's worktree is created. The materialize step picks up A's + // staged change and applies it into B's worktree. The snapshot step + // commits it in the worktree so baseSha points at the post-A commit. + // 3. The subagent for B makes its own edit. `computeDiff` captures ONLY + // B's delta, not A's already-materialized change. + + const patchFromBundleA = [ + "diff --git a/bundle-a-output.txt b/bundle-a-output.txt", + "new file mode 100644", + "--- /dev/null", + "+++ b/bundle-a-output.txt", + "@@ -0,0 +1 @@", + "+written by bundle A", + "", + ].join("\n"); + + importPatch({ repoRoot: root }, Buffer.from(patchFromBundleA, "utf8")); + + // Verify A's new file is staged (R1-F01: --index flag effect). + const stagedFiles = git(["diff", "--name-only", "--cached", "HEAD"], root); + assert.ok( + stagedFiles.split("\n").includes("bundle-a-output.txt"), + `bundle-a-output.txt must be staged in main workspace index; staged: ${stagedFiles}`, + ); + + // Also verify it would show up in git diff HEAD (the input to materialize). + const diffHead = git(["diff", "--name-only", "HEAD"], root); + assert.ok( + diffHead.split("\n").includes("bundle-a-output.txt"), + `bundle-a-output.txt must be visible to git diff HEAD; diff --name-only HEAD: ${diffHead}`, + ); + + // Now create bundle B's worktree. The materialize+snapshot sequence must + // pull A's change into B's worktree. + const handle = createWorktree({ repoRoot: root }, "run-1", "bundle-b"); + + // Bundle B's worktree must contain A's imported file. + const materializedFile = readFileSync( + join(handle.path, "bundle-a-output.txt"), + "utf8", + ); + assert.equal( + materializedFile, + "written by bundle A\n", + "bundle B's worktree must contain bundle A's imported changes", + ); + + // baseSha must point at the snapshot commit, not the pre-A HEAD. + const mainHead = git(["rev-parse", "HEAD"], root); + assert.notEqual( + handle.baseSha, + mainHead, + "handle.baseSha must point at the snapshot commit, not the main-workspace HEAD", + ); + + // Now simulate bundle B's subagent making its own edit in the worktree. + writeFileSync( + join(handle.path, "bundle-b-output.txt"), + "written by bundle B\n", + "utf8", + ); + + // computeDiff must capture ONLY B's delta (not A's already-materialized content). + const bDiff = computeDiff({ repoRoot: root }, handle).toString("utf8"); + assert.ok( + bDiff.includes("bundle-b-output.txt"), + "B's diff must include B's own file", + ); + assert.ok( + !bDiff.includes("bundle-a-output.txt"), + "B's diff MUST NOT include A's already-materialized file (would cause double-apply at main root)", + ); + + // Clean up the worktree explicitly so the temp dir can be removed. + removeWorktree({ repoRoot: root }, handle); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("createWorktree (real git): untracked files in main workspace are materialized into the worktree (R4-F10)", () => { + const root = initRepo(); + try { + // Create an untracked file in the main workspace (NOT staged, NOT committed). + writeFileSync(join(root, "untracked-new.txt"), "I am untracked\n", "utf8"); + + // Verify the file is truly untracked. + const lsFiles = git(["ls-files", "--others", "--exclude-standard"], root); + assert.ok( + lsFiles.split("\n").includes("untracked-new.txt"), + `untracked-new.txt must be listed as untracked; got: ${lsFiles}`, + ); + + // Create a worktree — materialize must include the untracked file. + const handle = createWorktree({ repoRoot: root }, "run-1", "bundle-ut"); + + // The untracked file must be present in the worktree. + const materialized = readFileSync( + join(handle.path, "untracked-new.txt"), + "utf8", + ); + assert.equal( + materialized, + "I am untracked\n", + "untracked file must be materialized into the worktree", + ); + + // The main workspace's untracked file must STILL be untracked after + // materialization (the intent-to-add + reset must leave the index clean). + const lsFilesAfter = git( + ["ls-files", "--others", "--exclude-standard"], + root, + ); + assert.ok( + lsFilesAfter.split("\n").includes("untracked-new.txt"), + `untracked-new.txt must remain untracked in main workspace after materialization; got: ${lsFilesAfter}`, + ); + + removeWorktree({ repoRoot: root }, handle); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("createWorktree (real git): clean workspace — no materialize, baseSha equals main HEAD", () => { + const root = initRepo(); + try { + const mainHead = git(["rev-parse", "HEAD"], root); + const handle = createWorktree({ repoRoot: root }, "run-1", "bundle-a"); + + // With no uncommitted changes, the snapshot step is a no-op and baseSha + // stays at the main HEAD. + assert.equal(handle.baseSha, mainHead); + // Worktree tree matches main HEAD exactly. + const wtReadme = readFileSync(join(handle.path, "README.md"), "utf8"); + assert.equal(wtReadme, "initial\n"); + + removeWorktree({ repoRoot: root }, handle); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("importPatch (real git): newly created file is staged in the index (R1-F01)", () => { + const root = initRepo(); + try { + const patch = [ + "diff --git a/new-file.txt b/new-file.txt", + "new file mode 100644", + "--- /dev/null", + "+++ b/new-file.txt", + "@@ -0,0 +1 @@", + "+hello", + "", + ].join("\n"); + + importPatch({ repoRoot: root }, Buffer.from(patch, "utf8")); + + // Under --index, the new file must be staged (not just in the working tree). + // Without --index, `git diff --cached HEAD` would be empty for untracked files. + const staged = git(["diff", "--name-only", "--cached", "HEAD"], root); + assert.ok( + staged.split("\n").includes("new-file.txt"), + `new-file.txt must be staged; actually staged: '${staged}'`, + ); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test("importPatch (real git): binary-safe patch applies cleanly", () => { + const root = initRepo(); + try { + // Create a binary file, commit it, then compute a real binary-safe patch + // that replaces it. This exercises `git apply --binary`. + const originalBytes = Buffer.from([0x00, 0x01, 0x02, 0xff, 0xfe]); + writeFileSync(join(root, "img.bin"), originalBytes); + git(["add", "img.bin"], root); + git(["commit", "-q", "-m", "add binary"], root); + + // Modify the binary and capture the diff. + const modifiedBytes = Buffer.from([0x10, 0x20, 0x30, 0x40, 0x50]); + writeFileSync(join(root, "img.bin"), modifiedBytes); + const diffOutput = execFileSync("git", ["diff", "--binary", "HEAD"], { + cwd: root, + }); + + // Reset and re-apply via importPatch. + git(["checkout", "--", "img.bin"], root); + const current = readFileSync(join(root, "img.bin")); + assert.deepEqual( + Buffer.from(current), + originalBytes, + "reset must restore original bytes", + ); + + importPatch({ repoRoot: root }, diffOutput); + + // The binary file must now match the modified bytes. + const applied = readFileSync(join(root, "img.bin")); + assert.deepEqual(Buffer.from(applied), modifiedBytes); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); diff --git a/src/tests/generation.test.ts b/src/tests/generation.test.ts index 1bdf1d9..c76e5f2 100644 --- a/src/tests/generation.test.ts +++ b/src/tests/generation.test.ts @@ -460,16 +460,26 @@ test("specflow.apply command body documents the subagent dispatcher branching", "apply body should state that subagents must not edit tasks.md", ); - // Drain-then-stop on failure (D4). + // Drain-then-stop on failure (D4, updated for apply-worktree-isolation). assert.ok( apply.includes("Drain-then-stop"), "apply body should document drain-then-stop semantics", ); assert.ok( - apply.includes("leave the bundle in `in_progress`") || - apply.includes("leave the failed bundle in_progress") || - apply.includes("leave the bundle in_progress"), - "apply body should state that failed bundles remain in_progress after drain-then-stop", + apply.includes("subagent_failed"), + "apply body should document the subagent_failed terminal status", + ); + assert.ok( + apply.includes("integration_rejected"), + "apply body should document the integration_rejected terminal status", + ); + assert.ok( + apply.includes("Worktree-mode execution"), + "apply body should document worktree-mode execution for dispatched bundles", + ); + assert.ok( + apply.includes("retain") || apply.includes("retain"), + "apply body should state that worktrees are retained on failure/rejection", ); // Legacy fallback (review P2 fix): graph absent bypasses the dispatcher. diff --git a/src/tests/task-planner-core.test.ts b/src/tests/task-planner-core.test.ts index cd58cfc..b8785f0 100644 --- a/src/tests/task-planner-core.test.ts +++ b/src/tests/task-planner-core.test.ts @@ -639,3 +639,161 @@ test("renderTasksMd: unchanged — produces the same output when invoked directl const second = renderTasksMd(result.taskGraph); assert.equal(first, second); }); + +// --- apply-worktree-isolation: subagent_failed / integration_rejected --- + +test("updateBundleStatus: in_progress → subagent_failed succeeds without child coercion", () => { + const graph = inProgressSchemaGraph(); + const result = updateBundleStatus(graph, "schema", "subagent_failed"); + assert.equal(result.ok, true); + if (!result.ok) return; + const updated = result.taskGraph.bundles.find((b) => b.id === "schema"); + assert.equal(updated?.status, "subagent_failed"); + // Child statuses preserved (no normalization): the bundle started at + // in_progress over pending children, so children stay pending. + for (const task of updated?.tasks ?? []) { + assert.equal(task.status, "pending"); + } + assert.equal(result.coercions.length, 0); +}); + +test("updateBundleStatus: in_progress → integration_rejected succeeds without child coercion", () => { + const graph = inProgressSchemaGraph(); + const result = updateBundleStatus(graph, "schema", "integration_rejected"); + assert.equal(result.ok, true); + if (!result.ok) return; + const updated = result.taskGraph.bundles.find((b) => b.id === "schema"); + assert.equal(updated?.status, "integration_rejected"); + for (const task of updated?.tasks ?? []) { + assert.equal(task.status, "pending"); + } + assert.equal(result.coercions.length, 0); +}); + +test("updateBundleStatus: pending → subagent_failed is rejected (must go through in_progress)", () => { + const result = updateBundleStatus(sampleGraph(), "schema", "subagent_failed"); + assert.equal(result.ok, false); +}); + +test("updateBundleStatus: pending → integration_rejected is rejected (must go through in_progress)", () => { + const result = updateBundleStatus( + sampleGraph(), + "schema", + "integration_rejected", + ); + assert.equal(result.ok, false); +}); + +test("updateBundleStatus: subagent_failed → done directly is rejected", () => { + const base = sampleGraph(); + const graph: TaskGraph = { + ...base, + bundles: [ + { ...base.bundles[0], status: "subagent_failed" }, + ...base.bundles.slice(1), + ], + }; + const result = updateBundleStatus(graph, "schema", "done"); + assert.equal(result.ok, false); + if (!result.ok) { + assert.ok(result.error.includes("Invalid status transition")); + } +}); + +test("updateBundleStatus: integration_rejected → in_progress directly is rejected", () => { + const base = sampleGraph(); + const graph: TaskGraph = { + ...base, + bundles: [ + { ...base.bundles[0], status: "integration_rejected" }, + ...base.bundles.slice(1), + ], + }; + const result = updateBundleStatus(graph, "schema", "in_progress"); + assert.equal(result.ok, false); +}); + +test("updateBundleStatus: subagent_failed → pending rejected without allowReset", () => { + const base = sampleGraph(); + const graph: TaskGraph = { + ...base, + bundles: [ + { ...base.bundles[0], status: "subagent_failed" }, + ...base.bundles.slice(1), + ], + }; + const result = updateBundleStatus(graph, "schema", "pending"); + assert.equal(result.ok, false); +}); + +test("updateBundleStatus: subagent_failed → pending succeeds with allowReset", () => { + const base = sampleGraph(); + const graph: TaskGraph = { + ...base, + bundles: [ + { ...base.bundles[0], status: "subagent_failed" }, + ...base.bundles.slice(1), + ], + }; + const result = updateBundleStatus(graph, "schema", "pending", { + allowReset: true, + }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal( + result.taskGraph.bundles.find((b) => b.id === "schema")?.status, + "pending", + ); +}); + +test("updateBundleStatus: integration_rejected → pending succeeds with allowReset", () => { + const base = sampleGraph(); + const graph: TaskGraph = { + ...base, + bundles: [ + { ...base.bundles[0], status: "integration_rejected" }, + ...base.bundles.slice(1), + ], + }; + const result = updateBundleStatus(graph, "schema", "pending", { + allowReset: true, + }); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal( + result.taskGraph.bundles.find((b) => b.id === "schema")?.status, + "pending", + ); +}); + +test("updateBundleStatus: done → pending rejected even with allowReset (reset only from new statuses)", () => { + const base = sampleGraph(); + const graph: TaskGraph = { + ...base, + bundles: [{ ...base.bundles[0], status: "done" }, ...base.bundles.slice(1)], + }; + const result = updateBundleStatus(graph, "schema", "pending", { + allowReset: true, + }); + assert.equal(result.ok, false); +}); + +test("renderTasksMd: subagent_failed renders a distinct marker", () => { + const graph = inProgressSchemaGraph(); + const result = updateBundleStatus(graph, "schema", "subagent_failed"); + assert.equal(result.ok, true); + if (!result.ok) return; + const md = renderTasksMd(result.taskGraph); + assert.ok(md.includes("subagent failed")); + assert.ok(md.includes("## 1. Define Schema ✗")); +}); + +test("renderTasksMd: integration_rejected renders a distinct marker", () => { + const graph = inProgressSchemaGraph(); + const result = updateBundleStatus(graph, "schema", "integration_rejected"); + assert.equal(result.ok, true); + if (!result.ok) return; + const md = renderTasksMd(result.taskGraph); + assert.ok(md.includes("integration rejected")); + assert.ok(md.includes("## 1. Define Schema ⚠")); +}); diff --git a/src/tests/task-planner-schema.test.ts b/src/tests/task-planner-schema.test.ts index 08acaf4..535d4bd 100644 --- a/src/tests/task-planner-schema.test.ts +++ b/src/tests/task-planner-schema.test.ts @@ -306,3 +306,70 @@ test("validateTaskGraph: accepts mixed graph where some bundles have size_score assert.equal(result.valid, true, result.errors.join(", ")); assert.equal(result.errors.length, 0); }); + +// --- apply-worktree-isolation schema extensions --- + +test("validateTaskGraph: accepts subagent_failed as a bundle status", () => { + const graph = validGraph(); + const g = { + ...graph, + bundles: [ + { ...graph.bundles[0], status: "subagent_failed" }, + graph.bundles[1], + ], + }; + const result = validateTaskGraph(g); + assert.equal(result.valid, true, result.errors.join(", ")); +}); + +test("validateTaskGraph: accepts integration_rejected as a bundle status", () => { + const graph = validGraph(); + const g = { + ...graph, + bundles: [ + { ...graph.bundles[0], status: "integration_rejected" }, + graph.bundles[1], + ], + }; + const result = validateTaskGraph(g); + assert.equal(result.valid, true, result.errors.join(", ")); +}); + +test("validateTaskGraph: rejects subagent_failed on a child task (bundle-only status)", () => { + const graph = validGraph(); + const g = { + ...graph, + bundles: [ + { + ...graph.bundles[0], + tasks: [{ id: "a-1", title: "Task A1", status: "subagent_failed" }], + }, + graph.bundles[1], + ], + }; + const result = validateTaskGraph(g); + assert.equal(result.valid, false); + assert.ok( + result.errors.some((e) => e.includes("tasks[0].status")), + `expected child-task status error, got: ${result.errors.join(" | ")}`, + ); +}); + +test("validateTaskGraph: rejects integration_rejected on a child task (bundle-only status)", () => { + const graph = validGraph(); + const g = { + ...graph, + bundles: [ + { + ...graph.bundles[0], + tasks: [ + { id: "a-1", title: "Task A1", status: "integration_rejected" }, + ], + }, + graph.bundles[1], + ], + }; + const result = validateTaskGraph(g); + assert.equal(result.valid, false); + assert.ok(result.errors.some((e) => e.includes("tasks[0].status"))); +});