diff --git a/WORKBOOK_v6.md b/WORKBOOK_v6.md index d3acc38..5ff363a 100644 --- a/WORKBOOK_v6.md +++ b/WORKBOOK_v6.md @@ -17,17 +17,17 @@ schema_version: 6 product: ordinary-user-loop-os version_target: loop-os-v1 -current_phase: V6-P3 # V6-P0..P6,见 §3;P2 卡片化座舱随本 PR 落地 -current_substep: p3_real_proof_closeout_pending -last_session_id: s_v6_0002 +current_phase: V6-P6 # V6-P0..P6,见 §3;P4/P5 代码随本 PR 落地 +current_substep: p3_real_proof_still_operator_gated # P3 真证明仍待操作员(真 Draft PR + 真判词) +last_session_id: s_v6_0003 open_holds: 0 blocked_on: none -test_baseline: 864 # main 基线,0 fail;本周期任何回归即闸红 +test_baseline: 889 # main 基线,0 fail;本周期任何回归即闸红 merge_policy: human_only # 系统永不 merge;auto-merge 本周期禁用 # next_action 硬上限 2 行: next_action: | - V6-P2 已交付:LoopCard 五卡主表面 + 浏览器 E2E 每步断言卡型与 next_step(证据入库)。 - 下一步 V6-P3:真实证明收口——真 Draft PR、库内真实 Gemini 判词、fail-closed 回归。 + V6-P4/P5 代码完成:递归 planner+cycle ledger(loop:plan)、soak 运营化(SOAK_OPERATIONS+soak:status)。 + 下一步 V6-P6 普通用户验收;V6-P3 真证明仍操作员闸(cycle-1 提案即此 gap)。 ``` --- diff --git a/docs/SESSION_LOG_v3.md b/docs/SESSION_LOG_v3.md index a1c5cea..2e60d36 100644 --- a/docs/SESSION_LOG_v3.md +++ b/docs/SESSION_LOG_v3.md @@ -1,5 +1,14 @@ # SESSION LOG v3 +## s_v6_0003 · 2026-06-11 · V6-P4 + V6-P5 code complete · recursive planner + soak operationalization + +- **V6-P4 recursive planner** (TDD, 20 tests first): `packages/daemon/src/recursive-planner.ts` — pure `planNextCycle` refuses on dirty tree / red tests / blocked budget (carries the budget reason) / ambiguous SoT (≠1 live root-workbook claim, `detectSotAmbiguity`) / open holds / unmerged previous-cycle PR / unparseable §0 / empty gap registry, each with a human recovery action; otherwise proposes exactly ONE gap by the fixed order safety_evidence > user_ux > automation > fleet > polish (ties by stable input order). 
Cycle ledger `appendCycleLedger` → `evidence/loop-cycles/cycle-<n>.json` {cycle, decision, timestamp, workbook_phase, chosen_gap} AND event-sources `planner.cycle_planned` so `rebuildCycleLedgerFromEvents` reproduces the on-disk ledger (GR#5, tested). GR#8 kept: zero child_process in daemon src (eslint static guard stays green). +- Shell `scripts/loop-planner.ts` (`pnpm loop:plan`): gathers REAL inputs — `git status --porcelain`, WORKBOOK_v6 §0 yaml block, root-workbook SoT scan, budget via `checkHeadlessBudget` against `$AEDEV_HOME/state.db` when present (else default-allow with explicit `no-db` note), holds = max(db active holds, §0 open_holds), previous-cycle gate fail-closed unless `AEDEV_LOOP_PREV_PR_MERGED=1`, `pnpm test` actually run unless operator-asserted via `AEDEV_LOOP_TESTS_GREEN`. Prints a PlanCard-shaped JSON + human text, writes the ledger entry, and STOPS — it never implements, never pushes, never merges (GR#10). +- **First real run committed as evidence**: `evidence/loop-cycles/cycle-1.json` + full output — on a clean tree with the suite green it PROPOSED `v6-p3-real-proof-closeout` (top safety_evidence priority), i.e. the planner correctly points at the operator-gated V6-P3 closeout instead of inventing automatable work. +- **V6-P5 soak ops**: `docs/operations/SOAK_OPERATIONS.md` — exact one-week command (`AEDEV_SOAK_MS=604800000 pnpm test:fleet:soak`), launchd plist (mirrors `scripts/launchd/` direct-node pattern, KeepAlive crash recovery), evidence dir contract (`evidence/fleet-soak/<run-id>/`), one-step resume, failure-recovery table, ntfy wiring (notify-pr-ready.sh pattern; hold pushes already via watchdog), report classification template (GR#7). `soak-pending.json` artifact: `packages/daemon/src/soak-status.ts` (5 tests: build/derive/sticky-terminal/roundtrip/fail-closed reader) + `scripts/soak-status.ts` CLI (`pnpm soak:status [start|complete|fail]`); `running` past `expected_end` honestly reads `overdue`, never silent-completes. 
+- Gates: `pnpm typecheck` PASS; `pnpm lint` PASS; `pnpm test` PASS with 889 passed, 6 skipped (130 files; +25 over the 864 baseline, zero regressions). +- §0 → `current_phase: V6-P6`. Honesty (GR#7): V6-P4/P5 are **code complete**; V6-P3 real proof stays operator-gated (real Draft PR URL + real Gemini verdict still pending on the operator Mac — exactly what cycle-1 proposes); the one-week soak itself remains **unproven** until its evidence lands. + ## s_v6_0002 · 2026-06-11 · V6-P2 complete · card cockpit (UI renders the five loop cards) - New `apps/dashboard/src/pages/cockpit/LoopCard.tsx` (TDD: 13 component tests first): renders exactly the five GR#11 card types from `overview.operatorView.card`; calm bilingual copy; `next_step` is always the prominent first row (`cockpit-loop-card-next-step`); the `machine` sub-object is never visible text — raw codes live only in `data-card-type` / `data-user-state` / `data-machine-stage` / `data-hold-code` / `data-pr-gate-code`. Blocker card shows `human_explanation` + `why_it_matters` + recovery actions, zero raw codes. diff --git a/docs/operations/SOAK_OPERATIONS.md b/docs/operations/SOAK_OPERATIONS.md new file mode 100644 index 0000000..d95dfd8 --- /dev/null +++ b/docs/operations/SOAK_OPERATIONS.md @@ -0,0 +1,196 @@ +# SOAK OPERATIONS — one-week real fleet soak (V6-P5) + +> Runbook for taking the proven in-container soak harness +> (`scripts/fleet-soak.ts`, 5/5 PASS with simulated executors) to a real, +> unattended, ≥1-week run on the operator's Mac. Closes assessment gap #19's +> apparatus; the soak RESULT itself stays honest: until a week-long run's +> evidence lands in-repo, rubric #19 remains **unproven** (GR#7). +> +> Status artifact contract: `packages/daemon/src/soak-status.ts` (tested). +> CLI: `pnpm soak:status` (`scripts/soak-status.ts`). + +## 1. The one-week command + +```bash +cd ~/projects/claude-code-247 +# 604800000 ms = 7 days. Evidence lands in evidence/fleet-soak/<run-id>/. 
+AEDEV_SOAK_MS=604800000 pnpm test:fleet:soak +``` + +Recommended unattended wrapper (status artifact only; the ntfy-on-exit variant is in §7): + +```bash +pnpm soak:status start \ + && if AEDEV_SOAK_MS=604800000 pnpm test:fleet:soak; then + pnpm soak:status complete + else + pnpm soak:status fail + fi +``` + +Notes: +- The harness itself already enforces the safety/test env: remote writes off, + all external CLIs/APIs disabled — a week-long soak spends **zero** credit + while idle (idle-zero-credit is one of its PASS criteria). +- `AEDEV_SOAK_INTERVAL_MS` (default 200) can be raised to 1000–5000 for a + week-long run to keep CPU negligible. + +## 2. `soak-pending.json` status artifact (contract) + +Path: `evidence/fleet-soak/soak-pending.json` +(override: `AEDEV_SOAK_PENDING_PATH`). Exact shape: + +```json +{ + "started_at": "2026-06-11T00:00:00.000Z", + "expected_end": "2026-06-18T00:00:00.000Z", + "status": "running" +} +``` + +- `status` ∈ `running | completed | overdue | failed`. +- `expected_end = started_at + AEDEV_SOAK_MS` (default one week). +- `running` past `expected_end` **reads as** `overdue` — honest "needs a + human look", never a silent fake-complete. `completed`/`failed` are + terminal and sticky. +- Readers fail closed: a missing/corrupt artifact reads as "no soak pending" + (`pnpm soak:status` exits 1), so nothing acts on half-written state. + +Commands: + +```bash +pnpm soak:status # read + time-derived status +pnpm soak:status start # window from AEDEV_SOAK_MS (default 1 week) +pnpm soak:status complete # after the report is generated and checked +pnpm soak:status fail # the run died and will not be resumed +``` + +## 3. 
Evidence directory contract + +``` +evidence/fleet-soak/ + soak-pending.json status artifact (§2) + <run-id>/ one directory per soak run (the harness creates it) + soak-report.md PASS/FAIL per criterion + honesty note + metrics.json machine-readable criteria, drill, idle counters +``` + +The report MUST keep the harness's real/simulated classification: real +daemon + real HTTP + real Ed25519 vs simulated executors. A week-long run +with simulated executors still does NOT check rubric #19's "real-CLI on +operator machines" box — say so in the report. + +## 4. launchd (unattended + crash recovery) + +Mirror of `scripts/launchd/com.claude247.daemon.plist.tpl` (node executes the +entry DIRECTLY so launchd tracks the real PID — no pnpm/tsx wrapper chain). +Save as `~/Library/LaunchAgents/com.claude247.fleet-soak.plist`, replacing the +`@@…@@` placeholders like `scripts/install_launchd.sh` does: + +```xml +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>Label</key><string>com.claude247.fleet-soak</string> + <key>ProgramArguments</key> + <array> + <string>@@NODE@@</string> + <string>--import</string> + <string>tsx</string> + <string>@@REPO_ROOT@@/scripts/fleet-soak.ts</string> + </array> + <key>WorkingDirectory</key><string>@@REPO_ROOT@@</string> + <key>KeepAlive</key> + <dict> + <key>SuccessfulExit</key><false/> + </dict> + <key>RunAtLoad</key><true/> + <key>StandardOutPath</key><string>@@LOG_DIR@@/fleet-soak.out.log</string> + <key>StandardErrorPath</key><string>@@LOG_DIR@@/fleet-soak.err.log</string> + <key>EnvironmentVariables</key> + <dict> + <key>HOME</key><string>@@HOME@@</string> + <key>PATH</key><string>@@PATH@@</string> + <key>AEDEV_SOAK_MS</key><string>604800000</string> + <key>AEDEV_SOAK_INTERVAL_MS</key><string>1000</string> + <key>AEDEV_NTFY_TOPIC</key><string>@@NTFY_TOPIC@@</string> + </dict> +</dict> +</plist> +``` + +```bash +launchctl load ~/Library/LaunchAgents/com.claude247.fleet-soak.plist # install + start +launchctl list | grep fleet-soak # check +launchctl unload ~/Library/LaunchAgents/com.claude247.fleet-soak.plist # stop/remove +``` + +`KeepAlive.SuccessfulExit=false` is the crash-recovery: a crash (or +`kill -9`) restarts the harness; a clean exit (status 0) does not loop — but launchd treats every non-zero exit as a crash, so a run that exits non-zero on a criterion FAIL (§6) is restarted too; `launchctl unload` the job once a FAIL report has landed. + +## 5. Resume after a crash / kill -9 + +The harness is self-contained per run (each start creates a fresh +`evidence/fleet-soak/<run-id>/`): resume = restart. 
One step back to standby: + +```bash +launchctl kickstart -k gui/$(id -u)/com.claude247.fleet-soak +# without launchd: +pnpm soak:status start && AEDEV_SOAK_MS=604800000 pnpm test:fleet:soak +``` + +Honesty rule for the report: a restarted soak's wall-clock week starts over +(`soak-pending.json` shows the real `started_at`). Do not stitch two partial +runs into one "week" — record both directories and say what happened. + +## 6. Failure recovery + +| Symptom | Recovery | +|---|---| +| `pnpm soak:status` says `overdue` | The window elapsed without a completion mark. Check `@@LOG_DIR@@/fleet-soak.*.log` and the run's `soak-report.md`; then `pnpm soak:status complete` (report ok) or `fail` (run died) | +| Harness exits non-zero (criterion FAIL) | The report names the failing criterion. Read `metrics.json`, fix, restart (§5). Mark `pnpm soak:status fail` for the dead run | +| launchd crash-loop (`fleet-soak.err.log` repeats) | `launchctl unload`, fix the cause, `launchctl load` again. The plist never restarts after a clean exit, so loops mean a real startup error | +| Mac rebooted mid-soak | `RunAtLoad` restarts it on login; restart-honesty rule of §5 applies | +| Artifact corrupt/missing | Readers fail closed (§2); `pnpm soak:status start` rewrites it (a fresh window — say so in the report) | + +## 7. 
ntfy wiring + +Same pattern as `scripts/notify-pr-ready.sh` — topic from `AEDEV_NTFY_TOPIC` +(optional self-hosted base via `AEDEV_NTFY_URL`); without a topic it prints +instead of pushing (never blocks): + +```bash +# soak finished (wrap the §1 command): +pnpm soak:status start \ + && if AEDEV_SOAK_MS=604800000 pnpm test:fleet:soak; then + pnpm soak:status complete + curl -fsS -X POST "${AEDEV_NTFY_URL:-https://ntfy.sh}/$AEDEV_NTFY_TOPIC" \ + -H "Title: aedev · fleet soak PASS" -H "Priority: high" \ + -d "one-week soak complete — report in evidence/fleet-soak/" + else + pnpm soak:status fail + curl -fsS -X POST "${AEDEV_NTFY_URL:-https://ntfy.sh}/$AEDEV_NTFY_TOPIC" \ + -H "Title: aedev · fleet soak FAIL" -H "Priority: urgent" \ + -d "soak failed — check evidence/fleet-soak/ and logs" + fi +``` + +Hold-change pushes during the soak are already covered by the daemon-side +watchdog (`packages/daemon/src/watchdog.ts` → `ntfy.ts`): every new +`HOLD-*` (including the forged-evidence drill's `HOLD-EVIDENCE-MISMATCH`) +emits `operator.notify_requested` + an ntfy push when the daemon runs with +`AEDEV_NTFY_TOPIC` set. + +## 8. Report template (real/simulated explicit — GR#7) + +The harness writes `soak-report.md` per run. 
For the week-long acceptance, +append this classification block before committing the evidence: + +```markdown +## Classification (GR#7) +- real: daemon, HTTP fleet protocol, Ed25519 identities, freeze path, durations +- simulated: task executors (no subscription CLI was spawned) +- unproven-after-this-run: real-CLI multi-machine soak (rubric #19 full check) +- restarts during the week: (directories: ) +``` diff --git a/evidence/loop-cycles/cycle-1-loop-plan-output.txt b/evidence/loop-cycles/cycle-1-loop-plan-output.txt new file mode 100644 index 0000000..ca41055 --- /dev/null +++ b/evidence/loop-cycles/cycle-1-loop-plan-output.txt @@ -0,0 +1,59 @@ + +> aedev@2.4.0-patch1 loop:plan /home/user/claude-code-247 +> tsx scripts/loop-planner.ts + +[loop-planner] running the full test suite (set AEDEV_LOOP_TESTS_GREEN=1|0 to assert instead)… + +=== loop-planner inputs (real, gathered by this run) === +- repoDirty = false + via git status --porcelain → empty +- workbook §0 = current_phase=V6-P3 + via extracted yaml block from /home/user/claude-code-247/WORKBOOK_v6.md +- sotAmbiguous = false + via root workbooks scanned: WORKBOOK_v4.md, WORKBOOK_v6.md · live SoT claimants: WORKBOOK_v6.md +- testsGreen = true + via ran `pnpm test` in this invocation and used its exit code +- budgetVerdict = {"allowed":true,"reason":"no-db"} + via no /root/.aedev/state.db in this environment — nothing has spent headless credit (default-allow with no-db note) +- openHolds = 0 + via max(state.db active holds=0, WORKBOOK §0 open_holds=0) +- prevCyclePrMerged = true + via no previous proposal in evidence/loop-cycles — first cycle + +=== PlanCard (decision) === +{ + "type": "plan", + "title": "Planner proposal · 递归 planner 提案", + "next_step": "Human decision: accept this proposal by starting a session on it, or ignore it. 
The planner stops here — it never implements, never pushes, never merges.", + "machine": { + "user_state": "planner_proposal", + "stage": "loop-planner", + "hold_code": null, + "pr_gate_code": null + }, + "objective": "One bounded cycle toward: Operator-gated real-proof closeout: real Draft PR URL + real in-repo Gemini verdict artifact (operator Mac, runbook docs/operations/P4-first-real-draft-pr.md). Output stops at evidence + at most a Draft PR — the system never merges (GR#10).", + "phases": [ + "V6-P3" + ], + "acceptance_criteria": [ + "The chosen gap moves with in-repo evidence (GR#7)", + "Output stops at evidence + at most a Draft PR; merge stays human-only (GR#10)" + ], + "risk_level": "low", + "estimated_calls": 0, + "requires_approval": true, + "proposal": { + "gapId": "v6-p3-real-proof-closeout", + "phase": "V6-P3", + "rationale": "Highest-priority open gap by the fixed v6 order (safety_evidence > user_ux > automation > fleet > polish): category=safety_evidence, phase=V6-P3, workbook current_phase=V6-P3.", + "expectedDeliverable": "One bounded cycle toward: Operator-gated real-proof closeout: real Draft PR URL + real in-repo Gemini verdict artifact (operator Mac, runbook docs/operations/P4-first-real-draft-pr.md). Output stops at evidence + at most a Draft PR — the system never merges (GR#10)." + } +} + +=== human text === +PROPOSE cycle 1: gap "v6-p3-real-proof-closeout" (phase V6-P3) + why: Highest-priority open gap by the fixed v6 order (safety_evidence > user_ux > automation > fleet > polish): category=safety_evidence, phase=V6-P3, workbook current_phase=V6-P3. + deliverable: One bounded cycle toward: Operator-gated real-proof closeout: real Draft PR URL + real in-repo Gemini verdict artifact (operator Mac, runbook docs/operations/P4-first-real-draft-pr.md). Output stops at evidence + at most a Draft PR — the system never merges (GR#10). + +Ledger entry written: /home/user/claude-code-247/evidence/loop-cycles/cycle-1.json +loop-planner stops here. 
A human (or a human-started session) acts on this — never this script. diff --git a/evidence/loop-cycles/cycle-1.json b/evidence/loop-cycles/cycle-1.json new file mode 100644 index 0000000..6819983 --- /dev/null +++ b/evidence/loop-cycles/cycle-1.json @@ -0,0 +1,15 @@ +{ + "cycle": 1, + "decision": { + "action": "propose", + "cycle": { + "gapId": "v6-p3-real-proof-closeout", + "phase": "V6-P3", + "rationale": "Highest-priority open gap by the fixed v6 order (safety_evidence > user_ux > automation > fleet > polish): category=safety_evidence, phase=V6-P3, workbook current_phase=V6-P3.", + "expectedDeliverable": "One bounded cycle toward: Operator-gated real-proof closeout: real Draft PR URL + real in-repo Gemini verdict artifact (operator Mac, runbook docs/operations/P4-first-real-draft-pr.md). Output stops at evidence + at most a Draft PR — the system never merges (GR#10)." + } + }, + "timestamp": "2026-06-11T02:15:38.901Z", + "workbook_phase": "V6-P3", + "chosen_gap": "v6-p3-real-proof-closeout" +} diff --git a/package.json b/package.json index 1bc0b3b..9f92c5f 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,8 @@ "test:e2e:sandbox": "tsx scripts/e2e-sandbox.ts", "test:hermus:mission": "tsx scripts/hermus-mission-smoke.ts", "test:fleet:soak": "tsx scripts/fleet-soak.ts", + "loop:plan": "tsx scripts/loop-planner.ts", + "soak:status": "tsx scripts/soak-status.ts", "test:mission-os:dry-soak": "node --import tsx scripts/mission-os-dry-soak.ts", "test:workbook": "tsx scripts/workbook-acceptance.ts", "typecheck": "pnpm -r typecheck", diff --git a/packages/daemon/src/index.ts b/packages/daemon/src/index.ts index 16c4613..e52dab3 100644 --- a/packages/daemon/src/index.ts +++ b/packages/daemon/src/index.ts @@ -108,6 +108,30 @@ export type { DefaultValidatorSecretStatus, ValidatorSecretResolver, } from './validator-factory.js' +// V6-P4: budget facts for the loop-planner shell (scripts/loop-planner.ts). 
+export { checkHeadlessBudget, countHeadlessCallsToday } from './headless-budget-guard.js' +export { + CYCLE_PLANNED_EVENT, + GAP_PRIORITY, + appendCycleLedger, + claimsSourceOfTruth, + detectSotAmbiguity, + parseSection0, + planNextCycle, + rebuildCycleLedgerFromEvents, +} from './recursive-planner.js' +export type { + AppendCycleLedgerOptions, + CycleLedgerEntry, + GapCategory, + PhaseGap, + PlanDecision, + PlannerInput, + Section0State, +} from './recursive-planner.js' +// V6-P5: soak-pending status artifact (shell: scripts/soak-status.ts). +export { WEEK_MS, buildSoakPending, deriveSoakStatus, readSoakPending, writeSoakPending } from './soak-status.js' +export type { SoakPending, SoakStatus } from './soak-status.js' export { InterruptionPolicy } from './interruption-policy.js' export type { InterruptionReason, diff --git a/packages/daemon/src/recursive-planner.test.ts b/packages/daemon/src/recursive-planner.test.ts new file mode 100644 index 0000000..80285b0 --- /dev/null +++ b/packages/daemon/src/recursive-planner.test.ts @@ -0,0 +1,236 @@ +/** + * V6-P4 — recursive planner unit matrix. + * + * L1 acceptance (WORKBOOK_v6 §3 V6-P4): every refusal condition has a unit + * test; the planner picks exactly ONE gap by the fixed priority + * safety_evidence > user_ux > automation > fleet > polish (ties broken by + * stable input order); the cycle ledger is reconstructable from events (GR#5). 
+ */ +import { describe, it, expect, beforeEach, afterEach } from 'vitest' +import { mkdtempSync, readFileSync, readdirSync, rmSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { AedevDb } from '@aedev/core' +import { + CYCLE_PLANNED_EVENT, + GAP_PRIORITY, + appendCycleLedger, + claimsSourceOfTruth, + detectSotAmbiguity, + parseSection0, + planNextCycle, + rebuildCycleLedgerFromEvents, + type PhaseGap, + type PlanDecision, + type PlannerInput, +} from './recursive-planner.js' + +const SECTION0 = [ + 'schema_version: 6', + 'product: ordinary-user-loop-os', + 'current_phase: V6-P4 # V6-P0..P6', + 'open_holds: 0', + 'merge_policy: human_only', +].join('\n') + +const GAPS: PhaseGap[] = [ + { id: 'g-polish', phase: 'V6-P6', description: 'polish docs wording', category: 'polish' }, + { id: 'g-fleet', phase: 'parked-v5', description: 'real multi-machine fleet', category: 'fleet' }, + { id: 'g-auto', phase: 'V6-P5', description: 'run the one-week real soak', category: 'automation' }, + { id: 'g-ux', phase: 'V6-P6', description: 'ordinary-user acceptance run', category: 'user_ux' }, + { id: 'g-safety', phase: 'V6-P3', description: 'real Draft PR + real Gemini verdict evidence in-repo', category: 'safety_evidence' }, +] + +function baseInput(overrides: Partial = {}): PlannerInput { + return { + workbookSection0: SECTION0, + repoDirty: false, + testsGreen: true, + budgetVerdict: { allowed: true, reason: 'ok' }, + sotAmbiguous: false, + openHolds: 0, + prevCyclePrMerged: true, + phaseGaps: [...GAPS], + ...overrides, + } +} + +function expectRefusal(decision: PlanDecision, reasonRe: RegExp, recoveryRe: RegExp): void { + expect(decision.action).toBe('refuse') + if (decision.action !== 'refuse') return + expect(decision.reason).toMatch(reasonRe) + expect(decision.recovery).toMatch(recoveryRe) +} + +describe('planNextCycle — refusal matrix (V6-P4 L1)', () => { + it('refuses on a dirty working tree with a human recovery action', () => 
{ + const d = planNextCycle(baseInput({ repoDirty: true })) + expectRefusal(d, /dirty/i, /commit|stash/i) + }) + + it('refuses when tests are not green', () => { + const d = planNextCycle(baseInput({ testsGreen: false })) + expectRefusal(d, /test/i, /pnpm test|fix/i) + }) + + it('refuses when the budget verdict blocks, carrying the budget reason', () => { + const d = planNextCycle(baseInput({ budgetVerdict: { allowed: false, reason: 'day_cap' } })) + expectRefusal(d, /budget.*day_cap/is, /AEDEV_BUDGET|wait/i) + }) + + it('refuses when the SoT is ambiguous (more than one root workbook claims SoT)', () => { + const d = planNextCycle(baseInput({ sotAmbiguous: true })) + expectRefusal(d, /source of truth|SoT/i, /SUPERSEDED|single/i) + }) + + it('refuses when there are open holds', () => { + const d = planNextCycle(baseInput({ openHolds: 2 })) + expectRefusal(d, /2 open hold/i, /resolve/i) + }) + + it('refuses when the previous cycle PR is not merged (one in-flight cycle max)', () => { + const d = planNextCycle(baseInput({ prevCyclePrMerged: false })) + expectRefusal(d, /previous cycle/i, /merge|close/i) + }) + + it('refuses when §0 is unparseable (treated as SoT ambiguity)', () => { + const d = planNextCycle(baseInput({ workbookSection0: 'narrative text, no machine state' })) + expectRefusal(d, /§0|section 0/i, /WORKBOOK_v6/i) + }) + + it('refuses honestly when there is no gap to work on', () => { + const d = planNextCycle(baseInput({ phaseGaps: [] })) + expectRefusal(d, /no .*gap/i, /WORKBOOK/i) + }) + + it('refusal precedence: dirty tree is reported first when everything is wrong', () => { + const d = planNextCycle(baseInput({ + repoDirty: true, testsGreen: false, sotAmbiguous: true, openHolds: 3, + budgetVerdict: { allowed: false, reason: 'day_cap' }, prevCyclePrMerged: false, + })) + expectRefusal(d, /dirty/i, /commit|stash/i) + }) +}) + +describe('planNextCycle — single-pick priority (V6-P4 L1)', () => { + it('proposes exactly one cycle and picks the 
safety_evidence gap over all others', () => { + const d = planNextCycle(baseInput()) + expect(d.action).toBe('propose') + if (d.action !== 'propose') return + expect(d.cycle.gapId).toBe('g-safety') + expect(d.cycle.phase).toBe('V6-P3') + expect(d.cycle.rationale).toMatch(/safety_evidence/) + expect(d.cycle.expectedDeliverable).toContain('real Draft PR + real Gemini verdict evidence in-repo') + expect(d.cycle.expectedDeliverable).toMatch(/Draft PR/i) + }) + + it('follows the strict priority chain when higher categories are absent', () => { + // Drop categories one by one from the top; the next category must win. + const order = ['g-safety', 'g-ux', 'g-auto', 'g-fleet', 'g-polish'] + let gaps = [...GAPS] + for (const expected of order) { + const d = planNextCycle(baseInput({ phaseGaps: gaps })) + expect(d.action).toBe('propose') + if (d.action === 'propose') expect(d.cycle.gapId).toBe(expected) + gaps = gaps.filter((g) => g.id !== expected) + } + }) + + it('breaks ties inside a category by stable input order', () => { + const d = planNextCycle(baseInput({ + phaseGaps: [ + { id: 'safety-first', phase: 'V6-P3', description: 'first listed', category: 'safety_evidence' }, + { id: 'safety-second', phase: 'V6-P3', description: 'second listed', category: 'safety_evidence' }, + ], + })) + expect(d.action).toBe('propose') + if (d.action === 'propose') expect(d.cycle.gapId).toBe('safety-first') + }) + + it('exports the fixed priority order for shells and docs', () => { + expect(GAP_PRIORITY).toEqual(['safety_evidence', 'user_ux', 'automation', 'fleet', 'polish']) + }) +}) + +describe('parseSection0', () => { + it('extracts current_phase / open_holds / merge_policy, stripping comments', () => { + const s = parseSection0(SECTION0) + expect(s).toEqual({ currentPhase: 'V6-P4', openHolds: 0, mergePolicy: 'human_only' }) + }) + + it('returns null when current_phase is missing', () => { + expect(parseSection0('open_holds: 0')).toBeNull() + expect(parseSection0('')).toBeNull() + }) 
+}) + +describe('SoT claim detection', () => { + const V6_HEAD = '# WORKBOOK v6\n\n> **本文件是当前唯一事实源 (SoT)。** 取代 `WORKBOOK_v4.md`(该文件原地保留并加\n> SUPERSEDED 头)。' + const V4_HEAD = '# WORKBOOK v4\n\n> ⚠️ **SUPERSEDED(2026-06-11)。本文件不再是事实源。**\n\n> **本文件是新的唯一事实源 (SoT)。**(已失效)' + + it('counts a live SoT claim but not a SUPERSEDED one', () => { + expect(claimsSourceOfTruth(V6_HEAD)).toBe(true) + expect(claimsSourceOfTruth(V4_HEAD)).toBe(false) + expect(claimsSourceOfTruth('# random doc')).toBe(false) + }) + + it('is unambiguous with exactly one claimant, ambiguous with zero or two', () => { + expect(detectSotAmbiguity([ + { name: 'WORKBOOK_v6.md', text: V6_HEAD }, + { name: 'WORKBOOK_v4.md', text: V4_HEAD }, + ])).toEqual({ ambiguous: false, claimants: ['WORKBOOK_v6.md'] }) + expect(detectSotAmbiguity([ + { name: 'WORKBOOK_v6.md', text: V6_HEAD }, + { name: 'WORKBOOK_v7.md', text: V6_HEAD }, + ]).ambiguous).toBe(true) + expect(detectSotAmbiguity([{ name: 'WORKBOOK_v4.md', text: V4_HEAD }]).ambiguous).toBe(true) + }) +}) + +describe('cycle ledger (V6-P4 L1 — reconstructable from events, GR#5)', () => { + let dir: string + let db: AedevDb + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), 'aedev-loop-cycles-')) + db = new AedevDb(':memory:') + }) + + afterEach(() => { + db.close() + rmSync(dir, { recursive: true, force: true }) + }) + + it('appends cycle-1.json then cycle-2.json with the contract fields', () => { + const propose = planNextCycle(baseInput()) + const e1 = appendCycleLedger({ dir, decision: propose, workbookPhase: 'V6-P4' }) + expect(e1.cycle).toBe(1) + const refuse = planNextCycle(baseInput({ repoDirty: true })) + const e2 = appendCycleLedger({ dir, decision: refuse, workbookPhase: 'V6-P4' }) + expect(e2.cycle).toBe(2) + expect(readdirSync(dir).sort()).toEqual(['cycle-1.json', 'cycle-2.json']) + const onDisk = JSON.parse(readFileSync(join(dir, 'cycle-1.json'), 'utf8')) as Record + expect(Object.keys(onDisk).sort()).toEqual(['chosen_gap', 'cycle', 
'decision', 'timestamp', 'workbook_phase']) + expect(onDisk['workbook_phase']).toBe('V6-P4') + expect(onDisk['chosen_gap']).toBe('g-safety') + expect(typeof onDisk['timestamp']).toBe('string') + const second = JSON.parse(readFileSync(join(dir, 'cycle-2.json'), 'utf8')) as Record + expect(second['chosen_gap']).toBeNull() + }) + + it('event-sources every entry so the ledger rebuilds from events alone', () => { + appendCycleLedger({ dir, decision: planNextCycle(baseInput()), workbookPhase: 'V6-P4', db }) + appendCycleLedger({ dir, decision: planNextCycle(baseInput({ openHolds: 1 })), workbookPhase: 'V6-P4', db }) + expect(db.queryEvents({ type: CYCLE_PLANNED_EVENT })).toHaveLength(2) + const rebuilt = rebuildCycleLedgerFromEvents(db) + const fromDisk = readdirSync(dir).sort().map((f) => JSON.parse(readFileSync(join(dir, f), 'utf8')) as unknown) + expect(rebuilt).toEqual(fromDisk) + }) + + it('a refused decision never proposes work in the same call (one decision per cycle)', () => { + const refused = planNextCycle(baseInput({ openHolds: 1 })) + expect(refused.action).toBe('refuse') + const entry = appendCycleLedger({ dir, decision: refused, workbookPhase: 'V6-P4' }) + expect(entry.chosen_gap).toBeNull() + }) +}) diff --git a/packages/daemon/src/recursive-planner.ts b/packages/daemon/src/recursive-planner.ts new file mode 100644 index 0000000..bcdafb1 --- /dev/null +++ b/packages/daemon/src/recursive-planner.ts @@ -0,0 +1,241 @@ +/** + * V6-P4 — minimal safe recursive planner (WORKBOOK_v6 §3). + * + * PURE decision logic only. `planNextCycle` reads pre-gathered facts about + * the repo and proposes exactly ONE next cycle, or refuses with a human + * recovery action. All shell probing (git status, workbook reads, budget DB) + * lives in `scripts/loop-planner.ts` — GR#8 keeps child_process out of + * daemon src, and the static eslint guard enforces it. 
+ * + * Hard guardrails (each refusal has its own unit test): + * dirty working tree / red tests / blocked budget / ambiguous SoT / + * open holds / previous cycle PR not merged / unparseable §0 / no gaps. + * + * The planner NEVER implements, never pushes, never merges. Its output is a + * proposal; the produced work of an accepted cycle stops at a Draft PR + * (GR#10 — human merge only). Cycle ledger entries are written to + * `evidence/loop-cycles/cycle-.json` AND event-sourced as + * `planner.cycle_planned` so the ledger rebuilds from events alone (GR#5). + */ +import { mkdirSync, readdirSync, writeFileSync } from 'node:fs' +import { join } from 'node:path' +import type { AedevDb } from '@aedev/core' + +// ---- types ------------------------------------------------------------------- + +export type GapCategory = 'safety_evidence' | 'user_ux' | 'automation' | 'fleet' | 'polish' + +/** Fixed v6 priority — 安全证据 > 普通用户 UX > 自动化 > fleet 规模 > 打磨. */ +export const GAP_PRIORITY: readonly GapCategory[] = [ + 'safety_evidence', + 'user_ux', + 'automation', + 'fleet', + 'polish', +] as const + +export interface PhaseGap { + id: string + phase: string + description: string + category: GapCategory +} + +export interface PlannerInput { + /** Raw text of WORKBOOK_v6 §0 (the machine-readable yaml block). */ + workbookSection0: string + repoDirty: boolean + testsGreen: boolean + budgetVerdict: { allowed: boolean; reason: string } + /** True when ≠1 root workbook claims to be the source of truth. */ + sotAmbiguous: boolean + openHolds: number + /** False while the previous proposed cycle's PR is still unmerged. 
*/ + prevCyclePrMerged: boolean + phaseGaps: PhaseGap[] +} + +export type PlanDecision = + | { action: 'refuse'; reason: string; recovery: string } + | { + action: 'propose' + cycle: { gapId: string; phase: string; rationale: string; expectedDeliverable: string } + } + +// ---- §0 parsing --------------------------------------------------------------- + +export interface Section0State { + currentPhase: string + openHolds: number | null + mergePolicy: string | null +} + +/** Minimal, dependency-free parse of the §0 yaml block. Returns null when the + * block has no `current_phase` — an unparseable SoT means the planner must + * not guess what phase it is in. */ +export function parseSection0(raw: string): Section0State | null { + const field = (name: string): string | null => { + const m = raw.match(new RegExp(`^${name}:\\s*([^\\n#]+)`, 'm')) + return m?.[1] === undefined ? null : m[1].trim() + } + const currentPhase = field('current_phase') + if (currentPhase === null || currentPhase === '') return null + const holdsRaw = field('open_holds') + const holds = holdsRaw === null ? NaN : Number(holdsRaw) + return { + currentPhase, + openHolds: Number.isFinite(holds) ? holds : null, + mergePolicy: field('merge_policy'), + } +} + +// ---- SoT claim detection (pure; the shell reads the files) -------------------- + +const SOT_CLAIM_RE = /本文件是.{0,8}唯一事实源|this file is the .{0,12}source of truth/i +const SUPERSEDED_HEADER_RE = /^>.{0,12}\*\*SUPERSEDED/im + +/** A workbook claims SoT when it carries a live claim line and no SUPERSEDED + * banner. (The live v6 workbook may MENTION the word "SUPERSEDED" while + * describing v4's banner — only a `> **SUPERSEDED` header line disqualifies.) 
*/ +export function claimsSourceOfTruth(text: string): boolean { + return SOT_CLAIM_RE.test(text) && !SUPERSEDED_HEADER_RE.test(text) +} + +export function detectSotAmbiguity( + workbooks: Array<{ name: string; text: string }>, +): { ambiguous: boolean; claimants: string[] } { + const claimants = workbooks.filter((w) => claimsSourceOfTruth(w.text)).map((w) => w.name) + return { ambiguous: claimants.length !== 1, claimants } +} + +// ---- the decision ------------------------------------------------------------- + +const refuse = (reason: string, recovery: string): PlanDecision => ({ action: 'refuse', reason, recovery }) + +export function planNextCycle(input: PlannerInput): PlanDecision { + if (input.repoDirty) { + return refuse( + 'Working tree is dirty — the planner refuses to plan on top of uncommitted state.', + 'Human: review `git status`, then commit or stash the changes before re-running loop:plan.', + ) + } + if (!input.testsGreen) { + return refuse( + 'Test suite is not green — planning forward on a red baseline would bury the regression.', + 'Human: run `GIT_CONFIG_GLOBAL=/tmp/test-gitconfig pnpm test`, fix the failures (or hold the offending change), then re-run loop:plan.', + ) + } + if (!input.budgetVerdict.allowed) { + return refuse( + `Headless budget blocked (${input.budgetVerdict.reason}) — a planner cycle would spend Agent SDK credit past the cap (GR#1).`, + 'Human: wait for the daily window, or raise AEDEV_BUDGET_MAX_HEADLESS_PER_MISSION / AEDEV_BUDGET_MAX_HEADLESS_PER_DAY deliberately, then re-run loop:plan.', + ) + } + if (input.sotAmbiguous) { + return refuse( + 'Source-of-truth is ambiguous — not exactly one root workbook claims to be the current SoT.', + 'Human: keep exactly one live SoT claim; add a `> **SUPERSEDED` banner to every other root workbook, then re-run loop:plan.', + ) + } + if (input.openHolds > 0) { + return refuse( + `${input.openHolds} open hold(s) — holds are owed to a human before any new cycle starts.`, + 'Human: resolve 
the active holds (`aedev status`, ~/.aedev/logs/holds.md), then re-run loop:plan.', + ) + } + if (!input.prevCyclePrMerged) { + return refuse( + 'The previous cycle\'s PR is not merged — at most one planner cycle may be in flight (GR#10: merge is human-only).', + 'Human: merge or close the previous cycle\'s PR (your decision, never the system\'s), then re-run loop:plan.', + ) + } + const section0 = parseSection0(input.workbookSection0) + if (section0 === null) { + return refuse( + 'WORKBOOK §0 (section 0) is unparseable — the planner cannot know the current phase, which is SoT ambiguity.', + 'Human: restore the machine-readable §0 yaml block in WORKBOOK_v6.md (current_phase et al.), then re-run loop:plan.', + ) + } + if (input.phaseGaps.length === 0) { + return refuse( + 'No known gap to work on — proposing busywork would violate evidence honesty (GR#7).', + 'Human: update the gap registry from the WORKBOOK phase table / assessment before re-running loop:plan.', + ) + } + + // Exactly ONE pick: strict category priority, ties broken by stable order. + const rank = (g: PhaseGap): number => GAP_PRIORITY.indexOf(g.category) + let chosen = input.phaseGaps[0]! + for (const gap of input.phaseGaps) { + if (rank(gap) < rank(chosen)) chosen = gap + } + + return { + action: 'propose', + cycle: { + gapId: chosen.id, + phase: chosen.phase, + rationale: + `Highest-priority open gap by the fixed v6 order (${GAP_PRIORITY.join(' > ')}): ` + + `category=${chosen.category}, phase=${chosen.phase}, workbook current_phase=${section0.currentPhase}.`, + expectedDeliverable: + `One bounded cycle toward: ${chosen.description}. 
` + + 'Output stops at evidence + at most a Draft PR — the system never merges (GR#10).', + }, + } +} + +// ---- cycle ledger --------------------------------------------------------------- + +export const CYCLE_PLANNED_EVENT = 'planner.cycle_planned' + +export interface CycleLedgerEntry { + cycle: number + decision: PlanDecision + timestamp: string + workbook_phase: string + chosen_gap: string | null +} + +export interface AppendCycleLedgerOptions { + /** Directory of the ledger, normally `/evidence/loop-cycles`. */ + dir: string + decision: PlanDecision + workbookPhase: string + /** When given, the entry is ALSO event-sourced (GR#5). */ + db?: AedevDb + now?: Date +} + +const CYCLE_FILE_RE = /^cycle-(\d+)\.json$/ + +/** Append `cycle-.json` (n = previous max + 1) and optionally event-source + * the same payload so `rebuildCycleLedgerFromEvents` can reproduce the + * on-disk ledger byte-for-byte at the JSON level. */ +export function appendCycleLedger(opts: AppendCycleLedgerOptions): CycleLedgerEntry & { path: string } { + mkdirSync(opts.dir, { recursive: true }) + const existing = readdirSync(opts.dir) + .map((f) => CYCLE_FILE_RE.exec(f)?.[1]) + .filter((n): n is string => n !== undefined) + .map((n) => Number(n)) + const cycle = existing.length === 0 ? 1 : Math.max(...existing) + 1 + const entry: CycleLedgerEntry = { + cycle, + decision: opts.decision, + timestamp: (opts.now ?? new Date()).toISOString(), + workbook_phase: opts.workbookPhase, + chosen_gap: opts.decision.action === 'propose' ? opts.decision.cycle.gapId : null, + } + const path = join(opts.dir, `cycle-${cycle}.json`) + writeFileSync(path, `${JSON.stringify(entry, null, 2)}\n`) + opts.db?.insertEvent(CYCLE_PLANNED_EVENT, 'planner_cycle', `cycle-${cycle}`, { ...entry }) + return { ...entry, path } +} + +/** GR#5 — the ledger must be reconstructable from events alone. 
*/ +export function rebuildCycleLedgerFromEvents(db: AedevDb): CycleLedgerEntry[] { + return db + .queryEvents({ type: CYCLE_PLANNED_EVENT }) + .map((e) => e.payload as unknown as CycleLedgerEntry) + .sort((a, b) => a.cycle - b.cycle) +} diff --git a/packages/daemon/src/soak-status.test.ts b/packages/daemon/src/soak-status.test.ts new file mode 100644 index 0000000..f682cb7 --- /dev/null +++ b/packages/daemon/src/soak-status.test.ts @@ -0,0 +1,66 @@ +/** + * V6-P5 — `soak-pending.json` status artifact contract + * (docs/operations/SOAK_OPERATIONS.md; shell is scripts/soak-status.ts). + */ +import { describe, it, expect, beforeEach, afterEach } from 'vitest' +import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { + WEEK_MS, + buildSoakPending, + deriveSoakStatus, + readSoakPending, + writeSoakPending, + type SoakPending, +} from './soak-status.js' + +let dir: string +beforeEach(() => { dir = mkdtempSync(join(tmpdir(), 'aedev-soak-status-')) }) +afterEach(() => { rmSync(dir, { recursive: true, force: true }) }) + +describe('soak-pending artifact (V6-P5)', () => { + it('buildSoakPending: started_at + expected_end = start + soakMs, status running', () => { + const start = new Date('2026-06-11T00:00:00.000Z') + const p = buildSoakPending(start, WEEK_MS) + expect(p).toEqual({ + started_at: '2026-06-11T00:00:00.000Z', + expected_end: '2026-06-18T00:00:00.000Z', + status: 'running', + }) + expect(WEEK_MS).toBe(604_800_000) + }) + + it('deriveSoakStatus: running inside the window, overdue once expected_end passes', () => { + const p = buildSoakPending(new Date('2026-06-11T00:00:00Z'), WEEK_MS) + expect(deriveSoakStatus(p, new Date('2026-06-14T00:00:00Z'))).toBe('running') + expect(deriveSoakStatus(p, new Date('2026-06-18T00:00:00.001Z'))).toBe('overdue') + }) + + it('deriveSoakStatus: terminal states are sticky (completed/failed never flip)', () => { + const base = 
buildSoakPending(new Date('2026-06-11T00:00:00Z'), WEEK_MS) + const done: SoakPending = { ...base, status: 'completed' } + const failed: SoakPending = { ...base, status: 'failed' } + expect(deriveSoakStatus(done, new Date('2099-01-01T00:00:00Z'))).toBe('completed') + expect(deriveSoakStatus(failed, new Date('2026-06-12T00:00:00Z'))).toBe('failed') + }) + + it('write/read roundtrip preserves the exact contract fields', () => { + const path = join(dir, 'soak-pending.json') + const p = buildSoakPending(new Date('2026-06-11T01:02:03Z'), 1000) + writeSoakPending(path, p) + expect(readSoakPending(path)).toEqual(p) + const raw = JSON.parse(readFileSync(path, 'utf8')) as Record + expect(Object.keys(raw).sort()).toEqual(['expected_end', 'started_at', 'status']) + }) + + it('readSoakPending: missing file or invalid content reads as null (fail-closed)', () => { + expect(readSoakPending(join(dir, 'missing.json'))).toBeNull() + const bad = join(dir, 'bad.json') + writeFileSync(bad, 'not json') + expect(readSoakPending(bad)).toBeNull() + const wrongShape = join(dir, 'wrong.json') + writeFileSync(wrongShape, JSON.stringify({ started_at: 1, status: 'running' })) + expect(readSoakPending(wrongShape)).toBeNull() + }) +}) diff --git a/packages/daemon/src/soak-status.ts b/packages/daemon/src/soak-status.ts new file mode 100644 index 0000000..16fe946 --- /dev/null +++ b/packages/daemon/src/soak-status.ts @@ -0,0 +1,66 @@ +/** + * V6-P5 — `soak-pending.json` status artifact (soak operations contract). + * + * Pure logic for the one-week fleet soak's pending-state file at + * `evidence/fleet-soak/soak-pending.json`. The CLI shell is + * `scripts/soak-status.ts` (`pnpm soak:status`); the runbook is + * docs/operations/SOAK_OPERATIONS.md. The artifact lets ANY session (or the + * operator's phone via ntfy) answer "is a soak running, since when, and when + * should it be done" without parsing daemon logs. 
+ */ +import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs' +import { dirname } from 'node:path' + +export const WEEK_MS = 604_800_000 + +export type SoakStatus = 'running' | 'completed' | 'overdue' | 'failed' + +export interface SoakPending { + /** ISO timestamp the soak process was started. */ + started_at: string + /** ISO timestamp the soak window elapses (started_at + AEDEV_SOAK_MS). */ + expected_end: string + status: SoakStatus +} + +export function buildSoakPending(startedAt: Date, soakMs: number): SoakPending { + return { + started_at: startedAt.toISOString(), + expected_end: new Date(startedAt.getTime() + soakMs).toISOString(), + status: 'running', + } +} + +/** Time-derived status: terminal states (completed/failed) are sticky; a + * 'running' soak whose window has elapsed without anyone marking it + * completed reads as 'overdue' — honest "needs a human look", never a + * silent fake-complete. An `expected_end` that Date.parse cannot read (NaN) also reads as 'overdue' rather than 'running' forever. */ +export function deriveSoakStatus(pending: SoakPending, now: Date): SoakStatus { + if (pending.status === 'completed' || pending.status === 'failed') return pending.status + return now.getTime() <= Date.parse(pending.expected_end) ? 'running' : 'overdue' +} + +export function writeSoakPending(filePath: string, pending: SoakPending): void { + mkdirSync(dirname(filePath), { recursive: true }) + writeFileSync(filePath, `${JSON.stringify(pending, null, 2)}\n`) +} + +const STATUSES: readonly string[] = ['running', 'completed', 'overdue', 'failed'] + +/** Fail-closed reader: missing file, broken JSON, or a wrong shape all read + * as null so callers never act on a half-written artifact.
Timestamps that Date.parse cannot read also count as a wrong shape. */ +export function readSoakPending(filePath: string): SoakPending | null { + if (!existsSync(filePath)) return null + try { + const raw = JSON.parse(readFileSync(filePath, 'utf8')) as Record<string, unknown> + if ( + typeof raw['started_at'] !== 'string' || Number.isNaN(Date.parse(raw['started_at'])) + || typeof raw['expected_end'] !== 'string' || Number.isNaN(Date.parse(raw['expected_end'])) + || typeof raw['status'] !== 'string' + || !STATUSES.includes(raw['status']) + ) return null + return { started_at: raw['started_at'], expected_end: raw['expected_end'], status: raw['status'] as SoakStatus } + } catch { + return null + } +} diff --git a/scripts/loop-planner.ts b/scripts/loop-planner.ts new file mode 100644 index 0000000..4d6fa4d --- /dev/null +++ b/scripts/loop-planner.ts @@ -0,0 +1,223 @@ +/** + * V6-P4 — loop-planner CLI shell (`pnpm loop:plan`). + * + * Gathers REAL inputs (git status, WORKBOOK_v6 §0, root SoT claims, headless + * budget from ~/.aedev/state.db when present, open holds, previous-cycle + * ledger) and asks the pure `planNextCycle` (packages/daemon, GR#8: all + * shell/git probing lives HERE, not in daemon src) for exactly one decision. + * + * It then prints the decision as a PlanCard-shaped JSON + human text and + * appends `evidence/loop-cycles/cycle-<n>.json`. IT STOPS THERE: this shell + * never implements, never pushes, never opens a PR — a human (or a human- + * started session) acts on the proposal, and merge is human-only (GR#10).
+ * + * Knobs (all honest-by-default): + * AEDEV_LOOP_TESTS_GREEN=1|0 assert the suite state instead of running + * `pnpm test` (the run notes the assertion) + * AEDEV_LOOP_PREV_PR_MERGED=1 operator-asserts the previous cycle's PR + * was merged/closed (only needed once a + * previous proposal exists in the ledger) + * AEDEV_HOME where state.db lives (default ~/.aedev) + */ +import { execFileSync } from 'node:child_process' +import { existsSync, readFileSync, readdirSync } from 'node:fs' +import { homedir } from 'node:os' +import { join, resolve } from 'node:path' +import { AedevDb } from '@aedev/core' +import { + appendCycleLedger, + checkHeadlessBudget, + detectSotAmbiguity, + parseSection0, + planNextCycle, + type CycleLedgerEntry, + type PhaseGap, + type PlanDecision, +} from '@aedev/daemon' + +const REPO_ROOT = resolve(process.cwd()) +const LEDGER_DIR = join(REPO_ROOT, 'evidence', 'loop-cycles') +const WORKBOOK = join(REPO_ROOT, 'WORKBOOK_v6.md') + +/** Hand-maintained mirror of the open gaps in WORKBOOK_v6 §3 + the gap + * assessment §2. Honesty note: this registry is data, not discovery — when a + * phase closes, remove its gap here in the same PR that closes it. 
*/ +const GAP_REGISTRY: PhaseGap[] = [ + { + id: 'v6-p3-real-proof-closeout', + phase: 'V6-P3', + category: 'safety_evidence', + description: + 'Operator-gated real-proof closeout: real Draft PR URL + real in-repo Gemini verdict artifact ' + + '(operator Mac, runbook docs/operations/P4-first-real-draft-pr.md)', + }, + { + id: 'v6-p6-ordinary-user-acceptance', + phase: 'V6-P6', + category: 'user_ux', + description: 'Ordinary-user acceptance: usability E2E across the five cards + final scored report in docs/assessments/', + }, + { + id: 'v6-p5-one-week-real-soak', + phase: 'V6-P5', + category: 'automation', + description: 'Run the one-week unattended soak per docs/operations/SOAK_OPERATIONS.md and land its evidence in-repo', + }, + { + id: 'v5-byo-fleet-real-machines', + phase: 'parked-v5', + category: 'fleet', + description: 'Real multi-machine BYO fleet (parked by WORKBOOK_v6 §6; inherits the v5 hard rules)', + }, + { + id: 'post-acceptance-docs-polish', + phase: 'V6-P6', + category: 'polish', + description: 'README/docs polish after the ordinary-user acceptance lands', + }, +] + +interface GatherNote { fact: string; value: string; how: string } +const notes: GatherNote[] = [] +const note = (fact: string, value: string, how: string): void => { notes.push({ fact, value, how }) } + +// ---- 1. git status -------------------------------------------------------- +const porcelain = execFileSync('git', ['status', '--porcelain'], { cwd: REPO_ROOT, encoding: 'utf8' }).trim() +const repoDirty = porcelain.length > 0 +note('repoDirty', String(repoDirty), repoDirty ? `git status --porcelain → ${porcelain.split('\n').length} entr(ies)` : 'git status --porcelain → empty') + +// ---- 2. WORKBOOK §0 -------------------------------------------------------- +const workbookText = existsSync(WORKBOOK) ? readFileSync(WORKBOOK, 'utf8') : '' +const section0Match = workbookText.match(/## §0[^\n]*\n+```yaml\n([\s\S]*?)```/) +const workbookSection0 = section0Match?.[1] ?? 
'' +const section0 = parseSection0(workbookSection0) +note('workbook §0', section0 ? `current_phase=${section0.currentPhase}` : 'UNPARSEABLE', `extracted yaml block from ${WORKBOOK}`) + +// ---- 3. SoT ambiguity ------------------------------------------------------- +const rootWorkbooks = readdirSync(REPO_ROOT) + .filter((f) => /^WORKBOOK.*\.md$/.test(f)) + .map((name) => ({ name, text: readFileSync(join(REPO_ROOT, name), 'utf8') })) +const sot = detectSotAmbiguity(rootWorkbooks) +note('sotAmbiguous', String(sot.ambiguous), `root workbooks scanned: ${rootWorkbooks.map((w) => w.name).join(', ')} · live SoT claimants: ${sot.claimants.join(', ') || '(none)'}`) + +// ---- 4. tests ---------------------------------------------------------------- +let testsGreen: boolean +const assertedTests = process.env['AEDEV_LOOP_TESTS_GREEN'] +if (assertedTests === '1' || assertedTests === '0') { + testsGreen = assertedTests === '1' + note('testsGreen', String(testsGreen), 'operator-asserted via AEDEV_LOOP_TESTS_GREEN (not verified by this run)') +} else { + console.log('[loop-planner] running the full test suite (set AEDEV_LOOP_TESTS_GREEN=1|0 to assert instead)…') + try { + execFileSync('pnpm', ['test'], { cwd: REPO_ROOT, stdio: ['ignore', 'ignore', 'inherit'] }) + testsGreen = true + } catch { + testsGreen = false + } + note('testsGreen', String(testsGreen), 'ran `pnpm test` in this invocation and used its exit code') +} + +// ---- 5. budget + holds from ~/.aedev/state.db -------------------------------- +const aedevHome = process.env['AEDEV_HOME'] ?? 
join(homedir(), '.aedev') +const dbPath = join(aedevHome, 'state.db') +let db: AedevDb | undefined +let budgetVerdict: { allowed: boolean; reason: string } +let dbHolds = 0 +if (existsSync(dbPath)) { + db = new AedevDb(dbPath) + const v = checkHeadlessBudget(db, 'loop-planner') + budgetVerdict = { allowed: v.allowed, reason: v.reason } + dbHolds = db.listActiveHolds().length + note('budgetVerdict', JSON.stringify(budgetVerdict), `cost.headless_call events in ${dbPath} vs env limits`) + note('openHolds(db)', String(dbHolds), `active holds in ${dbPath}`) +} else { + budgetVerdict = { allowed: true, reason: 'no-db' } + note('budgetVerdict', JSON.stringify(budgetVerdict), `no ${dbPath} in this environment — nothing has spent headless credit (default-allow with no-db note)`) +} +const section0Holds = section0?.openHolds ?? 0 +const openHolds = Math.max(dbHolds, section0Holds) +note('openHolds', String(openHolds), `max(state.db active holds=${dbHolds}, WORKBOOK §0 open_holds=${section0Holds})`) + +// ---- 6. previous cycle ----------------------------------------------------------- +let prevCyclePrMerged = true +let prevHow = 'no previous proposal in evidence/loop-cycles — first cycle' +if (existsSync(LEDGER_DIR)) { + const prevProposals = readdirSync(LEDGER_DIR) + .filter((f) => /^cycle-\d+\.json$/.test(f)) + .map((f) => JSON.parse(readFileSync(join(LEDGER_DIR, f), 'utf8')) as CycleLedgerEntry) + .filter((e) => e.decision.action === 'propose') + if (prevProposals.length > 0) { + prevCyclePrMerged = process.env['AEDEV_LOOP_PREV_PR_MERGED'] === '1' + prevHow = prevCyclePrMerged + ? `${prevProposals.length} previous proposal(s); operator-asserted merged via AEDEV_LOOP_PREV_PR_MERGED=1` + : `${prevProposals.length} previous proposal(s) in the ledger and AEDEV_LOOP_PREV_PR_MERGED is not 1 — treated as unmerged (fail-closed)` + } +} +note('prevCyclePrMerged', String(prevCyclePrMerged), prevHow) + +// ---- 7. 
decide ------------------------------------------------------------------- +const decision: PlanDecision = planNextCycle({ + workbookSection0, + repoDirty, + testsGreen, + budgetVerdict, + sotAmbiguous: sot.ambiguous, + openHolds, + prevCyclePrMerged, + phaseGaps: GAP_REGISTRY, +}) + +// ---- 8. ledger + output (the shell STOPS here — GR#10) ---------------------------- +const entry = appendCycleLedger({ + dir: LEDGER_DIR, + decision, + workbookPhase: section0?.currentPhase ?? 'unknown', + ...(db !== undefined ? { db } : {}), +}) +db?.close() + +const card = decision.action === 'propose' + ? { + type: 'plan' as const, + title: 'Planner proposal · 递归 planner 提案', + next_step: + 'Human decision: accept this proposal by starting a session on it, or ignore it. ' + + 'The planner stops here — it never implements, never pushes, never merges.', + machine: { user_state: 'planner_proposal', stage: 'loop-planner', hold_code: null, pr_gate_code: null }, + objective: decision.cycle.expectedDeliverable, + phases: [decision.cycle.phase], + acceptance_criteria: [ + 'The chosen gap moves with in-repo evidence (GR#7)', + 'Output stops at evidence + at most a Draft PR; merge stays human-only (GR#10)', + ], + risk_level: 'low' as const, + estimated_calls: 0, + requires_approval: true, + proposal: decision.cycle, + } + : { + type: 'blocker' as const, + title: 'Planner refused · 递归 planner 拒绝规划', + next_step: decision.recovery, + machine: { user_state: 'planner_refused', stage: 'loop-planner', hold_code: null, pr_gate_code: null }, + human_explanation: decision.reason, + why_it_matters: 'Planning on top of an unsafe baseline (dirty tree / red tests / blocked budget / ambiguous SoT / open holds) would burn credit and bury problems.', + recovery_actions: [decision.recovery], + recommended_action: decision.recovery, + } + +console.log('\n=== loop-planner inputs (real, gathered by this run) ===') +for (const n of notes) console.log(`- ${n.fact} = ${n.value}\n via ${n.how}`) 
+console.log('\n=== PlanCard (decision) ===') +console.log(JSON.stringify(card, null, 2)) +console.log('\n=== human text ===') +if (decision.action === 'propose') { + console.log(`PROPOSE cycle ${entry.cycle}: gap "${decision.cycle.gapId}" (phase ${decision.cycle.phase})`) + console.log(` why: ${decision.cycle.rationale}`) + console.log(` deliverable: ${decision.cycle.expectedDeliverable}`) +} else { + console.log(`REFUSE cycle ${entry.cycle}: ${decision.reason}`) + console.log(` recovery: ${decision.recovery}`) +} +console.log(`\nLedger entry written: ${entry.path}`) +console.log('loop-planner stops here. A human (or a human-started session) acts on this — never this script.') diff --git a/scripts/soak-status.ts b/scripts/soak-status.ts new file mode 100644 index 0000000..9a80e42 --- /dev/null +++ b/scripts/soak-status.ts @@ -0,0 +1,78 @@ +/** + * V6-P5 — soak-pending status CLI (`pnpm soak:status -- `). + * + * Writes/reads the `evidence/fleet-soak/soak-pending.json` artifact + * (contract: packages/daemon/src/soak-status.ts; runbook: + * docs/operations/SOAK_OPERATIONS.md). + * + * pnpm soak:status read + derive current status + * pnpm soak:status -- start mark a soak started now + * (window = AEDEV_SOAK_MS, default 1 week) + * pnpm soak:status -- complete mark the soak completed (terminal) + * pnpm soak:status -- fail mark the soak failed (terminal) + * + * Override the artifact path with AEDEV_SOAK_PENDING_PATH (used by the + * launchd wrapper so the artifact follows the evidence directory). + */ +import { join, resolve } from 'node:path' +import { + WEEK_MS, + buildSoakPending, + deriveSoakStatus, + readSoakPending, + writeSoakPending, + type SoakPending, +} from '@aedev/daemon' + +const PATH = process.env['AEDEV_SOAK_PENDING_PATH'] + ?? join(resolve(process.cwd()), 'evidence', 'fleet-soak', 'soak-pending.json') +// pnpm forwards a literal '--' separator; ignore it. +const command = process.argv.slice(2).filter((a) => a !== '--')[0] ?? 
'status' +const now = new Date() + +function print(p: SoakPending | null): void { + if (p === null) { + console.log(`no soak pending (${PATH} missing or invalid)`) + return + } + const derived = deriveSoakStatus(p, now) + console.log(JSON.stringify({ ...p, status: derived }, null, 2)) + if (derived === 'overdue') { + console.log('NOTE: the soak window has elapsed without a completion mark — generate the report' + + ' (see docs/operations/SOAK_OPERATIONS.md §failure-recovery) and then `pnpm soak:status -- complete`.') + } +} + +switch (command) { + case 'start': { + const soakMs = Number(process.env['AEDEV_SOAK_MS'] ?? WEEK_MS) + const pending = buildSoakPending(now, Number.isFinite(soakMs) && soakMs > 0 ? soakMs : WEEK_MS) + writeSoakPending(PATH, pending) + console.log(`soak-pending written: ${PATH}`) + print(pending) + break + } + case 'complete': + case 'fail': { + const existing = readSoakPending(PATH) + if (existing === null) { + console.error(`cannot mark "${command}": no valid soak-pending at ${PATH}`) + process.exitCode = 1 + break + } + const updated: SoakPending = { ...existing, status: command === 'complete' ? 'completed' : 'failed' } + writeSoakPending(PATH, updated) + print(updated) + break + } + case 'status': { + const p = readSoakPending(PATH) + print(p) + if (p === null) process.exitCode = 1 + break + } + default: { + console.error(`unknown command "${command}" — use: status | start | complete | fail`) + process.exitCode = 1 + } +}