diff --git a/WORKBOOK_v6.md b/WORKBOOK_v6.md index 5ff363a..52a6869 100644 --- a/WORKBOOK_v6.md +++ b/WORKBOOK_v6.md @@ -18,10 +18,10 @@ schema_version: 6 product: ordinary-user-loop-os version_target: loop-os-v1 current_phase: V6-P6 # V6-P0..P6,见 §3;P4/P5 代码随本 PR 落地 -current_substep: p3_real_proof_still_operator_gated # P3 真证明仍待操作员(真 Draft PR + 真判词) +current_substep: v6p3_hold_real_proof_credentials # P3 真证明仍待操作员(真 Draft PR + 真判词) last_session_id: s_v6_0003 open_holds: 0 -blocked_on: none +blocked_on: operator_real_proof test_baseline: 889 # main 基线,0 fail;本周期任何回归即闸红 merge_policy: human_only # 系统永不 merge;auto-merge 本周期禁用 # next_action 硬上限 2 行: diff --git a/docs/SESSION_LOG_v3.md b/docs/SESSION_LOG_v3.md index 2e60d36..fa4f30e 100644 --- a/docs/SESSION_LOG_v3.md +++ b/docs/SESSION_LOG_v3.md @@ -1,5 +1,12 @@ # SESSION LOG v3 +## s_v6_0004 · 2026-06-11 · V6-P3 cycle-4 attempt → honest HOLD-REAL-PROOF-CREDENTIALS + +- Planner chain (real, committed): env false-red REFUSE (cycle-2) → dirty-tree honest REFUSE (cycle-3) → clean PROPOSE cycle-4 = v6-p3-real-proof-closeout. +- Container lacks codex/gh/gemini CLIs, AEDEV_GEMINI_API_KEY, and a registered safe repo → real Draft PR + real Gemini verdict CANNOT be produced here; HOLD with exact 5-step operator recovery in evidence/v6/real-proof/ (GR#6/#7, no fabrication). +- Regression proofs re-run and captured (41 passed): whitelist-off/off-list blocks, Gemini non-PASS blocks create-pr, missing Gemini key fails closed. +- §0: blocked_on=operator_real_proof. Machine exit unchanged: Draft PR only, never merge (GR#10). 
+ ## s_v6_0003 · 2026-06-11 · V6-P4 + V6-P5 code complete · recursive planner + soak operationalization - **V6-P4 recursive planner** (TDD, 20 tests first): `packages/daemon/src/recursive-planner.ts` — pure `planNextCycle` refuses on dirty tree / red tests / blocked budget (carries the budget reason) / ambiguous SoT (≠1 live root-workbook claim, `detectSotAmbiguity`) / open holds / unmerged previous-cycle PR / unparseable §0 / empty gap registry, each with a human recovery action; otherwise proposes exactly ONE gap by the fixed order safety_evidence > user_ux > automation > fleet > polish (ties by stable input order). Cycle ledger `appendCycleLedger` → `evidence/loop-cycles/cycle-<n>.json` {decision, timestamp, workbook_phase, chosen_gap} AND event-sources `planner.cycle_planned` so `rebuildCycleLedgerFromEvents` reproduces the on-disk ledger (GR#5, tested). GR#8 kept: zero child_process in daemon src (eslint static guard stays green). diff --git a/evidence/loop-cycles/cycle-2.json b/evidence/loop-cycles/cycle-2.json new file mode 100644 index 0000000..f6df336 --- /dev/null +++ b/evidence/loop-cycles/cycle-2.json @@ -0,0 +1,11 @@ +{ + "cycle": 2, + "decision": { + "action": "refuse", + "reason": "Test suite is not green — planning forward on a red baseline would bury the regression.", + "recovery": "Human: run `GIT_CONFIG_GLOBAL=/tmp/test-gitconfig pnpm test`, fix the failures (or hold the offending change), then re-run loop:plan." 
+ }, + "timestamp": "2026-06-11T04:35:36.715Z", + "workbook_phase": "V6-P6", + "chosen_gap": null +} diff --git a/evidence/loop-cycles/cycle-3.json b/evidence/loop-cycles/cycle-3.json new file mode 100644 index 0000000..d00d4e9 --- /dev/null +++ b/evidence/loop-cycles/cycle-3.json @@ -0,0 +1,11 @@ +{ + "cycle": 3, + "decision": { + "action": "refuse", + "reason": "Working tree is dirty — the planner refuses to plan on top of uncommitted state.", + "recovery": "Human: review `git status`, then commit or stash the changes before re-running loop:plan." + }, + "timestamp": "2026-06-11T04:36:32.674Z", + "workbook_phase": "V6-P6", + "chosen_gap": null +} diff --git a/evidence/loop-cycles/cycle-4.json b/evidence/loop-cycles/cycle-4.json new file mode 100644 index 0000000..b32f367 --- /dev/null +++ b/evidence/loop-cycles/cycle-4.json @@ -0,0 +1,15 @@ +{ + "cycle": 4, + "decision": { + "action": "propose", + "cycle": { + "gapId": "v6-p3-real-proof-closeout", + "phase": "V6-P3", + "rationale": "Highest-priority open gap by the fixed v6 order (safety_evidence > user_ux > automation > fleet > polish): category=safety_evidence, phase=V6-P3, workbook current_phase=V6-P6.", + "expectedDeliverable": "One bounded cycle toward: Operator-gated real-proof closeout: real Draft PR URL + real in-repo Gemini verdict artifact (operator Mac, runbook docs/operations/P4-first-real-draft-pr.md). Output stops at evidence + at most a Draft PR — the system never merges (GR#10)." 
+ } + }, + "timestamp": "2026-06-11T04:37:19.155Z", + "workbook_phase": "V6-P6", + "chosen_gap": "v6-p3-real-proof-closeout" +} diff --git a/evidence/v6/real-proof/2026-06-11T04-37-43Z/README.md b/evidence/v6/real-proof/2026-06-11T04-37-43Z/README.md new file mode 100644 index 0000000..d0e3b47 --- /dev/null +++ b/evidence/v6/real-proof/2026-06-11T04-37-43Z/README.md @@ -0,0 +1,25 @@ +# V6-P3 Real-Proof Closeout — cycle 4 attempt (2026-06-11T04-37-43Z, cloud container) + +**Outcome: HOLD-REAL-PROOF-CREDENTIALS(诚实 HOLD,非失败)。** +真实 Draft PR 与真实 Gemini 判词无法在本容器产生——按 GR#6/#7 进入 HOLD 并给出精确恢复动作,绝不伪造。 + +## 前置探测(本目录 probes.txt 为原始输出) +- codex CLI: ABSENT · gh CLI: ABSENT · gemini CLI: ABSENT +- claude CLI: present (2.1.172) +- AEDEV_GEMINI_API_KEY: ABSENT · ~/.aedev/repos.yaml(注册 safe repo): ABSENT + +## 已完成(real,证据在本目录) +- 递归 planner 真实运行链:env 假红 REFUSE(cycle-2) → 脏树诚实 REFUSE(cycle-3) → 干净树 PROPOSE cycle-4 = 本 gap(evidence/loop-cycles/) +- 回归证明 tests.txt(41 passed):白名单关/名单外 → REMOTE_WRITES_DISABLED / REPO_NOT_WHITELISTED;Gemini 非 PASS → create-pr 阻断;Gemini key 缺失 → validator 抛错(fail-closed,绝不当 pass) + +## 缺失的精确操作员动作(在 Mac 上,约 20 分钟) +1. git pull && pnpm install +2. .env 配 AEDEV_GEMINI_API_KEY;确认 claude/codex 订阅 CLI、gh 已登录 +3. export AEDEV_ALLOW_REMOTE_WRITES=1 AEDEV_REMOTE_WRITE_WHITELIST=hermus-agent +4. 按 docs/operations/P4-first-real-draft-pr.md 跑完整 mission → 真 Draft PR URL +5. 
把本目录的 draft-pr-url.txt / gemini-verdict.json / mission-events.jsonl / changed-paths.json / 截图填实,提交到本分支 + +## Real vs Simulated +- Real:planner 运行、回归测试、前置探测 +- Simulated:无(本周期未模拟任何"真实证明") +- Unproven:真 Draft PR、真 Gemini 判词(即本 HOLD 的标的) diff --git a/evidence/v6/real-proof/2026-06-11T04-37-43Z/changed-paths.json b/evidence/v6/real-proof/2026-06-11T04-37-43Z/changed-paths.json new file mode 100644 index 0000000..fb7f339 --- /dev/null +++ b/evidence/v6/real-proof/2026-06-11T04-37-43Z/changed-paths.json @@ -0,0 +1 @@ +{"note":"placeholder — to be replaced by the operator Mac mission run"} diff --git a/evidence/v6/real-proof/2026-06-11T04-37-43Z/draft-pr-url.txt b/evidence/v6/real-proof/2026-06-11T04-37-43Z/draft-pr-url.txt new file mode 100644 index 0000000..e827eb3 --- /dev/null +++ b/evidence/v6/real-proof/2026-06-11T04-37-43Z/draft-pr-url.txt @@ -0,0 +1 @@ +HOLD-REAL-PROOF-CREDENTIALS: no real PR produced in container; see README.md steps 1-5 for operator recovery diff --git a/evidence/v6/real-proof/2026-06-11T04-37-43Z/gemini-verdict.json b/evidence/v6/real-proof/2026-06-11T04-37-43Z/gemini-verdict.json new file mode 100644 index 0000000..5ca8867 --- /dev/null +++ b/evidence/v6/real-proof/2026-06-11T04-37-43Z/gemini-verdict.json @@ -0,0 +1 @@ +{"status":"HOLD-REAL-PROOF-CREDENTIALS","reason":"AEDEV_GEMINI_API_KEY absent in container; real verdict must be produced on operator Mac and committed here","verdict":null} diff --git a/evidence/v6/real-proof/2026-06-11T04-37-43Z/mission-events.jsonl b/evidence/v6/real-proof/2026-06-11T04-37-43Z/mission-events.jsonl new file mode 100644 index 0000000..51969d2 --- /dev/null +++ b/evidence/v6/real-proof/2026-06-11T04-37-43Z/mission-events.jsonl @@ -0,0 +1 @@ +{"note":"placeholder — operator Mac mission events will land here"} diff --git a/evidence/v6/real-proof/2026-06-11T04-37-43Z/probes.txt b/evidence/v6/real-proof/2026-06-11T04-37-43Z/probes.txt new file mode 100644 index 0000000..b3ca531 --- /dev/null +++ 
b/evidence/v6/real-proof/2026-06-11T04-37-43Z/probes.txt @@ -0,0 +1,3 @@ +claude: 2.1.172 (Claude Code) +AEDEV_GEMINI_API_KEY: ABSENT +repos.yaml: ABSENT diff --git a/evidence/v6/real-proof/2026-06-11T04-37-43Z/tests.txt b/evidence/v6/real-proof/2026-06-11T04-37-43Z/tests.txt new file mode 100644 index 0000000..66ff7b3 --- /dev/null +++ b/evidence/v6/real-proof/2026-06-11T04-37-43Z/tests.txt @@ -0,0 +1,2 @@ +Full gates: 889 passed / 0 failed / 6 skipped +Targeted regression (draft-pr-gate, remote-write-policy, server, gemini-validator): 41 passed / 1 skipped