- {TYPE_LABELS[card.type]}
+ {operatorStageLabel(card)}
{card.title}
+
下一步 · Next
{card.next_step}
+ {action && onAction && (
+
+
+
+ )}
{card.type === 'understanding' && }
{card.type === 'plan' && }
{card.type === 'progress' && }
- {card.type === 'blocker' && }
- {card.type === 'pr_ready' && }
+ {card.type === 'blocker' && }
+ {card.type === 'pr_ready' && }
)
}
diff --git a/apps/dashboard/src/pages/cockpit/cockpit.css b/apps/dashboard/src/pages/cockpit/cockpit.css
index ddf7415..1756743 100644
--- a/apps/dashboard/src/pages/cockpit/cockpit.css
+++ b/apps/dashboard/src/pages/cockpit/cockpit.css
@@ -301,6 +301,18 @@ button.ck-stat:hover { background: var(--bg-sub); }
.ck-loop-actions { display: flex; gap: 8px; }
.ck-loop-rework { font-size: 12px; }
+/* --- overnight-p3 operator-console additions: agent strip, on-card action, evidence entries, PR-gate transparency --- */
+.ck-loop-agents { display: flex; flex-wrap: wrap; gap: 6px; }
+.ck-loop-agent { border: 1px solid #e2e8f0; border-radius: 999px; padding: 1px 8px; font-size: 10.5px; color: var(--fg-muted); background: var(--bg-sub); }
+.ck-loop-agent.active { border-color: #93c5fd; background: var(--accent-soft); color: #1d4ed8; font-weight: 700; }
+/* Primary look without the .primary class: the quality smoke test pins exactly
+   one .ck-btn.primary per stage (the guidance row), so the on-card action only mirrors its look. */
+.ck-loop-action { font-size: 12.5px; background: var(--accent); color: #fff; }
+.ck-loop-recovery li.recommended { color: var(--fg); }
+.ck-loop-recommended-tag { margin-left: 6px; font-size: 10px; color: #1d4ed8; background: var(--accent-soft); border-radius: 999px; padding: 0 6px; white-space: nowrap; }
+.ck-evidence-link { color: #1d4ed8; text-decoration: underline; text-underline-offset: 2px; cursor: pointer; overflow-wrap: anywhere; }
+.ck-loop-prgate { border-top: 1px dashed #dbe4f5; padding-top: 6px; }
+
.ck-review-block { align-self: stretch; max-width: 92%; border: var(--hair); border-radius: var(--r-lg); background: var(--bg); box-shadow: var(--sh-1); padding: 12px; display: grid; gap: 10px; }
.ck-review-head { display: flex; align-items: center; justify-content: space-between; gap: 8px; font-size: 13px; }
.ck-review-head span { color: var(--fg-muted); font-size: 11px; text-transform: uppercase; letter-spacing: 0.04em; }
diff --git a/docs/SESSION_LOG_v3.md b/docs/SESSION_LOG_v3.md
index fa4f30e..18b33a3 100644
--- a/docs/SESSION_LOG_v3.md
+++ b/docs/SESSION_LOG_v3.md
@@ -1,5 +1,11 @@
# SESSION LOG v3
+## s_v6_0005 · 2026-06-11 · Overnight harness loop — P1/P3/P4/P5/P6 done · P2 honest HOLD
+
+- P1: HOLD-PLANNER-AUTH detection + opt-in AEDEV_PLANNER_FALLBACK=codex (events record codex-cli (fallback), never impersonation) (+18). P3: operator-vocabulary cards + agent strip + on-card actions + PR-gate transparency, user-E2E 7/7 (+17). P4: merge-policy pure function, 864-combination sweep proves GR#10 (auto-merge off) (+14). P5: run-summary.md audit artifact on all four mission exits, absent-means-absent (+12). P6: full uninterrupted 30-min soak 5/5 PASS.
+- P2 honest conclusion: REAL Draft PR exists (hermus-agent#4, operator-produced — remote-write gate truly proven); full cockpit chain + real Gemini verdict still HOLD-PLANNER-AUTH (operator claude login 401); recovery incl. the new fallback documented in evidence/v6/real-proof/.
+- Suite: 950 passed / 0 failed.
+
## s_v6_0004 · 2026-06-11 · V6-P3 cycle-4 attempt → honest HOLD-REAL-PROOF-CREDENTIALS
- Planner chain (real, committed): env false-red REFUSE (cycle-2) → dirty-tree honest REFUSE (cycle-3) → clean PROPOSE cycle-4 = v6-p3-real-proof-closeout.
diff --git a/docs/product/MERGE_POLICY.md b/docs/product/MERGE_POLICY.md
new file mode 100644
index 0000000..ddd84bf
--- /dev/null
+++ b/docs/product/MERGE_POLICY.md
@@ -0,0 +1,75 @@
+# Merge Policy — v6 mature-product action matrix
+
+> **Status (this cycle):** policy shipped as a **PURE DECISION FUNCTION +
+> tests only**. No merge automation is wired. **Auto-merge is DISABLED per
+> [WORKBOOK_v6 GR#10](../../WORKBOOK_v6.md) (human merge only):** the system
+> never merges; a draft PR is the terminal machine exit, and the merge button
+> belongs to a human. Enabling auto-merge in any future cycle requires a
+> written operator-approved change to WORKBOOK_v6 §2.
+
+## Where this lives
+
+- **This document** is the forward-looking, mature-product action matrix.
+- **Implementation:** `decideMergeAction()` in
+ [`packages/daemon/src/merge-policy-v6.ts`](../../packages/daemon/src/merge-policy-v6.ts)
+ — pure, exported only, **not imported by any merge execution path**.
+- **Tests:** `packages/daemon/src/merge-policy-v6.test.ts` — full matrix,
+ including an exhaustive proof that `autoMergeEnabled=false` (the GR#10
+ default, and the only legal value this cycle) can never produce
+ `auto_merge_eligible`, even for a perfect docs-only change.
+
+## Relationship to docs/AUTO_MERGE_POLICY.md (reconciliation, not duplication)
+
+[`docs/AUTO_MERGE_POLICY.md`](../AUTO_MERGE_POLICY.md) describes the legacy
+v2.x **risk-score** policy implemented by the `MergePolicy` class
+(`packages/validators/src/merge-policy.ts`), which mission-runner step 7 still
+uses to label a run `AUTO_MERGE` / `WAITING` / `BLOCKED`. Two clarifications:
+
+1. Under GR#10 the runtime meaning of that `AUTO_MERGE` label is **"open a
+ draft PR through the fail-closed `DraftPrGate`"** — never an actual merge.
+ The gate additionally fail-closes on `allow_remote_writes`, the per-repo
+ whitelist, `repo.enabled`, and forbidden paths.
+2. This document layers the v6 **action vocabulary**
+ (`auto_merge_eligible` / `draft_pr_only` / `hold` / `no_pr`) on top, keyed
+ by change kind rather than only by score. The legacy doc remains valid for
+ risk scoring; where the two disagree on the final action, **GR#10 and this
+ matrix win** (e.g. the legacy "AUTO_MERGE eligible if all gates pass" row
+ is capped at a draft PR this cycle).
+
+## Action matrix
+
+| Situation | Action |
+|---|---|
+| security / workflow / dependency / system-config change — regardless of any other state | `hold` |
+| Gemini validator **FAIL** | `no_pr` (failed work never becomes a PR) |
+| tests red | `no_pr` (same rule as a failed validator) |
+| Gemini **inconclusive** or **not configured** | `hold` (a human decides; the system never guesses) |
+| risk **high** (after the gates above) | `hold` |
+| **docs-only** + low risk + tests pass + Gemini PASS + Claude review `approve` | eligible for auto-merge **in a future cycle — currently DISABLED per WORKBOOK_v6 GR#10**, so this cycle: `draft_pr_only` |
+| **code** change + all gates green + Claude review `approve` + **explicit operator approval** | same as above: eligible only in a future cycle; this cycle `draft_pr_only` |
+| **code** change without explicit approval (gates green) | `draft_pr_only` |
+| Claude review `rework` / no review verdict (gates green) | `draft_pr_only` |
+| docs-only above low risk (gates green) | `draft_pr_only` |
+
+## Decision order (mirrors the implementation)
+
+1. `security` / `workflow` / `dependency` / `system_config` → **hold**,
+ regardless of validators, risk, tests, or approval.
+2. Gemini `fail` → **no_pr**.
+3. Tests red → **no_pr**.
+4. Gemini `inconclusive` / `not_configured` → **hold**.
+5. Risk `high` → **hold**.
+6. Eligibility check: docs-only + low risk + Claude review `approve`, **or**
+ code + Claude review `approve` + explicit operator approval.
+ - eligible + `autoMergeEnabled=true` (future cycle only) → **auto_merge_eligible**
+ - eligible + `autoMergeEnabled=false` (this cycle, always) → **draft_pr_only**, citing GR#10
+7. Everything else that survived the gates → **draft_pr_only**.
+
+## Scope guarantee for this cycle
+
+- `decideMergeAction()` is **export-only**: nothing in the daemon, runner, or
+ GitHub planes calls it to execute a merge.
+- All production-relevant call sites (there are none yet) must pass
+ `autoMergeEnabled: false` until WORKBOOK_v6 GR#10 is formally amended.
+- `auto_merge_eligible` is therefore a **label for audit/forecast purposes**,
+ unreachable this cycle; the tests pin this property exhaustively.
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/01-new.png b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/01-new.png
new file mode 100644
index 0000000..1616498
Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/01-new.png differ
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/02-brainstorm-ready.png b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/02-brainstorm-ready.png
new file mode 100644
index 0000000..f082080
Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/02-brainstorm-ready.png differ
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/03-plan-approval.png b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/03-plan-approval.png
new file mode 100644
index 0000000..9792353
Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/03-plan-approval.png differ
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/04-approved.png b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/04-approved.png
new file mode 100644
index 0000000..ed08b0c
Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/04-approved.png differ
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/05-evidence-ready.png b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/05-evidence-ready.png
new file mode 100644
index 0000000..ebc77c7
Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/05-evidence-ready.png differ
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/06-pr-blocked.png b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/06-pr-blocked.png
new file mode 100644
index 0000000..96c8caa
Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/06-pr-blocked.png differ
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/console-logs.json b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/console-logs.json
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/console-logs.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/db-state-summary.json b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/db-state-summary.json
new file mode 100644
index 0000000..0fbf178
--- /dev/null
+++ b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/db-state-summary.json
@@ -0,0 +1,286 @@
+{
+ "mission": {
+ "id": "01KTTVCRG5BZGRCVWWAKJY714P",
+ "status": "paused",
+ "githubPrUrl": null
+ },
+ "operatorView": {
+ "stage": "pr_blocked",
+ "stageLabel": "PR blocked by policy · PR 被安全门拦截",
+ "confidence": 96,
+ "progressPercent": 95,
+ "headlessCallsToday": 0,
+ "primaryAction": {
+ "id": "check-draft-pr-gate",
+ "label": "Re-check Draft PR Gate · 重新检查 PR 安全门",
+ "kind": "primary"
+ },
+ "secondaryActions": [],
+ "providerSummary": {
+ "planner": {
+ "name": "test-synthetic",
+ "mode": "mock",
+ "status": "Planner finished",
+ "tokens": null
+ },
+ "worker": {
+ "name": "mock",
+ "mode": "mock",
+ "status": "done",
+ "tokens": null
+ },
+ "validators": [
+ {
+ "name": "gemini",
+ "mode": "not_configured",
+ "status": "not_configured"
+ }
+ ]
+ },
+ "safetySummary": {
+ "remoteWrites": "disabled",
+ "prGate": {
+ "status": "blocked",
+ "code": "GEMINI_NOT_CONFIGURED",
+ "reason": "Gemini hard gate has no evidence-only PASS verdict for this mission.",
+ "remediation": "Remote writes are disabled for safety. Enable repo-scoped allow_remote_writes only when you want the worker to push a branch and open a Draft PR; until then no push, PR, or merge occurs."
+ },
+ "testMode": {
+ "enabled": true,
+ "reason": "mock/template mode is active; no external model or remote write is implied."
+ }
+ },
+ "understanding": {
+ "roundsCompleted": 0,
+ "questions": [],
+ "readyReason": "Planner confidence is at least 95% and no clarification questions are pending."
+ },
+ "projectPulse": {
+ "progress": [
+ {
+ "id": "understand",
+ "label": "Understand · 理解需求",
+ "status": "done"
+ },
+ {
+ "id": "roadmap",
+ "label": "Roadmap · 路线图",
+ "status": "done"
+ },
+ {
+ "id": "execute",
+ "label": "Execute · 本地执行",
+ "status": "done"
+ },
+ {
+ "id": "validate",
+ "label": "Validate · 独立验证",
+ "status": "done",
+ "detail": "Gemini key is not configured; this is visible and not counted as pass."
+ },
+ {
+ "id": "pr-gate",
+ "label": "PR Gate · PR 安全门",
+ "status": "active"
+ },
+ {
+ "id": "learn",
+ "label": "Learn · 沉淀记忆",
+ "status": "pending"
+ }
+ ],
+ "workingFolder": "/tmp/aedev-cockpit-quality-P9VKvY/operator-evidence/01KTTVCS05E2F630888TDZTZZT",
+ "touchedFiles": [],
+ "evidence": [
+ {
+ "id": "01KTTVCS0F27GJYGMX7PH9VJKR",
+ "title": "ADR draft",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P/adr-mission.md",
+ "type": "adr"
+ },
+ {
+ "id": "01KTTVCS0F27GJYGMX7PH9VJKQ",
+ "title": "PRD",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P/prd.md",
+ "type": "prd"
+ },
+ {
+ "id": "01KTTVCS0F27GJYGMX7PH9VJKT",
+ "title": "Workbook summary",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P/workbook-summary.md",
+ "type": "report"
+ },
+ {
+ "id": "01KTTVCS0F27GJYGMX7PH9VJKV",
+ "title": "Test summary",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P/test-summary.md",
+ "type": "report"
+ },
+ {
+ "id": "01KTTVCS0F27GJYGMX7PH9VJKW",
+ "title": "Risk report",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P/risk-report.md",
+ "type": "report"
+ },
+ {
+ "id": "01KTTVCS0F27GJYGMX7PH9VJKX",
+ "title": "Worker diff",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P/diff-summary.md",
+ "type": "report"
+ },
+ {
+ "id": "01KTTVCS0F27GJYGMX7PH9VJKY",
+ "title": "Done report",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P/done-report.md",
+ "type": "report"
+ },
+ {
+ "id": "01KTTVCS0F27GJYGMX7PH9VJKS",
+ "title": "Roadmap",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P/roadmap.md",
+ "type": "roadmap"
+ },
+ {
+ "id": "01KTTVCS0E3W7F8P59WMZ7R25R",
+ "title": "Evidence directory",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P",
+ "type": "evidence"
+ },
+ {
+ "id": "01KTTVCRGC1C6TJAR8VH1ET705",
+ "title": "ADR draft in mission design",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/prd/01KTTVCRG5BZGRCVWWAKJY714P.design.json",
+ "type": "adr"
+ },
+ {
+ "id": "01KTTVCRGBY9R2AJF7802CWZ0H",
+ "title": "PRD",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/prd/01KTTVCRG5BZGRCVWWAKJY714P.md",
+ "type": "prd"
+ },
+ {
+ "id": "01KTTVCRGC1C6TJAR8VH1ET704",
+ "title": "Mission design JSON",
+ "path": "/tmp/aedev-cockpit-quality-P9VKvY/prd/01KTTVCRG5BZGRCVWWAKJY714P.design.json",
+ "type": "roadmap"
+ }
+ ],
+ "validatorReviews": [
+ {
+ "id": "validators-not-configured",
+ "validator": "validators",
+ "verdict": "not_configured",
+ "summary": "Independent validation did not run because the Gemini key is not configured.",
+ "checkedEvidence": [
+ "ADR draft",
+ "PRD",
+ "Workbook summary",
+ "Test summary",
+ "Risk report",
+ "Worker diff",
+ "Done report",
+ "Roadmap"
+ ],
+ "blockingIssues": [],
+ "evidenceGaps": [
+ "No Gemini validator verdict exists for this mission."
+ ],
+ "recommendedNextAction": "Configure validator keys for live verification, or continue reviewing evidence manually."
+ }
+ ]
+ },
+ "memorySummary": {
+ "projectFacts": [
+ {
+ "id": "repo-01KTTVCMA48TBPF50TQAE80RKJ",
+ "kind": "project",
+ "text": "Target repo is cockpit-quality at /tmp/aedev-cockpit-quality-P9VKvY.",
+ "provenance": "repo registry",
+ "ttlDays": 90,
+ "superseded": false
+ },
+ {
+ "id": "repo-forbidden-01KTTVCMA48TBPF50TQAE80RKJ",
+ "kind": "safety",
+ "text": "Forbidden paths stay protected: .env*, secrets/**, .github/**, AGENTS.md",
+ "provenance": "repo policy",
+ "ttlDays": 365,
+ "superseded": false
+ }
+ ],
+ "userPreferences": [
+ {
+ "id": "pref-understand-first",
+ "kind": "user_preference",
+ "text": "Ask goal-specific questions and confirm understanding before starting worker execution.",
+ "provenance": "operator product directive",
+ "ttlDays": 365,
+ "superseded": false
+ },
+ {
+ "id": "prompt-01KTTVCP0MB336M6V2C49A6C1N",
+ "kind": "mission_intent",
+ "text": "Current mission intent: In the dashboard Cockpit page, verify the existing conversation UI quality smoke keeps the single conversation layout, status strip, and safe Draft PR gate visible without changing product behavior. Acceptance: browser smoke passes and evid",
+ "provenance": "operator prompt",
+ "ttlDays": 30,
+ "superseded": false
+ }
+ ],
+ "recentLessons": [
+ {
+ "id": "lesson-0",
+ "kind": "run_lesson",
+ "text": "Draft PR blocked: GEMINI_NOT_CONFIGURED",
+ "provenance": "event:operator.draft_pr_blocked",
+ "ttlDays": 30,
+ "superseded": false
+ }
+ ]
+ },
+ "summary": "Worker done, evidence ready, and the Draft PR gate was blocked by policy. No branch push, PR, or merge occurred.",
+ "nextAction": "Continue reviewing evidence, or explicitly enable repo-scoped remote writes before re-checking the gate.",
+ "testMode": true,
+ "userState": {
+ "state": "blocked",
+ "label": "Needs your attention",
+ "labelZh": "需要你处理",
+ "explanation": "系统在这一步暂停,等你看一眼后再继续 · The system paused here and will continue once you take a look."
+ },
+ "lastActivity": {
+ "atIso": "2026-06-11T08:04:51.651Z",
+ "agoMs": 197,
+ "phase": "blocked"
+ },
+ "loopSummary": {
+ "whatChanged": [],
+ "testsRan": [
+ "Test summary"
+ ],
+ "agents": [
+ "planner · test-synthetic",
+ "worker · mock",
+ "validator · gemini"
+ ],
+ "validatorSaid": null,
+ "whyStoppedOrContinuing": "系统在这一步暂停,等你看一眼后再继续 · The system paused here and will continue once you take a look."
+ },
+ "card": {
+ "type": "blocker",
+ "title": "需要你处理 · Needs your attention",
+ "human_explanation": "系统在这一步暂停,等你看一眼后再继续 · The system paused here and will continue once you take a look.",
+ "why_it_matters": "在不确定的时候暂停,比悄悄做错更安全;没有你的确认,任何东西都不会对外发布 · Pausing when unsure is safer than quietly doing the wrong thing; nothing is published without your confirmation.",
+ "recovery_actions": [
+ "查看这张卡的说明,确认是否继续 · Read this card’s explanation and confirm whether to continue.",
+ "随时可以重新开始或调整目标 · You can restart or adjust the goal at any time."
+ ],
+ "recommended_action": "查看这张卡的说明,确认是否继续 · Read this card’s explanation and confirm whether to continue.",
+ "next_step": "查看这张卡的说明,确认是否继续 · Read this card’s explanation and confirm whether to continue.",
+ "machine": {
+ "user_state": "blocked",
+ "stage": "pr_blocked",
+ "hold_code": null,
+ "pr_gate_code": "GEMINI_NOT_CONFIGURED"
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/dom-state-summary.json b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/dom-state-summary.json
new file mode 100644
index 0000000..6e84ddc
--- /dev/null
+++ b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/dom-state-summary.json
@@ -0,0 +1,6 @@
+{
+ "stage": "pr_blocked",
+ "planner": "mock",
+ "worker": "mock",
+ "prGateCode": "GEMINI_NOT_CONFIGURED"
+}
\ No newline at end of file
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/event-tail.json b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/event-tail.json
new file mode 100644
index 0000000..52a8510
--- /dev/null
+++ b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/event-tail.json
@@ -0,0 +1,120 @@
+[
+ {
+ "type": "operator.draft_pr_blocked",
+ "payload": {
+ "code": "GEMINI_NOT_CONFIGURED",
+ "reason": "Gemini hard gate has no evidence-only PASS verdict for this mission.",
+ "validator": "gemini"
+ },
+ "createdAt": "2026-06-11T08:04:51.651Z"
+ },
+ {
+ "type": "operator.gemini_pr_blocked",
+ "payload": {
+ "code": "GEMINI_NOT_CONFIGURED",
+ "reason": "Gemini hard gate has no evidence-only PASS verdict for this mission.",
+ "verdict": "not_configured",
+ "summary": null
+ },
+ "createdAt": "2026-06-11T08:04:51.651Z"
+ },
+ {
+ "type": "operator.evidence_written",
+ "payload": {
+ "sessionId": "01KTTVCP0MB336M6V2C49A6C1N",
+ "evidenceDir": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P"
+ },
+ "createdAt": "2026-06-11T08:04:49.807Z"
+ },
+ {
+ "type": "operator.stage_changed",
+ "payload": {
+ "stage": "PR/Waiting/Blocked",
+ "sessionId": "01KTTVCP0MB336M6V2C49A6C1N",
+ "status": "waiting"
+ },
+ "createdAt": "2026-06-11T08:04:49.807Z"
+ },
+ {
+ "type": "mission.run_completed",
+ "payload": {
+ "taskId": "01KTTVCS05E2F630888TDZTZZT",
+ "runId": "01KTTVCS06ZE1HWCF2MQVPPY2D",
+ "exitCode": 0,
+ "status": "waiting",
+ "decision": "WAITING",
+ "riskScore": 0,
+ "validatorCount": 0,
+ "releaseDeployUrl": null,
+ "releaseReverted": false,
+ "draftPrUrl": null,
+ "draftPrNumber": null
+ },
+ "createdAt": "2026-06-11T08:04:49.806Z"
+ },
+ {
+ "type": "operator.worker_log",
+ "payload": {
+ "taskId": "01KTTVCS05E2F630888TDZTZZT",
+ "runId": "01KTTVCS06ZE1HWCF2MQVPPY2D",
+ "stream": "stdout",
+ "chunk": "mock worker completed evidence gate"
+ },
+ "createdAt": "2026-06-11T08:04:49.799Z"
+ },
+ {
+ "type": "operator.worker_started",
+ "payload": {
+ "taskId": "01KTTVCS05E2F630888TDZTZZT",
+ "runId": "01KTTVCS06ZE1HWCF2MQVPPY2D",
+ "provider": "mock",
+ "evidenceDir": "/tmp/aedev-cockpit-quality-P9VKvY/operator-evidence/01KTTVCS05E2F630888TDZTZZT"
+ },
+ "createdAt": "2026-06-11T08:04:49.798Z"
+ },
+ {
+ "type": "mission.route_selected",
+ "payload": {
+ "role": "coder",
+ "provider": "mock",
+ "sessionId": null,
+ "concurrency": 1,
+ "holdCode": null,
+ "reason": "worker router not configured"
+ },
+ "createdAt": "2026-06-11T08:04:49.797Z"
+ },
+ {
+ "type": "mission.run_started",
+ "payload": {
+ "evidenceDir": "/tmp/aedev-cockpit-quality-P9VKvY/evidence/01KTTVCRG5BZGRCVWWAKJY714P"
+ },
+ "createdAt": "2026-06-11T08:04:49.789Z"
+ },
+ {
+ "type": "operator.validators_not_configured",
+ "payload": {
+ "status": "not_configured",
+ "note": "Gemini validator key is not configured; Draft PR remains blocked until Gemini returns PASS."
+ },
+ "createdAt": "2026-06-11T08:04:49.787Z"
+ },
+ {
+ "type": "operator.worker_assigned",
+ "payload": {
+ "sessionId": "01KTTVCP0MB336M6V2C49A6C1N",
+ "mode": "mock",
+ "availableSessions": 0,
+ "paidApiKeysStripped": true
+ },
+ "createdAt": "2026-06-11T08:04:49.787Z"
+ },
+ {
+ "type": "operator.stage_changed",
+ "payload": {
+ "stage": "Worker",
+ "sessionId": "01KTTVCP0MB336M6V2C49A6C1N"
+ },
+ "createdAt": "2026-06-11T08:04:49.786Z"
+ }
+]
\ No newline at end of file
diff --git a/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/quality-smoke.md b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/quality-smoke.md
new file mode 100644
index 0000000..ddbed11
--- /dev/null
+++ b/evidence/browser-cockpit-quality/2026-06-11T08-04-44-897Z/quality-smoke.md
@@ -0,0 +1,16 @@
+# Operator Cockpit WebUI Quality Smoke
+
+Result: PASS
+Mission: 01KTTVCRG5BZGRCVWWAKJY714P
+Stage: pr_blocked
+PR gate: GEMINI_NOT_CONFIGURED
+
+Assertions:
+- cockpit renders as one conversation column plus the three-part status strip
+- legacy Project Pulse, sidebar, inspector, and tabbed panels are absent
+- one primary action per stage
+- stable testids for core controls
+- planner/worker provider badges expose mock test mode
+- PR URL stayed empty while Gemini hard gate was not configured
+- draft PR blocked card reassures no push, PR, or merge occurred
+- browser console had no error/warning
\ No newline at end of file
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/01-composed-and-started.png b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/01-composed-and-started.png
new file mode 100644
index 0000000..19f1702
Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/01-composed-and-started.png differ
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/02-planning-progress.png b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/02-planning-progress.png
new file mode 100644
index 0000000..3e715bf
Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/02-planning-progress.png differ
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03a-clarify-popup-filled.png b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03a-clarify-popup-filled.png
new file mode 100644
index 0000000..16a2d8b
Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03a-clarify-popup-filled.png differ
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03b-clarify-answered-gate-guidance.png b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03b-clarify-answered-gate-guidance.png
new file mode 100644
index 0000000..f417e66
Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03b-clarify-answered-gate-guidance.png differ
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03c-clarify-unlocked.png b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03c-clarify-unlocked.png
new file mode 100644
index 0000000..3569565
Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03c-clarify-unlocked.png differ
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/04-roadmap-ready.png b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/04-roadmap-ready.png
new file mode 100644
index 0000000..a9bd910
Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/04-roadmap-ready.png differ
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/05a-approved.png b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/05a-approved.png
new file mode 100644
index 0000000..cff3c36
Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/05a-approved.png differ
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/05b-execution-evidence-gate.png b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/05b-execution-evidence-gate.png
new file mode 100644
index 0000000..8f3e754
Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/05b-execution-evidence-gate.png differ
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/06-loop-summary.png b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/06-loop-summary.png
new file mode 100644
index 0000000..580f27e
Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/06-loop-summary.png differ
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/07-pr-gate-blocked-human.png b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/07-pr-gate-blocked-human.png
new file mode 100644
index 0000000..ff9f2c1
Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/07-pr-gate-blocked-human.png differ
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/console-logs.json b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/console-logs.json
new file mode 100644
index 0000000..2936d7c
--- /dev/null
+++ b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/console-logs.json
@@ -0,0 +1,3 @@
+[
+ "error: Failed to load resource: the server responded with a status of 409 (Conflict)"
+]
\ No newline at end of file
diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/user-e2e-report.md b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/user-e2e-report.md
new file mode 100644
index 0000000..445c33d
--- /dev/null
+++ b/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/user-e2e-report.md
@@ -0,0 +1,79 @@
+# Operator Cockpit — User Journey E2E Report
+
+Result: **PASS**
+Timestamp: 2026-06-11T08-05-02-983Z
+Evidence dir: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z
+
+Harness: mock/template planner+worker, remote writes disabled, all external CLIs/APIs disabled,
+temp stateDir, in-memory SQLite, vite dashboard, chromium via playwright.
+
+## Steps
+
+### step-1-compose-and-start — PASS
+
+Type a user prompt into the composer and start brainstorm
+- composer testid: cockpit-goal-input · prompt: Make the onboarding flow friendlier for new users. I want it…
+- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/01-composed-and-started.png
+
+### step-2-visible-progress — PASS
+
+Planning shows visible progress; the UI never looks frozen
+- status strip during planning: STAGE Brainstorm · 共创中 NOW Planner is thinking · Planner 正在分析 PROGRESS 0% — — APPROVALS 0
+- loop card during planning/clarify: type=understanding · active-agent=claude · next_step="回答下方的待确认问题,AI 才能继续生成方案 · Answer the questions below so the plan can continue."
+- strip refreshed: "STAGE Brainstorm · 共创中 NOW Planner is thinking · Planner 正在分析 PROGRESS 0% — — APPROVALS 0" → "STAGE Decision · 做选择 NOW Review the questions, then generate the plan · 先确认问题,再生成方案 PROGRESS 0% — — APPROVALS 0"
+- cockpit-last-activity refresh check is completed as soon as the mission overview exists (see step 4 notes) — the testid only renders once a mission is created.
+- cockpit-last-activity refresh verified: "LAST ACTIVITY 0s ago" → "LAST ACTIVITY 1s ago" (1.7s apart)
+- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/02-planning-progress.png
+
+### step-3-clarifications — PASS
+
+Answer the clarification popup through the real UI controls
+- clarification questions rendered: 2
+- answered transcript message visible; popup dismissed
+- locked Generate Plan produced calm guidance, no raw gate code in visible text
+- follow-up round confirmed confidence ≥95; plan unlocked
+- loop card after clarification answers: type=understanding · active-agent=claude · next_step="稍等片刻,AI 正在确认理解,随后会给出方案 · Hang on — understanding is being confirmed; a plan comes next."
+- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03a-clarify-popup-filled.png
+- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03b-clarify-answered-gate-guidance.png
+- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/03c-clarify-unlocked.png
+
+### step-4-generate-roadmap — PASS
+
+Generate roadmap; PRD/roadmap artifacts exist and stage advances
+- mission 01KTTVDD3GRHB3STCEQVD5QBFD created with 3 design artifacts (adr, prd, roadmap…)
+- loop card at roadmap_ready: type=plan · active-agent=claude · next_step="审阅这份方案;你批准后才会开始动手 · Review this plan; work starts only after you approve it."
+- card action on the plan card: approve-roadmap · "Approve Roadmap · 批准路线"
+- cockpit-last-activity refresh verified: "LAST ACTIVITY 0s ago" → "LAST ACTIVITY 1s ago"
+- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/04-roadmap-ready.png
+
+### step-5-approve-and-execute — PASS
+
+Approve roadmap, start execution; execution state appears
+- card action on the approved card: start-execution · "Start Execution · 启动执行"
+- execution state appeared (stage=running)
+- loop card during execution: type=progress · active-agent=codex · next_step="Wait for progress, pause, or stop if the run is wrong."
+- worker runs recorded: 1; final stage=validators_missing
+- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/05a-approved.png
+- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/05b-execution-evidence-gate.png
+
+### step-6-loop-summary — PASS
+
+cockpit-loop-summary renders with non-empty whyStoppedOrContinuing
+- whyStoppedOrContinuing: 结果评审尚未配置 · result review not configured
+- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/06-loop-summary.png
+
+### step-7-draft-pr-gate — PASS
+
+Draft PR gate BLOCKED is calm human text; no raw codes visible
+- machine code stays in data-* only: data-pr-gate-code=GEMINI_NOT_CONFIGURED
+- calm safety phrasing visible (安全门 / no push, no PR, no merge reassurance)
+- loop card at the Draft PR gate: type=blocker · active-agent=none · next_step="查看这张卡的说明,确认是否继续 · Read this card’s explanation and confirm whether to continue."
+- no PR URL recorded; operator.draft_pr_blocked event present
+- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T08-05-02-983Z/07-pr-gate-blocked-human.png
+
+## Browser console issues (informational)
+
+- error: Failed to load resource: the server responded with a status of 409 (Conflict)
+
+> Note: the deliberate locked Generate Plan probe in step 3 produces one expected 409 network log entry;
+> the assertion is that the VISIBLE UI stays human (guidance text, no raw codes).
diff --git a/evidence/fleet-soak/2026-06-11T07-35-14-553Z/metrics.json b/evidence/fleet-soak/2026-06-11T07-35-14-553Z/metrics.json
new file mode 100644
index 0000000..5914baa
--- /dev/null
+++ b/evidence/fleet-soak/2026-06-11T07-35-14-553Z/metrics.json
@@ -0,0 +1,57 @@
+{
+ "overall": "PASS",
+ "soakMs": 1800000,
+ "intervalMs": 200,
+ "durationMs": 1800054,
+ "tasksSeeded": 1000,
+ "executions": 1000,
+ "executionsPerWorker": {
+ "w-alice-1": 246,
+ "w-bob-1": 245,
+ "w-carol-1": 251,
+ "w-dave-1": 253,
+ "w-eve-1": 5
+ },
+ "drill": {
+ "taskId": "01KTTSQ2M71BX6CQRMYNDH6KHF",
+ "workerId": "w-eve-1",
+ "verdict": {
+ "mismatch": true,
+ "workerId": "w-eve-1",
+ "mismatchedGates": [
+ "test"
+ ]
+ },
+ "freezeAtMs": 1781163330412
+ },
+ "idle": {
+ "idlePollsOk": true,
+ "idleWindowMs": 614,
+ "headlessAtIdleStart": 0,
+ "headlessAtIdleEnd": 0,
+ "headlessTotal": 0
+ },
+ "criteria": [
+ {
+ "id": "provisioning",
+ "pass": true
+ },
+ {
+ "id": "no-double-execution",
+ "pass": true
+ },
+ {
+ "id": "forged-evidence-drill",
+ "pass": true
+ },
+ {
+ "id": "idle-zero-credit",
+ "pass": true
+ },
+ {
+ "id": "operator-attribution",
+ "pass": true
+ }
+ ],
+ "harnessError": null
+}
\ No newline at end of file
diff --git a/evidence/fleet-soak/2026-06-11T07-35-14-553Z/soak-report.md b/evidence/fleet-soak/2026-06-11T07-35-14-553Z/soak-report.md
new file mode 100644
index 0000000..3e424f8
--- /dev/null
+++ b/evidence/fleet-soak/2026-06-11T07-35-14-553Z/soak-report.md
@@ -0,0 +1,80 @@
+# Fleet Soak Report — v5-P4 (in-container short soak)
+
+Result: **PASS**
+Timestamp: 2026-06-11T07-35-14-553Z
+Duration: 1800s (AEDEV_SOAK_MS=1800000, intervalMs=200)
+Evidence dir: /home/user/claude-code-247/evidence/fleet-soak/2026-06-11T07-35-14-553Z
+
+Harness: real daemon (createServer, :memory: SQLite, temp stateDir), remote writes disabled,
+all external CLIs/APIs disabled, 5 real FleetWorkerAgent loops over real HTTP on 127.0.0.1,
+simulated executors producing passing evidence, simulated CI landing for each completion.
+
+## Workers
+
+| worker | operator | tasks executed | final registry status |
+|--------|----------|----------------|------------------------|
+| w-alice-1 | alice | 246 | active |
+| w-bob-1 | bob | 245 | active |
+| w-carol-1 | carol | 251 | active |
+| w-dave-1 | dave | 253 | active |
+| w-eve-1 | eve | 5 | frozen (drill) |
+
+Tasks seeded: 1000 · executed: 1000 · drill task: 01KTTSQ2M71BX6CQRMYNDH6KHF
+
+## Criteria
+
+### provisioning — PASS
+
+5 workers (5 operators) registered with real ed25519 keypairs
+- registered: 5/5
+- distinct public keys in registry: 5
+
+### no-double-execution — PASS
+
+Claim-ledger uniqueness: every task executed exactly once across 5 workers
+- tasks seeded: 1000 · executions: 1000
+- executed twice: 0 · never executed: 0 · with ≠1 claim event: 0
+- queue drained inside the soak: true (t+990s)
+- w-alice-1 (alice): 246 tasks executed
+- w-bob-1 (bob): 245 tasks executed
+- w-carol-1 (carol): 251 tasks executed
+- w-dave-1 (dave): 253 tasks executed
+- w-eve-1 (eve): 5 tasks executed
+
+### forged-evidence-drill — PASS
+
+Drill: a worker self-reports PASS while simulated CI reports FAIL → the task is put on HOLD, the worker is frozen, and its later claims are rejected with 403; the other 4 workers keep working
+- verdict: mismatch=true worker=w-eve-1 gates=[test]
+- HOLD-EVIDENCE-MISMATCH on task 01KTTSQ2M71BX6CQRMYNDH6KHF: open
+- fleet.worker_frozen events: 1 · registry status: frozen
+- w-eve-1 results after freeze: 8647, of which 403 worker_frozen: 8647, completions: 0
+- w-alice-1 completions after the freeze: 242
+- w-bob-1 completions after the freeze: 241
+- w-carol-1 completions after the freeze: 246
+- w-dave-1 completions after the freeze: 248
+
+### idle-zero-credit — PASS
+
+Idle ≥3 loop intervals after drain with ZERO cost.headless_call events
+- active workers observed idling: w-alice-1, w-bob-1, w-carol-1, w-dave-1
+- w-alice-1: +3 idle polls in the measured window (614ms)
+- w-bob-1: +3 idle polls in the measured window (614ms)
+- w-carol-1: +3 idle polls in the measured window (614ms)
+- w-dave-1: +3 idle polls in the measured window (614ms)
+- cost.headless_call during idle window: 0 · entire soak: 0
+
+### operator-attribution — PASS
+
+Per-operator event attribution: claims/evidence/lifecycle carry registry-bound operatorId + workerId
+- executions with fully consistent attribution: 1000/1000
+- operator alice: 246 claim events vs 246 executions
+- operator bob: 245 claim events vs 245 executions
+- operator carol: 251 claim events vs 251 executions
+- operator dave: 253 claim events vs 253 executions
+- operator eve: 5 claim events vs 5 executions
+
+## Honesty note
+
+This was an in-container short soak with simulated executors: it validates the harness and
+protocol under concurrency. The ≥1-week real-CLI soak on operator machines remains open
+(rubric #19 stays unchecked until then).
diff --git a/evidence/fleet-soak/soak-pending.json b/evidence/fleet-soak/soak-pending.json
new file mode 100644
index 0000000..037e975
--- /dev/null
+++ b/evidence/fleet-soak/soak-pending.json
@@ -0,0 +1,5 @@
+{
+ "started_at": "2026-06-11T07:35:13.342Z",
+ "expected_end": "2026-06-18T07:35:13.342Z",
+ "status": "completed"
+}
diff --git a/evidence/v6/real-proof/2026-06-11T08-23-02Z/draft-pr-url.txt b/evidence/v6/real-proof/2026-06-11T08-23-02Z/draft-pr-url.txt
new file mode 100644
index 0000000..b4f173b
--- /dev/null
+++ b/evidence/v6/real-proof/2026-06-11T08-23-02Z/draft-pr-url.txt
@@ -0,0 +1 @@
+https://github.com/CTlanston/hermus-agent/pull/4 (REAL, operator-produced; full cockpit chain still HOLD-PLANNER-AUTH)
diff --git a/evidence/v6/real-proof/2026-06-11T08-23-02Z/run-summary.md b/evidence/v6/real-proof/2026-06-11T08-23-02Z/run-summary.md
new file mode 100644
index 0000000..ba3ee67
--- /dev/null
+++ b/evidence/v6/real-proof/2026-06-11T08-23-02Z/run-summary.md
@@ -0,0 +1,17 @@
+# Overnight Phase 2 — V6-P3 honest conclusion (2026-06-11T08-23-02Z)
+
+## REAL(操作员已产出,里程碑)
+- **真实 Draft PR 存在:https://github.com/CTlanston/hermus-agent/pull/4** —— 远程写双闸在真实世界被证明可开真 PR(操作员 Mac,gh+codex 真实凭证)。
+- test:cockpit:real-smoke PASS(操作员 Mac)。
+
+## HOLD(完整链仍未闭合)
+- HOLD-PLANNER-AUTH:操作员 Mac 的 claude -p 返回 401 → cockpit 全链(clarify→…→Gemini→PR)未能由 cockpit 端到端驱动;真实 Gemini 判词 artifact 仍缺。
+- 本容器无 codex/gh/hermus —— 无法代跑。
+
+## 恢复路径(含今晚 Phase 1 的新修复)
+1. Mac: git pull 本分支;`claude login` 重登(或检查 Agent SDK credit)。
+2. 若 claude 暂不可用:`export AEDEV_PLANNER_FALLBACK=codex`(诚实降级,事件记 codex-cli (fallback))。
+3. 重跑 runbook mission → cockpit 端到端 → gemini-verdict.json + mission-events.jsonl 提交本目录。
+
+## Classification
+Real: hermus PR#4、real-smoke、30min soak 5/5、950 tests。Simulated: soak/E2E 引擎侧。Unproven: cockpit 端到端真链 + 真 Gemini 判词(本 HOLD 标的)。
diff --git a/packages/daemon/src/loop-cards.test.ts b/packages/daemon/src/loop-cards.test.ts
index 1c12575..579701f 100644
--- a/packages/daemon/src/loop-cards.test.ts
+++ b/packages/daemon/src/loop-cards.test.ts
@@ -203,6 +203,21 @@ describe('BlockerCard — human wording only; raw codes live in machine', () =>
expect(card.human_explanation).toContain('等待你确认')
})
+ it('HOLD-PLANNER-AUTH blocker: recovery actions carry the exact one-line fixes, never raw 401/codes', () => {
+ const card = deriveLoopCard(makeInput('brainstorming', {
+ activeHolds: [{ code: 'HOLD-PLANNER-AUTH', reason: 'claude-cli auth failure (matched 401)' }],
+ }))
+ if (card.type !== 'blocker') throw new Error(`expected blocker, got ${card.type}`)
+ const joined = card.recovery_actions.join('\n')
+ expect(joined).toContain('claude login')
+ expect(joined).toContain('/status')
+ expect(joined).toContain('AEDEV_PLANNER_FALLBACK=codex')
+ expect(card.recommended_action).toContain('claude login')
+ expect(visibleText(card)).not.toMatch(VISIBLE_CODE)
+ expect(visibleText(card)).not.toContain('401')
+ expect(card.machine.hold_code).toBe('HOLD-PLANNER-AUTH')
+ })
+
it('every blocker variant in the matrix keeps visible text code-free', () => {
const variants: DeriveLoopCardInput[] = [
makeInput('failed'),
diff --git a/packages/daemon/src/loop-cards.ts b/packages/daemon/src/loop-cards.ts
index 1683fdb..e3ed4e2 100644
--- a/packages/daemon/src/loop-cards.ts
+++ b/packages/daemon/src/loop-cards.ts
@@ -147,6 +147,9 @@ function whyItMatters(code: string | null): string {
if (code && code.startsWith('HOLD-REVIEW-LOOP')) {
return '反复返工说明这条路走不通,继续自动重试只会浪费额度 · Repeated rework means this path is not converging; more automatic retries would only waste your allowance.'
}
+ if (code && code.startsWith('HOLD-PLANNER-AUTH')) {
+ return '规划引擎的本地登录已失效,继续自动重试只会反复失败;先恢复登录是最快的恢复路径 · The planner’s local login is no longer valid; automatic retries would keep failing, so restoring the login is the fastest way back.'
+ }
if (code && (code.startsWith('HOLD-SESSION-POOL') || code.startsWith('HOLD-TARGET-REPO'))) {
return '在环境就绪之前动手只会产生假进度,系统选择诚实地等待 · Acting before the environment is ready would only fake progress; the system honestly waits instead.'
}
@@ -167,6 +170,13 @@ function recoveryActions(code: string | null): string[] {
'把任务拆小或补充更明确的要求后重新开始 · Restart with a smaller task or clearer requirements.',
]
}
+ if (code && code.startsWith('HOLD-PLANNER-AUTH')) {
+ return [
+ '在终端运行 claude login 重新登录本地 Claude · Run `claude login` in a terminal to sign the local Claude CLI back in.',
+ '在 Claude CLI 里输入 /status 检查订阅额度 · Check subscription credit with /status inside the Claude CLI.',
+ '可选:设置 AEDEV_PLANNER_FALLBACK=codex 让本地 Codex 暂代规划(永不使用付费 API) · Optional: set AEDEV_PLANNER_FALLBACK=codex to let the local Codex CLI plan instead (never a paid API).',
+ ]
+ }
if (code && (code.startsWith('HOLD-SESSION-POOL') || code.startsWith('HOLD-TARGET-REPO'))) {
return [
'检查本地 AI 引擎和目标仓库是否就绪 · Check that the local AI engine and the target repository are ready.',
diff --git a/packages/daemon/src/merge-policy-v6.test.ts b/packages/daemon/src/merge-policy-v6.test.ts
new file mode 100644
index 0000000..f68ee40
--- /dev/null
+++ b/packages/daemon/src/merge-policy-v6.test.ts
@@ -0,0 +1,141 @@
+/** Overnight P4 — full matrix tests for the v6 merge-action policy.
+ *
+ * The policy is a PURE DECISION FUNCTION (docs/product/MERGE_POLICY.md).
+ * It is intentionally NOT wired into any merge execution path this cycle:
+ * WORKBOOK_v6 GR#10 (human merge only) keeps auto-merge DISABLED, and the
+ * exhaustive test below pins that `autoMergeEnabled=false` can NEVER yield
+ * `auto_merge_eligible` — not even for a perfect docs-only change.
+ */
+import { describe, it, expect } from 'vitest'
+import { decideMergeAction, type MergeActionInput } from './merge-policy-v6.js'
+
+const CHANGE_KINDS = ['docs_only', 'code', 'security', 'workflow', 'dependency', 'system_config'] as const
+const SENSITIVE_KINDS = ['security', 'workflow', 'dependency', 'system_config'] as const
+const GEMINI_VERDICTS = ['pass', 'fail', 'inconclusive', 'not_configured'] as const
+const CLAUDE_REVIEWS = ['approve', 'rework', 'none'] as const
+const RISK_LEVELS = ['low', 'medium', 'high'] as const
+const BOOLS = [true, false] as const
+
+/** A docs-only change with every gate green — the strongest possible candidate. */
+function perfectDocsOnly(overrides: Partial