diff --git a/WORKBOOK_v6.md b/WORKBOOK_v6.md index 9b0ff49..d3acc38 100644 --- a/WORKBOOK_v6.md +++ b/WORKBOOK_v6.md @@ -17,17 +17,17 @@ schema_version: 6 product: ordinary-user-loop-os version_target: loop-os-v1 -current_phase: V6-P2 # V6-P0..P6,见 §3;P0 文档部分 + P1 协议随本 PR 落地 -current_substep: p2_card_cockpit_ui_pending -last_session_id: s_v6_0001 +current_phase: V6-P3 # V6-P0..P6,见 §3;P2 卡片化座舱随本 PR 落地 +current_substep: p3_real_proof_closeout_pending +last_session_id: s_v6_0002 open_holds: 0 blocked_on: none -test_baseline: 818 # main 基线,0 fail;本周期任何回归即闸红 +test_baseline: 864 # main 基线,0 fail;本周期任何回归即闸红 merge_policy: human_only # 系统永不 merge;auto-merge 本周期禁用 # next_action 硬上限 2 行: next_action: | - V6-P0 文档收口 + V6-P1 通信协议(5 卡契约 + daemon 卡片派生)已随本 PR 交付。 - 下一步 V6-P2:座舱 UI 只渲染 5 卡,浏览器 E2E 证明"不读日志即知下一步"。 + V6-P2 已交付:LoopCard 五卡主表面 + 浏览器 E2E 每步断言卡型与 next_step(证据入库)。 + 下一步 V6-P3:真实证明收口——真 Draft PR、库内真实 Gemini 判词、fail-closed 回归。 ``` --- diff --git a/apps/dashboard/src/pages/Cockpit.tsx b/apps/dashboard/src/pages/Cockpit.tsx index 2053492..a79b516 100644 --- a/apps/dashboard/src/pages/Cockpit.tsx +++ b/apps/dashboard/src/pages/Cockpit.tsx @@ -2,17 +2,20 @@ import { useEffect, useMemo, useState, type CSSProperties } from 'react' import { api, ApiError, + type ApiLoopCard, type ApiMissionOverview, type ApiOperatorChoice, - type ApiOperatorMissionView, type ApiOperatorMessage, + type ApiOperatorMissionView, type ApiOperatorSession, type ApiRepo, + type ApiUnderstandingLoopCard, } from '../api.js' import { useSSE } from '../hooks/useSSE.js' import { ChatThread } from './cockpit/ChatThread.js' import { Composer } from './cockpit/Composer.js' import { ClarificationPopup } from './cockpit/ClarificationPopup.js' +import { LoopCard } from './cockpit/LoopCard.js' import './cockpit/cockpit.css' const DEFAULT_PROMPT = 'Brainstorm a low-risk improvement, produce PRD/ADR/roadmap, then execute to the draft PR/evidence gate.' @@ -219,6 +222,14 @@ export function CockpitPage({ onNavigate }: { onNavigate?: (tab: string) => void const hasSession = Boolean(session) const pendingCount = Math.max(pendingApprovals, sse.pendingApprovals) const operatorView = overview?.operatorView + // v6-P2 (GR#11) — the primary state surface is exactly ONE of the five loop + // cards. The daemon derives it on every mission overview; before a mission + // exists (brainstorm/clarify) a client-side understanding card keeps the + // same five-card mental model without inventing machine state. + const loopCard = useMemo(() => { + if (operatorView?.card) return operatorView.card + return buildPreMissionUnderstandingCard(session, pendingClarify) + }, [operatorView?.card, session, pendingClarify]) const plannerProvider = operatorView?.providerSummary.planner const workerProvider = operatorView?.providerSummary.worker const stageForDom = operatorView?.stage ?? (session?.status === 'brainstorming' ? 'brainstorming' : session?.missionId ? 'roadmap_ready' : 'new') @@ -340,6 +351,8 @@ export function CockpitPage({ onNavigate }: { onNavigate?: (tab: string) => void )} {notice &&
{notice}
} + {loopCard && } + ({ id: q.id, question: q.question })) + const default_assumptions = pending + .map((q) => { + const recommended = q.options.find((o) => o.recommended) + return recommended ? `不回答时默认采用 · If unanswered, the default is "${recommended.label}".` : null + }) + .filter((x): x is string => x !== null) + const needsInput = questions.length > 0 + return { + type: 'understanding', + title: needsInput ? '需要你补充信息 · Needs your input' : '正在理解你的目标 · Understanding your goal', + next_step: needsInput + ? '回答下方的待确认问题,AI 才能继续生成方案 · Answer the questions below so the plan can continue.' + : '稍等片刻,AI 正在确认理解,随后会给出方案 · Hang on — understanding is being confirmed; a plan comes next.', + machine: { + user_state: needsInput ? 'needs_more_context' : 'understanding', + stage: session.status, + hold_code: null, + pr_gate_code: null, + }, + user_goal: session.prompt?.trim() || '(目标待补充 · goal not provided yet)', + interpreted_goal: 'AI 正在阅读你的目标,还没有改任何东西 · The AI is reading your goal; nothing has been changed yet.', + out_of_scope: [], + confidence: 0, + questions, + default_assumptions, + } +} + function isHoldResponse(out: unknown): out is { hold: { code: string; reason: string } } { return Boolean( out && diff --git a/apps/dashboard/src/pages/cockpit/LoopCard.test.tsx b/apps/dashboard/src/pages/cockpit/LoopCard.test.tsx new file mode 100644 index 0000000..b979b9d --- /dev/null +++ b/apps/dashboard/src/pages/cockpit/LoopCard.test.tsx @@ -0,0 +1,207 @@ +// @vitest-environment jsdom +/** + * v6-P2 — card cockpit: the ordinary user only ever sees the five loop cards + * (understanding / plan / progress / blocker / pr_ready) as the primary state + * surface. Contract: docs/product/LOOP_COMMUNICATION_PROTOCOL.md (GR#11). + * + * Pinned invariants: + * - each card type renders with data-card-type and calm bilingual text; + * - every card shows its `next_step` prominently (cockpit-loop-card-next-step); + * - the `machine` sub-object is NEVER rendered as visible text — raw codes + * (HOLD-*, gate codes, stage tokens) live only in data-* attributes. + */ +import { describe, it, expect, afterEach } from 'vitest' +import { render, cleanup } from '@testing-library/react' +import { LoopCard } from './LoopCard.js' +import type { + ApiBlockerLoopCard, + ApiLoopCard, + ApiLoopCardMachine, + ApiPlanLoopCard, + ApiPrReadyLoopCard, + ApiProgressLoopCard, + ApiUnderstandingLoopCard, +} from '../../api.js' + +afterEach(cleanup) + +const MACHINE: ApiLoopCardMachine = { + user_state: 'blocked', + stage: 'pr_blocked', + hold_code: 'HOLD-BUDGET', + pr_gate_code: 'REMOTE_WRITES_DISABLED', +} + +function understanding(): ApiUnderstandingLoopCard { + return { + type: 'understanding', + title: '正在理解你的目标 · Understanding your goal', + next_step: '回答上面的问题,AI 才能继续生成方案 · Answer the questions above so the plan can continue.', + machine: { user_state: 'needs_more_context', stage: 'clarifying', hold_code: null, pr_gate_code: null }, + user_goal: 'Make onboarding calmer', + interpreted_goal: 'AI 正在阅读你的目标 · The AI is reading your goal.', + out_of_scope: [], + confidence: 62, + questions: [{ id: 'q1', question: 'Which slice first?' }], + default_assumptions: ['不回答时默认采用 · If unanswered, the default is "Smallest viable slice".'], + } +} + +function plan(): ApiPlanLoopCard { + return { + type: 'plan', + title: '等待你的确认 · Waiting for your go-ahead', + next_step: '审阅这份方案;你批准后才会开始动手 · Review this plan; work starts only after you approve it.', + machine: { user_state: 'waiting_for_approval', stage: 'roadmap_ready', hold_code: null, pr_gate_code: null }, + objective: 'Improve one onboarding message', + phases: ['Understand · 理解需求', 'Execute · 本地执行'], + acceptance_criteria: ['gate.json'], + risk_level: 'low', + estimated_calls: 15, + requires_approval: true, + } +} + +function progress(): ApiProgressLoopCard { + return { + type: 'progress', + title: '正在执行 · Working on it', + next_step: 'Watch progress here; evidence lands automatically · 进度会自动更新。', + machine: { user_state: 'executing', stage: 'running', hold_code: null, pr_gate_code: null }, + current_phase: '正在执行 · Working on it', + current_action: 'worker 正在按方案干活 · The worker is following the plan.', + evidence_links: ['evidence/run-1/gate.json'], + tests_run: ['typecheck', 'vitest'], + } +} + +function blocker(): ApiBlockerLoopCard { + return { + type: 'blocker', + title: '需要你处理 · Needs your attention', + next_step: '等到明天额度自动恢复 · Wait for the allowance to reset tomorrow.', + machine: MACHINE, + human_explanation: '今日预算已用完,明天自动恢复或调高预算 · Today’s budget is used up; it resets tomorrow, or you can raise the limit.', + why_it_matters: '预算护栏防止系统超额消耗调用额度 · The budget guard stops silent overspend.', + recovery_actions: ['等到明天额度自动恢复 · Wait for the reset.', '调高今日预算 · Raise today’s budget.'], + recommended_action: '等到明天额度自动恢复 · Wait for the reset.', + } +} + +function prReady(withUrl: boolean): ApiPrReadyLoopCard { + return { + type: 'pr_ready', + title: '已完成 · Done', + next_step: '去 GitHub 审阅这个 Draft PR;merge 由你亲自点 · Review the draft PR; merging is yours.', + machine: { user_state: 'completed', stage: 'pr_created', hold_code: null, pr_gate_code: null }, + pr_url: withUrl ? 'https://github.com/o/r/pull/7' : null, + summary: '本轮工作已完成 · This round of work is complete.', + files_changed: ['src/a.ts'], + tests: ['vitest'], + validator_verdict: null, + risk: 'low', + merge_policy: '只有你能 merge · Human merge only: the system never merges.', + rework_button: { enabled: true, label: '不满意?让 AI 返工 · Not satisfied? Ask for rework.' }, + } +} + +function renderCard(card: ApiLoopCard) { + const { container } = render() + const root = container.querySelector('[data-testid="cockpit-loop-card"]') as HTMLElement + expect(root).toBeTruthy() + return root +} + +describe('LoopCard — five card types render with a prominent next_step', () => { + const cases: Array<[string, ApiLoopCard]> = [ + ['understanding', understanding()], + ['plan', plan()], + ['progress', progress()], + ['blocker', blocker()], + ['pr_ready', prReady(false)], + ] + + for (const [type, card] of cases) { + it(`renders the ${type} card with data-card-type and visible next_step`, () => { + const root = renderCard(card) + expect(root.getAttribute('data-card-type')).toBe(type) + const next = root.querySelector('[data-testid="cockpit-loop-card-next-step"]') as HTMLElement + expect(next).toBeTruthy() + expect(next.textContent).toContain(card.next_step) + expect(root.textContent).toContain(card.title) + }) + } +}) + +describe('LoopCard — per-type content', () => { + it('understanding: shows goal, interpreted goal, questions and default assumptions', () => { + const root = renderCard(understanding()) + expect(root.textContent).toContain('Make onboarding calmer') + expect(root.textContent).toContain('The AI is reading your goal') + expect(root.textContent).toContain('Which slice first?') + expect(root.textContent).toContain('Smallest viable slice') + expect(root.textContent).toContain('62') + }) + + it('plan: shows objective, phases, acceptance criteria, and the approval requirement', () => { + const root = renderCard(plan()) + expect(root.textContent).toContain('Improve one onboarding message') + expect(root.textContent).toContain('Understand · 理解需求') + expect(root.textContent).toContain('gate.json') + // requires_approval=true must be visible as calm human text + expect(root.textContent).toMatch(/批准|approve/i) + }) + + it('progress: shows current action, checks, and evidence links', () => { + const root = renderCard(progress()) + expect(root.textContent).toContain('The worker is following the plan') + expect(root.textContent).toContain('typecheck') + expect(root.textContent).toContain('evidence/run-1/gate.json') + }) + + it('pr_ready: honest about a missing PR url and missing verdict; merge policy always visible', () => { + const root = renderCard(prReady(false)) + expect(root.textContent).toContain('Human merge only') + expect(root.textContent).toMatch(/尚未创建|No draft PR/) + expect(root.textContent).toMatch(/还没有结论|No review verdict/) + expect(root.querySelector('a')).toBeNull() + }) + + it('pr_ready: renders the PR link when a real url exists', () => { + const root = renderCard(prReady(true)) + const link = root.querySelector('a') + expect(link?.getAttribute('href')).toBe('https://github.com/o/r/pull/7') + }) +}) + +describe('LoopCard — blocker card: human explanation, never raw codes', () => { + it('shows human_explanation, why_it_matters and recovery actions', () => { + const root = renderCard(blocker()) + expect(root.textContent).toContain('今日预算已用完') + expect(root.textContent).toContain('The budget guard stops silent overspend') + expect(root.textContent).toContain('Raise today’s budget') + }) + + it('never renders raw machine codes as visible text; they stay in data-* attributes', () => { + const root = renderCard(blocker()) + expect(root.textContent).not.toContain('HOLD-BUDGET') + expect(root.textContent).not.toContain('REMOTE_WRITES_DISABLED') + expect(root.textContent).not.toContain('pr_blocked') + expect(root.getAttribute('data-hold-code')).toBe('HOLD-BUDGET') + expect(root.getAttribute('data-pr-gate-code')).toBe('REMOTE_WRITES_DISABLED') + expect(root.getAttribute('data-machine-stage')).toBe('pr_blocked') + expect(root.getAttribute('data-user-state')).toBe('blocked') + }) + + it('keeps machine tokens out of visible text for every card type', () => { + const cards: ApiLoopCard[] = [understanding(), plan(), progress(), blocker(), prReady(false)] + for (const card of cards) { + const root = renderCard(card) + for (const token of [card.machine.user_state, card.machine.stage, card.machine.hold_code, card.machine.pr_gate_code]) { + if (!token) continue + expect(root.textContent).not.toContain(token) + } + cleanup() + } + }) +}) diff --git a/apps/dashboard/src/pages/cockpit/LoopCard.tsx b/apps/dashboard/src/pages/cockpit/LoopCard.tsx new file mode 100644 index 0000000..de26dc5 --- /dev/null +++ b/apps/dashboard/src/pages/cockpit/LoopCard.tsx @@ -0,0 +1,172 @@ +/** + * v6-P2 — the five-card loop cockpit surface (WORKBOOK_v6 GR#11). + * + * Renders exactly ONE of the five ordinary-user cards derived by the daemon + * (`overview.operatorView.card`, packages/daemon/src/loop-cards.ts). Three + * rules from docs/product/LOOP_COMMUNICATION_PROTOCOL.md: + * 1. Machine codes stay in the data layer — the `machine` sub-object is + * exposed ONLY through data-* attributes, never as visible text. + * 2. Visible text is calm and bilingual. + * 3. Every card answers "what happens next" via a prominent `next_step`. + */ +import type { ReactNode } from 'react' +import type { ApiLoopCard } from '../../api.js' + +const TYPE_LABELS: Record = { + understanding: '理解 · Understanding', + plan: '方案 · Plan', + progress: '进展 · Progress', + blocker: '需要你 · Needs you', + pr_ready: '收尾 · Ready for you', +} + +const RISK_LABELS: Record<'low' | 'medium' | 'high', string> = { + low: '低 · low', + medium: '中 · medium', + high: '高 · high', +} + +function Row({ k, children }: { k: string; children: ReactNode }) { + return ( +
+
{k}
+
{children}
+
+ ) +} + +function List({ items }: { items: string[] }) { + return ( +
    + {items.map((item, i) =>
  • {item}
  • )} +
+ ) +} + +function UnderstandingBody({ card }: { card: Extract }) { + return ( +
+ {card.user_goal} + {card.interpreted_goal} + {card.confidence > 0 ? `${card.confidence}%` : '评估中 · being assessed'} + {card.out_of_scope.length > 0 && } + {card.questions.length > 0 && ( + q.question)} /> + )} + {card.default_assumptions.length > 0 && ( + + )} +
+ ) +} + +function PlanBody({ card }: { card: Extract }) { + return ( +
+ {card.objective} + + {card.phases.length > 0 + ? + : '步骤会随方案展开 · Phases appear as the plan takes shape.'} + + + {RISK_LABELS[card.risk_level]} + {`最多 ${card.estimated_calls} 次调用 · up to ${card.estimated_calls} calls`} + {card.requires_approval && ( + 需要你批准后才会动手;merge 永远由你执行 · Nothing runs until you approve; merging stays yours. + )} +
+ ) +} + +function ProgressBody({ card }: { card: Extract }) { + return ( +
+ {card.current_phase} + {card.current_action} + + {card.tests_run.length > 0 ? : '还没有检查记录 · No checks recorded yet.'} + + + {card.evidence_links.length > 0 + ? + : '证据生成后会列在这里 · Evidence appears here once produced.'} + +
+ ) +} + +function BlockerBody({ card }: { card: Extract }) { + return ( + <> +

{card.human_explanation}

+
+ {card.why_it_matters} + + {card.recommended_action} +
+ + ) +} + +function PrReadyBody({ card, onRework }: { card: Extract; onRework?: (() => void) | undefined }) { + return ( + <> +
+ {card.summary} + + {card.pr_url + ? {card.pr_url} + : '尚未创建真实 PR · No draft PR has been created yet.'} + + + {card.files_changed.length > 0 ? : '没有记录到文件改动 · No file changes recorded.'} + + + {card.tests.length > 0 ? : '还没有检查记录 · No checks recorded yet.'} + + {card.validator_verdict ?? '结果评审还没有结论 · No review verdict yet.'} + {RISK_LABELS[card.risk]} + {card.merge_policy} +
+
+ +
+ + ) +} + +export function LoopCard({ card, onRework }: { card: ApiLoopCard; onRework?: () => void }) { + return ( +
+
+ {TYPE_LABELS[card.type]} + {card.title} +
+
+ 下一步 · Next + {card.next_step} +
+ {card.type === 'understanding' && } + {card.type === 'plan' && } + {card.type === 'progress' && } + {card.type === 'blocker' && } + {card.type === 'pr_ready' && } +
+ ) +} diff --git a/apps/dashboard/src/pages/cockpit/cockpit.css b/apps/dashboard/src/pages/cockpit/cockpit.css index 8e69bf9..ddf7415 100644 --- a/apps/dashboard/src/pages/cockpit/cockpit.css +++ b/apps/dashboard/src/pages/cockpit/cockpit.css @@ -280,6 +280,27 @@ button.ck-stat:hover { background: var(--bg-sub); } .ck-validator-top { display: flex; align-items: center; justify-content: space-between; gap: 8px; font-size: 12px; } .ck-validator-top span { font-size: 10.5px; text-transform: uppercase; letter-spacing: 0.04em; color: var(--fg-muted); } +/* --- v6-P2 five-card loop surface (GR#11): the primary state card above the chat --- */ +.ck-loop-card { border: 1px solid #dbe4f5; border-radius: var(--r-lg); background: var(--bg); box-shadow: var(--sh-1); padding: 12px 14px; margin-bottom: 8px; display: grid; gap: 8px; font-size: 12.5px; } +.ck-loop-card.type-blocker { border-color: #fed7aa; background: #fffbf5; } +.ck-loop-card.type-pr_ready { border-color: #a7f3d0; background: #f6fdf9; } +.ck-loop-head { display: flex; align-items: center; gap: 10px; min-width: 0; } +.ck-loop-type { flex: none; border-radius: 999px; padding: 2px 9px; font-size: 10.5px; font-weight: 600; text-transform: none; background: var(--accent-soft); color: #1d4ed8; } +.ck-loop-type.type-blocker { background: #fff1e6; color: #c2410c; } +.ck-loop-type.type-pr_ready { background: #ecfdf5; color: #047857; } +.ck-loop-title { font-size: 13.5px; overflow: hidden; text-overflow: ellipsis; } +.ck-loop-next { display: flex; align-items: baseline; gap: 8px; border: 1px solid #bfdbfe; background: var(--accent-soft); border-radius: 10px; padding: 8px 10px; } +.ck-loop-next .k { flex: none; font-size: 10.5px; font-weight: 700; letter-spacing: 0.03em; color: #1d4ed8; } +.ck-loop-next .v { font-size: 13px; font-weight: 600; color: var(--fg); } +.ck-loop-explanation { margin: 0; font-size: 13px; color: var(--fg-2); line-height: 1.5; } +.ck-loop-body { margin: 0; display: grid; gap: 4px; } +.ck-loop-row { display: grid; grid-template-columns: 110px minmax(0, 1fr); gap: 8px; } +.ck-loop-row dt { color: var(--fg-muted); } +.ck-loop-row dd { margin: 0; color: var(--fg-2); overflow-wrap: anywhere; } +.ck-loop-list { margin: 0; padding-left: 18px; display: grid; gap: 2px; } +.ck-loop-actions { display: flex; gap: 8px; } +.ck-loop-rework { font-size: 12px; } + .ck-review-block { align-self: stretch; max-width: 92%; border: var(--hair); border-radius: var(--r-lg); background: var(--bg); box-shadow: var(--sh-1); padding: 12px; display: grid; gap: 10px; } .ck-review-head { display: flex; align-items: center; justify-content: space-between; gap: 8px; font-size: 13px; } .ck-review-head span { color: var(--fg-muted); font-size: 11px; text-transform: uppercase; letter-spacing: 0.04em; } diff --git a/docs/SESSION_LOG_v3.md b/docs/SESSION_LOG_v3.md index a51373e..a1c5cea 100644 --- a/docs/SESSION_LOG_v3.md +++ b/docs/SESSION_LOG_v3.md @@ -1,5 +1,14 @@ # SESSION LOG v3 +## s_v6_0002 · 2026-06-11 · V6-P2 complete · card cockpit (UI renders the five loop cards) + +- New `apps/dashboard/src/pages/cockpit/LoopCard.tsx` (TDD: 13 component tests first): renders exactly the five GR#11 card types from `overview.operatorView.card`; calm bilingual copy; `next_step` is always the prominent first row (`cockpit-loop-card-next-step`); the `machine` sub-object is never visible text — raw codes live only in `data-card-type` / `data-user-state` / `data-machine-stage` / `data-hold-code` / `data-pr-gate-code`. Blocker card shows `human_explanation` + `why_it_matters` + recovery actions, zero raw codes. +- Mounted in `Cockpit.tsx` above the chat as the primary state surface. Before a mission exists (brainstorm/clarify), an honest client-side UnderstandingCard keeps the same five-card mental model (questions + recommended defaults from the pending clarification; no fabricated machine state; session holds keep their dedicated banner). All pre-existing testids unchanged. +- `scripts/operator-cockpit-user-e2e.ts` extended: every journey step now also asserts the loop card is present with the EXPECTED type — understanding during clarify (steps 2–3), plan at roadmap_ready (step 4), progress during execution (step 5), blocker at the Draft PR gate (step 7) — plus non-empty `next_step` visible without logs and no machine token inside the card's visible text. All previous assertions kept. +- Browser evidence (real chromium, mock/template env — simulated planner/worker, real UI): user-journey E2E PASS 7/7 at `evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/`; quality smoke PASS at `evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/`. +- Gates: `pnpm typecheck` PASS; `pnpm lint` PASS; `pnpm test` PASS with 864 passed, 6 skipped (128 files; +13 over the 851 baseline, zero regressions). +- §0 → `current_phase: V6-P3`(真实证明收口)。L1 of V6-P2 met: "不读日志即知下一步" now has browser-level evidence; the old loop summary stays as a read-only evidence detail in the thread footer, no longer the primary state surface. + ## s_0013 · 2026-06-10 · operator-reported real-chain regression PASS (closed-gate) · P4 still pending open-gate run - Operator ran a real Mac E2E against `hermus-agent` (temp clone): planner=`claude-cli/local_claude_code`, coder=`codex-cli/local_codex`, **Gemini verdict `pass`** persisted to `validator-summary.json`, Draft-PR gate blocked with `REMOTE_WRITES_DISABLED`; no push, no PR URL, no merge; hermus-agent itself untouched. Report (operator machine): `evidence/launch/operator-cockpit-real-smoke-2026-06-10T07-06-36-411Z.md`. Operator gates green locally. diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/01-new.png b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/01-new.png new file mode 100644 index 0000000..cbce33e Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/01-new.png differ diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/02-brainstorm-ready.png b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/02-brainstorm-ready.png new file mode 100644 index 0000000..1611f69 Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/02-brainstorm-ready.png differ diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/03-plan-approval.png b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/03-plan-approval.png new file mode 100644 index 0000000..0b838bb Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/03-plan-approval.png differ diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/04-approved.png b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/04-approved.png new file mode 100644 index 0000000..fbde3f9 Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/04-approved.png differ diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/05-evidence-ready.png b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/05-evidence-ready.png new file mode 100644 index 0000000..b2fc80d Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/05-evidence-ready.png differ diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/06-pr-blocked.png b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/06-pr-blocked.png new file mode 100644 index 0000000..3281adf Binary files /dev/null and b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/06-pr-blocked.png differ diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/console-logs.json b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/console-logs.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/console-logs.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/db-state-summary.json b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/db-state-summary.json new file mode 100644 index 0000000..23cba9a --- /dev/null +++ b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/db-state-summary.json @@ -0,0 +1,286 @@ +{ + "mission": { + "id": "01KTT64DP0JWA5A7A9R0M0VMXJ", + "status": "paused", + "githubPrUrl": null + }, + "operatorView": { + "stage": "pr_blocked", + "stageLabel": "PR blocked by policy · PR 被安全门拦截", + "confidence": 96, + "progressPercent": 95, + "headlessCallsToday": 0, + "primaryAction": { + "id": "check-draft-pr-gate", + "label": "Re-check Draft PR Gate · 重新检查 PR 安全门", + "kind": "primary" + }, + "secondaryActions": [], + "providerSummary": { + "planner": { + "name": "test-synthetic", + "mode": "mock", + "status": "Planner finished", + "tokens": null + }, + "worker": { + "name": "mock", + "mode": "mock", + "status": "done", + "tokens": null + }, + "validators": [ + { + "name": "gemini", + "mode": "not_configured", + "status": "not_configured" + } + ] + }, + "safetySummary": { + "remoteWrites": "disabled", + "prGate": { + "status": "blocked", + "code": "GEMINI_NOT_CONFIGURED", + "reason": "Gemini hard gate has no evidence-only PASS verdict for this mission.", + "remediation": "Remote writes are disabled for safety. Enable repo-scoped allow_remote_writes only when you want the worker to push a branch and open a Draft PR; until then no push, PR, or merge occurs." + }, + "testMode": { + "enabled": true, + "reason": "mock/template mode is active; no external model or remote write is implied." + } + }, + "understanding": { + "roundsCompleted": 0, + "questions": [], + "readyReason": "Planner confidence is at least 95% and no clarification questions are pending." + }, + "projectPulse": { + "progress": [ + { + "id": "understand", + "label": "Understand · 理解需求", + "status": "done" + }, + { + "id": "roadmap", + "label": "Roadmap · 路线图", + "status": "done" + }, + { + "id": "execute", + "label": "Execute · 本地执行", + "status": "done" + }, + { + "id": "validate", + "label": "Validate · 独立验证", + "status": "done", + "detail": "Gemini key is not configured; this is visible and not counted as pass." + }, + { + "id": "pr-gate", + "label": "PR Gate · PR 安全门", + "status": "active" + }, + { + "id": "learn", + "label": "Learn · 沉淀记忆", + "status": "pending" + } + ], + "workingFolder": "/tmp/aedev-cockpit-quality-BEYx80/operator-evidence/01KTT64E24GGCQ2YWX6V7VYQCW", + "touchedFiles": [], + "evidence": [ + { + "id": "01KTT64E2AT9MJC8JFV73ZEVFK", + "title": "ADR draft", + "path": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ/adr-mission.md", + "type": "adr" + }, + { + "id": "01KTT64E2AT9MJC8JFV73ZEVFH", + "title": "Evidence directory", + "path": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ", + "type": "evidence" + }, + { + "id": "01KTT64E2AT9MJC8JFV73ZEVFJ", + "title": "PRD", + "path": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ/prd.md", + "type": "prd" + }, + { + "id": "01KTT64E2AT9MJC8JFV73ZEVFN", + "title": "Workbook summary", + "path": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ/workbook-summary.md", + "type": "report" + }, + { + "id": "01KTT64E2AT9MJC8JFV73ZEVFP", + "title": "Test summary", + "path": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ/test-summary.md", + "type": "report" + }, + { + "id": "01KTT64E2AT9MJC8JFV73ZEVFQ", + "title": "Risk report", + "path": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ/risk-report.md", + "type": "report" + }, + { + "id": "01KTT64E2AT9MJC8JFV73ZEVFR", + "title": "Worker diff", + "path": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ/diff-summary.md", + "type": "report" + }, + { + "id": "01KTT64E2AT9MJC8JFV73ZEVFS", + "title": "Done report", + "path": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ/done-report.md", + "type": "report" + }, + { + "id": "01KTT64E2AT9MJC8JFV73ZEVFM", + "title": "Roadmap", + "path": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ/roadmap.md", + "type": "roadmap" + }, + { + "id": "01KTT64DP4CEC3RWAFR03NMXJ9", + "title": "ADR draft in mission design", + "path": "/tmp/aedev-cockpit-quality-BEYx80/prd/01KTT64DP0JWA5A7A9R0M0VMXJ.design.json", + "type": "adr" + }, + { + "id": "01KTT64DP4CEC3RWAFR03NMXJ7", + "title": "PRD", + "path": "/tmp/aedev-cockpit-quality-BEYx80/prd/01KTT64DP0JWA5A7A9R0M0VMXJ.md", + "type": "prd" + }, + { + "id": "01KTT64DP4CEC3RWAFR03NMXJ8", + "title": "Mission design JSON", + "path": "/tmp/aedev-cockpit-quality-BEYx80/prd/01KTT64DP0JWA5A7A9R0M0VMXJ.design.json", + "type": "roadmap" + } + ], + "validatorReviews": [ + { + "id": "validators-not-configured", + "validator": "validators", + "verdict": "not_configured", + "summary": "Independent validation did not run because the Gemini key is not configured.", + "checkedEvidence": [ + "ADR draft", + "Evidence directory", + "PRD", + "Workbook summary", + "Test summary", + "Risk report", + "Worker diff", + "Done report" + ], + "blockingIssues": [], + "evidenceGaps": [ + "No Gemini validator verdict exists for this mission." + ], + "recommendedNextAction": "Configure validator keys for live verification, or continue reviewing evidence manually." + } + ] + }, + "memorySummary": { + "projectFacts": [ + { + "id": "repo-01KTT64A17VBM5AED1ERZ9TK1K", + "kind": "project", + "text": "Target repo is cockpit-quality at /tmp/aedev-cockpit-quality-BEYx80.", + "provenance": "repo registry", + "ttlDays": 90, + "superseded": false + }, + { + "id": "repo-forbidden-01KTT64A17VBM5AED1ERZ9TK1K", + "kind": "safety", + "text": "Forbidden paths stay protected: .env*, secrets/**, .github/**, AGENTS.md", + "provenance": "repo policy", + "ttlDays": 365, + "superseded": false + } + ], + "userPreferences": [ + { + "id": "pref-understand-first", + "kind": "user_preference", + "text": "Ask goal-specific questions and confirm understanding before starting worker execution.", + "provenance": "operator product directive", + "ttlDays": 365, + "superseded": false + }, + { + "id": "prompt-01KTT64B82AYBJ6M976ZHGPSAW", + "kind": "mission_intent", + "text": "Current mission intent: In the dashboard Cockpit page, verify the existing conversation UI quality smoke keeps the single conversation layout, status strip, and safe Draft PR gate visible without changing product behavior. Acceptance: browser smoke passes and evid", + "provenance": "operator prompt", + "ttlDays": 30, + "superseded": false + } + ], + "recentLessons": [ + { + "id": "lesson-0", + "kind": "run_lesson", + "text": "Draft PR blocked: GEMINI_NOT_CONFIGURED", + "provenance": "event:operator.draft_pr_blocked", + "ttlDays": 30, + "superseded": false + } + ] + }, + "summary": "Worker done, evidence ready, and the Draft PR gate was blocked by policy. No branch push, PR, or merge occurred.", + "nextAction": "Continue reviewing evidence, or explicitly enable repo-scoped remote writes before re-checking the gate.", + "testMode": true, + "userState": { + "state": "blocked", + "label": "Needs your attention", + "labelZh": "需要你处理", + "explanation": "系统在这一步暂停,等你看一眼后再继续 · The system paused here and will continue once you take a look." + }, + "lastActivity": { + "atIso": "2026-06-11T01:53:18.134Z", + "agoMs": 159, + "phase": "blocked" + }, + "loopSummary": { + "whatChanged": [], + "testsRan": [ + "Test summary" + ], + "agents": [ + "planner · test-synthetic", + "worker · mock", + "validator · gemini" + ], + "validatorSaid": null, + "whyStoppedOrContinuing": "系统在这一步暂停,等你看一眼后再继续 · The system paused here and will continue once you take a look." + }, + "card": { + "type": "blocker", + "title": "需要你处理 · Needs your attention", + "human_explanation": "系统在这一步暂停,等你看一眼后再继续 · The system paused here and will continue once you take a look.", + "why_it_matters": "在不确定的时候暂停,比悄悄做错更安全;没有你的确认,任何东西都不会对外发布 · Pausing when unsure is safer than quietly doing the wrong thing; nothing is published without your confirmation.", + "recovery_actions": [ + "查看这张卡的说明,确认是否继续 · Read this card’s explanation and confirm whether to continue.", + "随时可以重新开始或调整目标 · You can restart or adjust the goal at any time." + ], + "recommended_action": "查看这张卡的说明,确认是否继续 · Read this card’s explanation and confirm whether to continue.", + "next_step": "查看这张卡的说明,确认是否继续 · Read this card’s explanation and confirm whether to continue.", + "machine": { + "user_state": "blocked", + "stage": "pr_blocked", + "hold_code": null, + "pr_gate_code": "GEMINI_NOT_CONFIGURED" + } + } + } +} \ No newline at end of file diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/dom-state-summary.json b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/dom-state-summary.json new file mode 100644 index 0000000..6e84ddc --- /dev/null +++ b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/dom-state-summary.json @@ -0,0 +1,6 @@ +{ + "stage": "pr_blocked", + "planner": "mock", + "worker": "mock", + "prGateCode": "GEMINI_NOT_CONFIGURED" +} \ No newline at end of file diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/event-tail.json b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/event-tail.json new file mode 100644 index 0000000..9d6bd56 --- /dev/null +++ b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/event-tail.json @@ -0,0 +1,119 @@ +[ + { + "type": "operator.draft_pr_blocked", + "payload": { + "code": "GEMINI_NOT_CONFIGURED", + "reason": "Gemini hard gate has no evidence-only PASS verdict for this mission.", + "validator": "gemini" + }, + "createdAt": "2026-06-11T01:53:18.134Z" + }, + { + "type": "operator.gemini_pr_blocked", + "payload": { + "code": "GEMINI_NOT_CONFIGURED", + "reason": "Gemini hard gate has no evidence-only PASS verdict for this mission.", + "verdict": "not_configured", + "summary": null + }, + "createdAt": "2026-06-11T01:53:18.134Z" + }, + { + "type": "operator.evidence_written", + "payload": { + "sessionId": "01KTT64B82AYBJ6M976ZHGPSAW", + "evidenceDir": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ" + }, + "createdAt": "2026-06-11T01:53:16.362Z" + }, + { + "type": "operator.stage_changed", + "payload": { + "stage": "PR/Waiting/Blocked", + "sessionId": "01KTT64B82AYBJ6M976ZHGPSAW", + "status": "waiting" + }, + "createdAt": "2026-06-11T01:53:16.362Z" + }, + { + "type": "mission.run_completed", + "payload": { + "taskId": "01KTT64E24GGCQ2YWX6V7VYQCW", + "runId": "01KTT64E24GGCQ2YWX6V7VYQCY", + "exitCode": 0, + "status": "waiting", + "decision": "WAITING", + "riskScore": 0, + "validatorCount": 0, + "releaseDeployUrl": null, + "releaseReverted": false, + "draftPrUrl": null, + "draftPrNumber": null + }, + "createdAt": "2026-06-11T01:53:16.361Z" + }, + { + "type": "operator.worker_started", + "payload": { + "taskId": "01KTT64E24GGCQ2YWX6V7VYQCW", + "runId": "01KTT64E24GGCQ2YWX6V7VYQCY", + "provider": "mock", + "evidenceDir": "/tmp/aedev-cockpit-quality-BEYx80/operator-evidence/01KTT64E24GGCQ2YWX6V7VYQCW" + }, + "createdAt": "2026-06-11T01:53:16.357Z" + }, + { + "type": "operator.worker_log", + "payload": { + "taskId": "01KTT64E24GGCQ2YWX6V7VYQCW", + "runId": "01KTT64E24GGCQ2YWX6V7VYQCY", + "stream": "stdout", + "chunk": "mock worker completed evidence gate" + }, + "createdAt": "2026-06-11T01:53:16.357Z" + }, + { + "type": "mission.route_selected", + "payload": { + "role": "coder", + "provider": "mock", + "sessionId": null, + "concurrency": 1, + "holdCode": null, + "reason": "worker router not configured" + }, + "createdAt": "2026-06-11T01:53:16.356Z" + }, + { + "type": "mission.run_started", + "payload": { + "evidenceDir": "/tmp/aedev-cockpit-quality-BEYx80/evidence/01KTT64DP0JWA5A7A9R0M0VMXJ" + }, + "createdAt": "2026-06-11T01:53:16.352Z" + }, + { + "type": "operator.validators_not_configured", + "payload": { + "status": "not_configured", + "note": "Gemini validator key is not configured; Draft PR remains blocked until Gemini returns PASS." + }, + "createdAt": "2026-06-11T01:53:16.350Z" + }, + { + "type": "operator.worker_assigned", + "payload": { + "sessionId": "01KTT64B82AYBJ6M976ZHGPSAW", + "mode": "mock", + "availableSessions": 0, + "paidApiKeysStripped": true + }, + "createdAt": "2026-06-11T01:53:16.350Z" + }, + { + "type": "operator.run_starting", + "payload": { + "sessionId": "01KTT64B82AYBJ6M976ZHGPSAW" + }, + "createdAt": "2026-06-11T01:53:16.349Z" + } +] \ No newline at end of file diff --git a/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/quality-smoke.md b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/quality-smoke.md new file mode 100644 index 0000000..2050df9 --- /dev/null +++ b/evidence/browser-cockpit-quality/2026-06-11T01-53-12-152Z/quality-smoke.md @@ -0,0 +1,16 @@ +# Operator Cockpit WebUI Quality Smoke + +Result: PASS +Mission: 01KTT64DP0JWA5A7A9R0M0VMXJ +Stage: pr_blocked +PR gate: GEMINI_NOT_CONFIGURED + +Assertions: +- cockpit renders as one conversation column plus the three-part status strip +- legacy Project Pulse, sidebar, inspector, and tabbed panels are absent +- one primary action per stage +- stable testids for core controls +- planner/worker provider badges expose mock test mode +- PR URL stayed empty while Gemini hard gate was not configured +- draft PR blocked card reassures no push, PR, or merge occurred +- browser console had no error/warning \ No newline at end of file diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/01-composed-and-started.png b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/01-composed-and-started.png new file mode 100644 index 0000000..33a692a Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/01-composed-and-started.png differ diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/02-planning-progress.png b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/02-planning-progress.png new file mode 100644 index 0000000..3529e5b Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/02-planning-progress.png differ diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03a-clarify-popup-filled.png b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03a-clarify-popup-filled.png new file mode 100644 index 0000000..945749e Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03a-clarify-popup-filled.png differ diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03b-clarify-answered-gate-guidance.png b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03b-clarify-answered-gate-guidance.png new file mode 100644 index 0000000..552e14e Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03b-clarify-answered-gate-guidance.png differ diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03c-clarify-unlocked.png b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03c-clarify-unlocked.png new file mode 100644 index 0000000..7f1014c Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03c-clarify-unlocked.png differ diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/04-roadmap-ready.png b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/04-roadmap-ready.png new file mode 100644 index 0000000..669e32a Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/04-roadmap-ready.png differ diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/05a-approved.png b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/05a-approved.png new file mode 100644 index 0000000..560816d Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/05a-approved.png differ diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/05b-execution-evidence-gate.png b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/05b-execution-evidence-gate.png new file mode 100644 index 0000000..9a2da65 Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/05b-execution-evidence-gate.png differ diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/06-loop-summary.png b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/06-loop-summary.png new file mode 100644 index 0000000..9a2da65 Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/06-loop-summary.png differ diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/07-pr-gate-blocked-human.png b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/07-pr-gate-blocked-human.png new file mode 100644 index 0000000..d22c607 Binary files /dev/null and b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/07-pr-gate-blocked-human.png differ diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/console-logs.json b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/console-logs.json new file mode 100644 index 0000000..2936d7c --- /dev/null +++ b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/console-logs.json @@ -0,0 +1,3 @@ +[ + "error: Failed to load resource: the server responded with a status of 409 (Conflict)" +] \ No newline at end of file diff --git a/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/user-e2e-report.md b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/user-e2e-report.md new file mode 100644 index 0000000..923d8dc --- /dev/null +++ b/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/user-e2e-report.md @@ -0,0 +1,77 @@ +# Operator Cockpit — User Journey E2E Report + +Result: **PASS** +Timestamp: 2026-06-11T01-52-49-157Z +Evidence dir: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z + +Harness: mock/template planner+worker, remote writes disabled, all external CLIs/APIs disabled, +temp stateDir, in-memory SQLite, vite dashboard, chromium via playwright. + +## Steps + +### step-1-compose-and-start — PASS + +Type a user prompt into the composer and start brainstorm +- composer testid: cockpit-goal-input · prompt: Make the onboarding flow friendlier for new users. I want it… +- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/01-composed-and-started.png + +### step-2-visible-progress — PASS + +Planning shows visible progress; the UI never looks frozen +- status strip during planning: STAGE Brainstorm · 共创中 NOW Planner is thinking · Planner 正在分析 PROGRESS 0% — — APPROVALS 0 +- loop card during planning/clarify: type=understanding · next_step="回答下方的待确认问题,AI 才能继续生成方案 · Answer the questions below so the plan can continue." +- strip refreshed: "STAGE Brainstorm · 共创中 NOW Planner is thinking · Planner 正在分析 PROGRESS 0% — — APPROVALS 0" → "STAGE Decision · 做选择 NOW Review the questions, then generate the plan · 先确认问题,再生成方案 PROGRESS 0% — — APPROVALS 0" +- cockpit-last-activity refresh check is completed as soon as the mission overview exists (see step 4 notes) — the testid only renders once a mission is created. +- cockpit-last-activity refresh verified: "LAST ACTIVITY 0s ago" → "LAST ACTIVITY 1s ago" (1.7s apart) +- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/02-planning-progress.png + +### step-3-clarifications — PASS + +Answer the clarification popup through the real UI controls +- clarification questions rendered: 2 +- answered transcript message visible; popup dismissed +- locked Generate Plan produced calm guidance, no raw gate code in visible text +- follow-up round confirmed confidence ≥95; plan unlocked +- loop card after clarification answers: type=understanding · next_step="稍等片刻,AI 正在确认理解,随后会给出方案 · Hang on — understanding is being confirmed; a plan comes next." +- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03a-clarify-popup-filled.png +- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03b-clarify-answered-gate-guidance.png +- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/03c-clarify-unlocked.png + +### step-4-generate-roadmap — PASS + +Generate roadmap; PRD/roadmap artifacts exist and stage advances +- mission 01KTT63TGG0HGHNR7BVJEE704G created with 3 design artifacts (adr, prd, roadmap…) +- loop card at roadmap_ready: type=plan · next_step="审阅这份方案;你批准后才会开始动手 · Review this plan; work starts only after you approve it." +- cockpit-last-activity refresh verified: "LAST ACTIVITY 0s ago" → "LAST ACTIVITY 1s ago" +- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/04-roadmap-ready.png + +### step-5-approve-and-execute — PASS + +Approve roadmap, start execution; execution state appears +- execution state appeared (stage=running) +- loop card during execution: type=progress · next_step="Wait for progress, pause, or stop if the run is wrong." +- worker runs recorded: 1; final stage=validators_missing +- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/05a-approved.png +- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/05b-execution-evidence-gate.png + +### step-6-loop-summary — PASS + +cockpit-loop-summary renders with non-empty whyStoppedOrContinuing +- whyStoppedOrContinuing: 结果评审尚未配置 · result review not configured +- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/06-loop-summary.png + +### step-7-draft-pr-gate — PASS + +Draft PR gate BLOCKED is calm human text; no raw codes visible +- machine code stays in data-* only: data-pr-gate-code=GEMINI_NOT_CONFIGURED +- calm safety phrasing visible (安全门 / no push, no PR, no merge reassurance) +- loop card at the Draft PR gate: type=blocker · next_step="查看这张卡的说明,确认是否继续 · Read this card’s explanation and confirm whether to continue." +- no PR URL recorded; operator.draft_pr_blocked event present +- screenshot: /home/user/claude-code-247/evidence/browser-cockpit-user-e2e/2026-06-11T01-52-49-157Z/07-pr-gate-blocked-human.png + +## Browser console issues (informational) + +- error: Failed to load resource: the server responded with a status of 409 (Conflict) + +> Note: the deliberate locked Generate Plan probe in step 3 produces one expected 409 network log entry; +> the assertion is that the VISIBLE UI stays human (guidance text, no raw codes). diff --git a/scripts/operator-cockpit-user-e2e.ts b/scripts/operator-cockpit-user-e2e.ts index 3908762..aacb77a 100644 --- a/scripts/operator-cockpit-user-e2e.ts +++ b/scripts/operator-cockpit-user-e2e.ts @@ -104,6 +104,8 @@ const PLANNED_STEPS: Array<[string, string]> = [ ['step-6-loop-summary', 'cockpit-loop-summary renders with non-empty whyStoppedOrContinuing'], ['step-7-draft-pr-gate', 'Draft PR gate BLOCKED is calm human text; no raw codes visible'], ] +// v6-P2 — at every journey step the five-card loop surface (GR#11) must show +// the EXPECTED card type with a non-empty, log-free next_step in visible text. const steps: StepResult[] = [] let current: StepResult | undefined let harnessError: string | undefined @@ -208,6 +210,9 @@ async function runJourney(page: Page): Promise { // …and WITHOUT any further clicks the planner's brainstorm result must // arrive in the conversation (polling keeps the UI alive, not frozen). await page.getByText('Initial brainstorm (user e2e fixture)', { exact: false }).first().waitFor({ timeout: 20_000 }) + // v6-P2: while the system is still understanding/clarifying, the loop card + // is the understanding card and its next_step is readable without logs. + await expectLoopCard(page, ['understanding'], 'during planning/clarify') const stripB = await page.getByTestId('cockpit-status-strip').innerText() if (stripA === stripB) { // The strip may legitimately settle fast; the brainstorm arrival above @@ -256,6 +261,8 @@ async function runJourney(page: Page): Promise { await page.getByRole('button', { name: /Ask Until Clear/ }).click() await page.getByText('your answers are enough to plan safely', { exact: false }).first().waitFor({ timeout: 20_000 }) note('follow-up round confirmed confidence ≥95; plan unlocked') + // v6-P2: still pre-plan — the loop card stays the understanding card. + await expectLoopCard(page, ['understanding'], 'after clarification answers') await shot(page, '03c-clarify-unlocked') }) @@ -270,6 +277,8 @@ async function runJourney(page: Page): Promise { if (artifacts.length === 0) throw new Error('No roadmap/PRD artifacts registered for the mission') note(`mission ${mission.id} created with ${artifacts.length} design artifacts (${artifacts.slice(0, 3).map((a) => a.type).join(', ')}…)`) await expectStage(page, ['roadmap_ready', 'pending_approval']) + // v6-P2: at roadmap_ready the daemon-derived card is the plan card. + await expectLoopCard(page, ['plan'], 'at roadmap_ready') // Deferred half of step 2: the overview now exists, so cockpit-last-activity // must render and keep refreshing (two samples >1.5s apart). @@ -295,6 +304,8 @@ async function runJourney(page: Page): Promise { await page.getByTestId('cockpit-start-execution').click() await waitForRootStage(page, ['running', 'evidence_ready', 'validators_missing', 'validating', 'pr_ready'], 30_000) note(`execution state appeared (stage=${await rootStage(page)})`) + // v6-P2: during execution/checking the loop card is the progress card. + await expectLoopCard(page, ['progress'], 'during execution') await waitForRootStage(page, ['evidence_ready', 'validators_missing', 'validators_ready', 'pr_ready'], 60_000) const mission = db.listMissions()[0] if (!mission) throw new Error('Mission disappeared') @@ -336,6 +347,9 @@ async function runJourney(page: Page): Promise { } note('calm safety phrasing visible (安全门 / no push, no PR, no merge reassurance)') await assertNoForbiddenVisibleText(page, 'on the blocked Draft PR gate screen') + // v6-P2: at the gate the loop card is blocker (or pr_ready when a real PR + // exists); with remote writes disabled the safe expectation is blocker. + await expectLoopCard(page, ['blocker', 'pr_ready'], 'at the Draft PR gate') const mission = db.listMissions()[0] if (mission?.githubPrUrl) throw new Error(`Unexpected PR URL recorded: ${mission.githubPrUrl}`) if (!db.queryEvents({ entityId: mission!.id, limit: 20 }).some((e) => e.type === 'operator.draft_pr_blocked')) { @@ -374,6 +388,48 @@ async function assertNoForbiddenVisibleText(page: Page, where: string): Promise< } } +/** + * v6-P2 (GR#11) — assert that the loop card is present with one of the + * expected card types, that an ordinary user can read a non-empty next_step + * in visible text (no logs needed), and that the card's machine codes stay in + * data-* attributes only — never inside its visible text. + */ +async function expectLoopCard(page: Page, expectedTypes: string[], where: string, timeoutMs = 20_000): Promise { + const card = page.getByTestId('cockpit-loop-card') + const deadline = Date.now() + timeoutMs + let lastType = 'absent' + while (Date.now() < deadline) { + if (await card.count()) { + lastType = (await card.getAttribute('data-card-type')) ?? 'unknown' + if (expectedTypes.includes(lastType)) break + } + await page.waitForTimeout(200) + } + if (!expectedTypes.includes(lastType)) { + throw new Error(`Expected loop card type in [${expectedTypes.join(', ')}] ${where}, got: ${lastType}`) + } + const nextStep = oneLine(await page.getByTestId('cockpit-loop-card-next-step').innerText()) + const nextStepBody = nextStep.replace(/下一步 · Next/g, '').trim() + if (nextStepBody.length < 8) { + throw new Error(`Loop card next_step is empty/too short ${where}: "${nextStep}"`) + } + const cardText = await card.innerText() + for (const re of FORBIDDEN_VISIBLE) { + if (re.test(cardText)) throw new Error(`Raw machine code ${re} leaked into the loop card ${where}`) + } + for (const attr of ['data-hold-code', 'data-pr-gate-code', 'data-machine-stage'] as const) { + const code = await card.getAttribute(attr) + // Stage tokens that are plain words (e.g. "running") can legitimately occur + // in calm prose; only machine-looking tokens (underscored / HOLD-*) are + // forbidden from visible text. + const machineLooking = Boolean(code) && (code!.includes('_') || code!.startsWith('HOLD-')) + if (code && machineLooking && cardText.includes(code)) { + throw new Error(`Machine code "${code}" (${attr}) rendered as visible loop-card text ${where}`) + } + } + note(`loop card ${where}: type=${lastType} · next_step="${nextStepBody.slice(0, 120)}"`) +} + async function rootStage(page: Page): Promise { return (await page.getByTestId('cockpit-root').getAttribute('data-stage')) ?? 'unknown' }