From 378cd1c53c9ba77365074d1f949363884422a556 Mon Sep 17 00:00:00 2001
From: Ittai Zeidman <ittaiz@gmail.com>
Date: Mon, 6 Apr 2026 08:58:07 +0300
Subject: [PATCH] =?UTF-8?q?feat:=20UI=20testing=20infrastructure=20?=
 =?UTF-8?q?=E2=80=94=20agent-browser=20default,=20CDP=20fallback?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable agents to visually verify dev-3.0 UI changes by connecting
automation tools to the running app. Two modes are supported: browser
mode (default, fast) and native/CDP mode (opt-in, for terminal and
WebKit-specific testing).

Browser mode (agent-browser via remote access server):
The remote access server already served the full React UI to browsers
for remote access. Two changes make it usable for automated testing:
- Listen on DEV3_RPC_PORT (fixed port from the port pool) instead of
  a random port, so agent-browser can target the right task instance
- Bypass JWT auth for localhost connections when DEV3_RPC_PORT is set

Each concurrent task gets its own port, so multiple agents can test
different instances simultaneously without conflicts.

Native/CDP mode (agent-electrobun via CEF):
For terminal/PTY features, native window behavior, or WebKit rendering
bugs, a .dev3_cdp sentinel file switches the devScript to bundle CEF
with Chrome remote debugging enabled. This adds build time but gives
access to the actual Electrobun webview via CDP.

The mode toggle lives entirely in the project devScript — no changes
to the generic dev3 CLI or RPC handlers. Env vars can't cross the Unix
socket boundary between the agent shell and the tmux dev session, so
the devScript reads a file instead.

Automation bridge (window.__dev3):
Expose navigate() and getState() on window.__dev3 for programmatic
navigation and state inspection. Guarded by a Vite define flag
(__DEV3_AUTOMATION) — available in dev/staging, tree-shaken in prod.
Works identically in both agent-browser and agent-electrobun.

Skill docs (.claude/skills/dev3-ui-control/):
- SKILL.md: agent-browser as primary tool, decision matrix, __dev3
  bridge recipes, Playwright-native interaction patterns
- native-cdp.md: agent-electrobun setup, sentinel file toggle, React
  input workaround, CDP connection retry, keyboard event dispatch
- click-navigation.md: click-based navigation reference and gotchas

devScript now detects a stale CEF app bundle (build.json has
defaultRenderer:cef) when .dev3_cdp is absent and removes it before
building, preventing the CEF process from grabbing DEV3_PORT0 ahead
of the remote access server.
---
 .claude/skills/dev3-ui-control/SKILL.md       | 266 ++++++++++++++++++
 .../dev3-ui-control/click-navigation.md       |  45 +++
 .claude/skills/dev3-ui-control/native-cdp.md  | 110 ++++++++
 .dev3/config.json                             |   3 +-
 .gitignore                                    |   1 +
 README.md                                     |   1 +
 .../04/06/feature-cdp-remote-debugging.md     |   1 +
 .../2026/04/08/docs-dev3-ui-control-skill.md  |   2 +
 change-logs/2026/04/11/fix-stale-cef-build.md |   1 +
 decisions/032-dev3-automation-bridge.md       |  27 ++
 decisions/033-dual-mode-dev-server.md         |  30 ++
 docs/agent-electrobun-setup.md                |  61 ++++
 electrobun.config.ts                          |  15 +-
 package.json                                  |   2 +-
 .../__tests__/remote-access-server.test.ts    |  36 +++
 src/bun/remote-access-server.ts               |  15 +-
 src/cli/__tests__/dev-server.test.ts          |   1 +
 src/mainview/App.tsx                          |  16 ++
 vite.config.ts                                |   1 +
 19 files changed, 627 insertions(+), 7 deletions(-)
 create mode 100644 .claude/skills/dev3-ui-control/SKILL.md
 create mode 100644 .claude/skills/dev3-ui-control/click-navigation.md
 create mode 100644 .claude/skills/dev3-ui-control/native-cdp.md
 create mode 100644 change-logs/2026/04/06/feature-cdp-remote-debugging.md
 create mode 100644 change-logs/2026/04/08/docs-dev3-ui-control-skill.md
 create mode 100644 change-logs/2026/04/11/fix-stale-cef-build.md
 create mode 100644 decisions/032-dev3-automation-bridge.md
 create mode 100644 decisions/033-dual-mode-dev-server.md
 create mode 100644 docs/agent-electrobun-setup.md

diff --git a/.claude/skills/dev3-ui-control/SKILL.md b/.claude/skills/dev3-ui-control/SKILL.md
new file mode 100644
index 00000000..c396e2fc
--- /dev/null
+++ b/.claude/skills/dev3-ui-control/SKILL.md
@@ -0,0 +1,266 @@
+---
+name: dev3-ui-control
+description: "MANDATORY for any visual verification of dev-3.0 app changes. Default: agent-browser via the remote access server. Fallback: agent-electrobun for terminal/native testing (see native-cdp.md). Triggers on: verify fix in app, test the change, screenshot, interact with UI, control the app, create/run task in UI, dogfood."
+allowed-tools: Bash(agent-browser:*), Bash(agent-electrobun:*), Bash(QUIVER_CDP_PORT=*), Bash(dev3 dev-server:*)
+---
+
+# dev3-ui-control — Verifying UI changes
+
+## Which tool to use
+
+| What you're testing | Tool | Why |
+|---|---|---|
+| Component behavior (clicks, toggles, modals, forms) | agent-browser + snapshot/eval | Same React UI, better tooling (wait, fill, press) |
+| Layout, visual appearance | agent-browser + snapshot/eval | Prefer text assertions; screenshot only if human review explicitly needed |
+| Terminal/PTY features (xterm, tmux controls) | agent-electrobun | Browser mode uses WebSocket PTY proxy — different path |
+| Native window features (drag-drop, system notifications) | agent-electrobun | Not available in browser mode |
+| WebKit-specific rendering bugs | agent-electrobun | Production uses WKWebView, not Chromium |
+
+**Default to agent-browser.** Use agent-electrobun only for the last three rows. See [native-cdp.md](./native-cdp.md) for the CDP setup.
+
+## Quick start (agent-browser)
+
+```bash
+# 1. Start dev server and get the port
+dev3 dev-server start          # or `dev3 dev-server status` if already running
+# → Assigned Ports: DEV3_PORT0=NNNNN
+
+# 2. Set the port variable for the session
+PORT=NNNNN
+
+# 3. Open the app in agent-browser
+agent-browser open http://localhost:$PORT
+
+# 4. Wait for it to load (WebSocket RPC must connect)
+agent-browser wait --load networkidle
+
+# 5. You're connected — start automating
+agent-browser snapshot -i
+```
+
+### Waiting for the app to be ready
+
+After `dev3 dev-server start`, the build takes 10-20s. The remote access server starts after the Electrobun app launches.
+
+**Check build progress:** `dev3 dev-server status` — if CPU is >100%, the build is still compiling. Once it drops, the server should be reachable.
+
+```bash
+# Open and wait — agent-browser retries internally
+agent-browser open http://localhost:$PORT
+agent-browser wait --load networkidle
+```
+
+If `open` fails with connection refused, the build isn't done yet. Wait a few seconds and retry.
+
+## The `__dev3` automation bridge
+
+The app exposes `window.__dev3` for programmatic control. Works in both agent-browser and agent-electrobun.
+
+| Method | What it does |
+|---|---|
+| `__dev3.navigate(route)` | Programmatic navigation |
+| `__dev3.getState()` | Returns current AppState (route, projects, tasks) |
+
+**Always prefer `navigate()` over clicking through the UI.** It's instant and doesn't require finding elements.
+
+**Availability:** Guarded by `globalThis.__DEV3_AUTOMATION` (true in dev/staging, false in production). Available in built assets served by the remote access server.
+
+**State coupling:** Stick to `getState().route`, `getState().projects`, and `getState().currentProjectTasks` — these are stable across refactors.
+
+### Route types
+
+```javascript
+__dev3.navigate({ screen: "dashboard" })
+__dev3.navigate({ screen: "project", projectId: "..." })
+__dev3.navigate({ screen: "project", projectId: "...", activeTaskId: "..." })  // split view
+__dev3.navigate({ screen: "task", projectId: "...", taskId: "..." })           // fullscreen
+__dev3.navigate({ screen: "project-settings", projectId: "..." })
+__dev3.navigate({ screen: "settings" })
+```
+
+### Finding IDs and navigating
+
+```bash
+# Get project and task IDs
+agent-browser eval '(() => {
+  const s = window.__dev3.getState();
+  return JSON.stringify({
+    route: s.route,
+    projects: s.projects.map(p => ({ id: p.id, name: p.name })),
+    tasks: s.currentProjectTasks.map(t => ({ id: t.id, seq: t.seq, title: t.title?.substring(0, 40), status: t.status }))
+  }, null, 2);
+})()'
+
+# Navigate to a task's split view
+agent-browser eval '(() => {
+  window.__dev3.navigate({ screen: "project", projectId: "PROJECT_ID", activeTaskId: "TASK_ID" });
+  return "ok";
+})()'
+
+# Navigate to first project's board
+agent-browser eval '(() => {
+  const p = window.__dev3.getState().projects[0];
+  if (!p) return "no projects";
+  window.__dev3.navigate({ screen: "project", projectId: p.id });
+  return "navigated to " + p.name;
+})()'
+
+# Navigate to a task by sequence number (e.g., #42)
+agent-browser eval '(() => {
+  const s = window.__dev3.getState();
+  const task = s.currentProjectTasks.find(t => t.seq === 42);
+  if (!task) return "task not found";
+  const pid = s.route.screen === "project" ? s.route.projectId : s.projects[0]?.id;
+  window.__dev3.navigate({ screen: "project", projectId: pid, activeTaskId: task.id });
+  return "navigated to #" + task.seq;
+})()'
+```
+
+## Interacting with elements
+
+```bash
+# Snapshot interactive elements
+agent-browser snapshot -i
+
+# Click by ref
+agent-browser click @e27
+
+# Fill an input (works natively with React — no workarounds needed)
+agent-browser fill @e2 "my text here"
+
+# Press a key
+agent-browser press Escape
+agent-browser press Enter
+
+# Select dropdown option
+agent-browser select @e5 "Bypass (Sonnet)"
+
+# Wait for element to appear
+agent-browser wait @e1
+
+# Scoped snapshot (only elements within a CSS selector)
+agent-browser snapshot -i -s "[data-panel='task-info']"
+```
+
+## UI flows
+
+### Create a task
+
+1. Navigate to the Kanban board.
+2. `agent-browser snapshot -i` — find `+ New Task` button, click it.
+3. A modal appears with a textarea.
+4. `agent-browser fill @eNN "Describe what needs to be done..."` — fill the description.
+5. `agent-browser wait 300` then `agent-browser snapshot -i` — confirm Save buttons are enabled.
+6. Click `Save` or `Save & Start`.
+
+### Run a task with a specific model/profile
+
+1. Find the task's `Run` button and click it.
+2. **Launch Task** modal appears with CLI and Profile dropdowns.
+3. Select the desired profile, click `Launch`.
+
+### Search tasks
+
+```bash
+agent-browser snapshot -i          # find the search input
+agent-browser fill @eNN "query"    # fill it
+agent-browser wait 500             # wait for filter
+agent-browser snapshot -i          # see filtered results
+```
+
+## Verification patterns
+
+### Assert element presence (prefer over screenshots)
+
+```bash
+# Menu is open (Restart/Stop visible)
+agent-browser snapshot -i 2>&1 | grep -i "restart\|stop"
+
+# Menu is closed (leading `!` inverts grep's exit status: no matches = assertion passed)
+! agent-browser snapshot -i 2>&1 | grep -qi "restart\|stop"
+
+# Scoped assertion (only within a specific panel)
+agent-browser snapshot -i -s "[data-menu='dev-server']" 2>&1 | grep "Restart"
+```
+
+### Assert current route
+
+```bash
+agent-browser eval '(() => {
+  return JSON.stringify(window.__dev3.getState().route);
+})()'
+```
+
+### Typical verification flow
+
+```bash
+PORT=14561  # set once from dev3 dev-server status
+
+# 1. Open and wait
+agent-browser open http://localhost:$PORT
+agent-browser wait --load networkidle
+
+# 2. Navigate to the view you need
+agent-browser eval '(() => {
+  window.__dev3.navigate({ screen: "project", projectId: "PID", activeTaskId: "TID" });
+  return "ok";
+})()'
+
+# 3. Wait for render, find element, interact
+agent-browser wait 300
+agent-browser snapshot -i 2>&1 | grep "Dev Server"
+# → @e27 button "Dev Server"
+agent-browser click @e27
+
+# 4. Assert the result
+agent-browser wait 300
+agent-browser snapshot -i 2>&1 | grep "Restart"
+# Output present → menu opened
+```
+
+## Screenshots — last resort only
+
+**Never use screenshots to diagnose or assert.** Every screenshot costs image tokens and adds latency. Before reaching for `screenshot`, ask: "can I express this as text?"
+
+The answer is almost always yes:
+
+| Instead of screenshot for... | Use this |
+|---|---|
+| "Is the page blank?" | `eval 'document.documentElement.outerHTML.slice(0, 300)'` |
+| "Did the menu open?" | `snapshot -i \| grep -i "restart\|stop"` |
+| "Did navigation work?" | `eval 'JSON.stringify(window.__dev3.getState().route)'` |
+| "Is the element visible?" | `snapshot -i \| grep "element text"` |
+| "Did the data load?" | `eval 'window.__dev3.getState().projects.length'` |
+| "Is there an error?" | `eval 'document.body.innerText.slice(0, 500)'` |
+
+**Only take a screenshot when:**
+1. The user explicitly asks for one, OR
+2. You need to show a human a pixel-level rendering defect that cannot be described in text
+
+```bash
+# Last resort only
+agent-browser screenshot /tmp/dev3-screenshot.png
+agent-browser screenshot --full /tmp/dev3-full.png
+```
+
+## Efficiency tips
+
+- **Set `PORT` once** from `dev3 dev-server status` and reuse throughout the session.
+- **Use `__dev3.navigate()`** instead of clicking through the UI. Click navigation wastes 5-10 commands.
+- **Use `snapshot -i`** (interactive only) — much shorter output than full snapshot.
+- **Use `snapshot -i | grep`** for assertions — faster, no image token cost, grep-able.
+- **Re-snapshot after every click** that changes the DOM — refs are invalidated.
+- **Use `agent-browser wait`** instead of `sleep` — `wait @e1` waits for an element, `wait --load networkidle` waits for network.
+
+## Troubleshooting
+
+| Problem | Cause | Fix |
+|---|---|---|
+| Connection refused on `open` | Build still compiling | Check `dev3 dev-server status` CPU; retry when it drops |
+| App loads but shows errors | WebSocket RPC not connected | Ensure the Electrobun app is running (it hosts the RPC server) |
+| Click didn't navigate | Clicked a non-active task card | Only in-progress tasks navigate on click. Use `__dev3.navigate()` |
+| `fill` doubles characters | Only happens with agent-electrobun | Use agent-browser instead (native Playwright, no doubling) |
+
+## Native/CDP mode
+
+For terminal/PTY testing, native window features, or WebKit-specific bugs, see [native-cdp.md](./native-cdp.md). Requires creating a `.dev3_cdp` sentinel file and restarting the dev server.
diff --git a/.claude/skills/dev3-ui-control/click-navigation.md b/.claude/skills/dev3-ui-control/click-navigation.md
new file mode 100644
index 00000000..d7d17b43
--- /dev/null
+++ b/.claude/skills/dev3-ui-control/click-navigation.md
@@ -0,0 +1,45 @@
+# Click-based navigation reference
+
+Load this file only when testing click behavior itself. For normal navigation, use `__dev3.navigate()` from the main SKILL.md.
+
+## Navigate to a project (from dashboard)
+
+```bash
+agent-browser snapshot -i
+# Find the project button (e.g., @e2 button "dev-3.0 /Users/...")
+agent-browser click @eNN
+agent-browser wait 1000
+agent-browser snapshot -i
+# Now on the Kanban board
+```
+
+## Task card click behavior
+
+| Task status | Click behavior |
+|---|---|
+| Active (has worktree) | Navigates to split view or fullscreen |
+| To Do (no worktree) | **Does nothing** |
+| Completed / Cancelled | Opens TaskDetailModal |
+
+## Sidebar task list
+
+The left sidebar lists active tasks. Each is a `<button>`:
+- Click a task → switches to its workspace
+- Click the active task → deselects (returns to board view)
+
+## "Open in..." menu (gotcha)
+
+The icon button (`U+F0379`) on task cards opens an "Open in..." dropdown — it does **not** navigate. Don't click it when trying to navigate.
+
+## Finding task cards by ID
+
+When snapshot refs don't surface a card, use `data-task-id`:
+
+```bash
+agent-browser eval '(() => {
+  const el = document.querySelector("div[data-task-id=\"TASK_UUID\"]");
+  if (!el) return "not found";
+  el.click();
+  return "clicked";
+})()'
+```
diff --git a/.claude/skills/dev3-ui-control/native-cdp.md b/.claude/skills/dev3-ui-control/native-cdp.md
new file mode 100644
index 00000000..b27c5177
--- /dev/null
+++ b/.claude/skills/dev3-ui-control/native-cdp.md
@@ -0,0 +1,110 @@
+# Native/CDP mode — agent-electrobun
+
+Use this mode when testing terminal/PTY features, native window behavior, or WebKit-specific rendering. For everything else, use the default agent-browser flow in the main SKILL.md.
+
+## Enabling CDP mode
+
+CDP mode requires CEF (Chromium Embedded Framework) to be bundled in the dev build. This is controlled by a sentinel file:
+
+```bash
+# Enable CDP mode
+touch .dev3_cdp
+dev3 dev-server restart   # rebuilds with CEF — slower build
+
+# Disable CDP mode (return to browser mode)
+rm .dev3_cdp
+dev3 dev-server restart   # rebuilds without CEF — faster build
+```
+
+## Connecting
+
+```bash
+# 1. Get the CDP port
+dev3 dev-server status
+# → Assigned Ports: DEV3_PORT0=NNNNN
+
+# 2. Set the port variable
+CDP=NNNNN
+
+# 3. Wait for the app to be ready (retry until it connects)
+QUIVER_CDP_PORT=$CDP agent-electrobun --target shell list 2>&1 || \
+  (sleep 1.9 && QUIVER_CDP_PORT=$CDP agent-electrobun --target shell list 2>&1) || \
+  (sleep 1.9 && QUIVER_CDP_PORT=$CDP agent-electrobun --target shell list 2>&1)
+
+# 4. Start automating
+QUIVER_CDP_PORT=$CDP agent-electrobun --target shell snapshot -i
+```
+
+**Always use `--target shell`** — the app is a single-webview shell.
+
+## Known limitations
+
+- **`agent-electrobun tabs` fails** — `window.__quiverAutomation` is undefined. Use `--target shell` for everything.
+- **`agent-electrobun keyboard press`** does not support key names like `Escape`. Use JS eval instead (see below).
+- **Refs (`@e1`, `@e2`...) invalidate** on any DOM change. Re-snapshot after every click.
+- **`fill` / `keyboard type` doubles characters** in React controlled inputs. Use the JS eval workaround below.
+
+## React input workaround
+
+agent-electrobun's `fill` and `keyboard type` cause character doubling with React. Use this pattern instead:
+
+For `<textarea>`:
+```bash
+QUIVER_CDP_PORT=$CDP agent-electrobun --target shell eval '(() => {
+  const el = document.querySelector("textarea[placeholder=\"YOUR_PLACEHOLDER\"]");
+  if (!el) return "not found";
+  const setter = Object.getOwnPropertyDescriptor(window.HTMLTextAreaElement.prototype, "value").set;
+  setter.call(el, "YOUR TEXT HERE");
+  el.dispatchEvent(new Event("input", { bubbles: true }));
+  return "done";
+})()'
+```
+
+For `<input>`:
+```bash
+QUIVER_CDP_PORT=$CDP agent-electrobun --target shell eval '(() => {
+  const el = document.querySelector("input[placeholder=\"YOUR_PLACEHOLDER\"]");
+  if (!el) return "not found";
+  const setter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, "value").set;
+  setter.call(el, "YOUR TEXT HERE");
+  el.dispatchEvent(new Event("input", { bubbles: true }));
+  return "done";
+})()'
+```
+
+## Dispatching keyboard events
+
+```bash
+QUIVER_CDP_PORT=$CDP agent-electrobun --target shell eval \
+  'document.activeElement.dispatchEvent(new KeyboardEvent("keydown", {key:"Escape",bubbles:true}))'
+```
+
+## Navigation and state
+
+The `__dev3` bridge works identically in CDP mode. The only difference is the command prefix:
+
+```bash
+# agent-browser (browser mode):
+agent-browser eval '(() => { window.__dev3.navigate({...}); return "ok"; })()'
+
+# agent-electrobun (CDP mode):
+QUIVER_CDP_PORT=$CDP agent-electrobun --target shell eval '(() => { window.__dev3.navigate({...}); return "ok"; })()'
+```
+
+All route types and recipes from the main SKILL.md apply — just swap the command prefix.
+
+## Screenshots
+
+```bash
+QUIVER_CDP_PORT=$CDP agent-electrobun --target shell screenshot /tmp/dev3-screenshot.png
+QUIVER_CDP_PORT=$CDP agent-electrobun --target shell screenshot --annotate /tmp/dev3-annotated.png
+```
+
+## Troubleshooting
+
+| Problem | Cause | Fix |
+|---|---|---|
+| `ConnectionRefused` on `list` | Build still compiling | Check `dev3 dev-server status` CPU; retry when it drops |
+| Build doesn't bundle CEF | `.dev3_cdp` file missing | `touch .dev3_cdp && dev3 dev-server restart` |
+| Two instances can't run simultaneously | CFBundleIdentifier conflict | Each task needs a unique `DEV3_PORT0` (handled by port pool) |
+| Snapshot shows nothing | App hasn't loaded yet | Wait 1-2s after `list` succeeds, then retry |
diff --git a/.dev3/config.json b/.dev3/config.json
index 9e301cb9..3fc4160a 100644
--- a/.dev3/config.json
+++ b/.dev3/config.json
@@ -1,11 +1,12 @@
 {
   "setupScript": "echo setup\nbun install",
-  "devScript": "bun run dev",
+  "devScript": "if [ -f .dev3_cdp ]; then DEV3_CDP_PORT=${DEV3_PORT0} bun run dev; else if [ -f build/dev-macos-arm64/dev-3.0-dev.app/Contents/Resources/build.json ] && grep -q '\"defaultRenderer\":\"cef\"' build/dev-macos-arm64/dev-3.0-dev.app/Contents/Resources/build.json; then echo '[dev3] Stale CEF build detected without .dev3_cdp — removing app bundle' && rm -rf build/dev-macos-arm64/dev-3.0-dev.app; fi && DEV3_RPC_PORT=${DEV3_PORT0:-19191} bun run dev; fi",
   "clonePaths": [
     "node_modules",
     "build",
     "dist"
   ],
+  "portCount": 1,
   "builtinColumnAgents": {
     "review-by-ai": {
       "agentId": "builtin-claude",
diff --git a/.gitignore b/.gitignore
index 6e6fad67..3e485f41 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@ changelog.json
 
 # dev-3.0 local config
 .dev3/config.local.json
+.dev3_cdp
diff --git a/README.md b/README.md
index 9121f16e..3eaa6b14 100644
--- a/README.md
+++ b/README.md
@@ -130,6 +130,7 @@ bun run test         # Run tests
 
 See [AGENTS.md](AGENTS.md) for full architecture docs and coding guidelines.
 See [agent-support-matrix.md](agent-support-matrix.md) for feature compatibility across AI agents.
+See [docs/agent-electrobun-setup.md](docs/agent-electrobun-setup.md) for setting up UI automation with agent-electrobun (lets AI agents interact with the app via CDP).
 
 ## Troubleshooting
 
diff --git a/change-logs/2026/04/06/feature-cdp-remote-debugging.md b/change-logs/2026/04/06/feature-cdp-remote-debugging.md
new file mode 100644
index 00000000..958bc9bf
--- /dev/null
+++ b/change-logs/2026/04/06/feature-cdp-remote-debugging.md
@@ -0,0 +1 @@
+Enable CDP remote debugging for dev builds via the DEV3_CDP_PORT env var. When set, electrobun bundles CEF and passes --remote-debugging-port to Chromium, allowing agent-electrobun to connect and automate the app. Each task gets a unique CDP port from the port pool (DEV3_PORT0) and a unique app identifier (dev3.electrobun.dev.<port>) so multiple dev instances can run concurrently without CEF singleton conflicts. Production builds remain native WebKit with no CDP.
diff --git a/change-logs/2026/04/08/docs-dev3-ui-control-skill.md b/change-logs/2026/04/08/docs-dev3-ui-control-skill.md
new file mode 100644
index 00000000..15050412
--- /dev/null
+++ b/change-logs/2026/04/08/docs-dev3-ui-control-skill.md
@@ -0,0 +1,2 @@
+Added project-level skill `dev3-ui-control` that teaches AI agents how to automate the dev-3.0 app UI via agent-electrobun CDP. Covers CDP port discovery, React controlled input workarounds, and recipes for common UI flows (navigate projects, create tasks, run tasks, search). Added `docs/agent-electrobun-setup.md` with build/install instructions pinned to vetted commit `c0bc0c0`, and linked it from the README.
+Added `window.__dev3` automation bridge (navigate, getState) for CDP-based UI testing, and comprehensively updated the dev3-ui-control skill docs with navigation recipes, verification patterns, troubleshooting, and efficiency tips.
diff --git a/change-logs/2026/04/11/fix-stale-cef-build.md b/change-logs/2026/04/11/fix-stale-cef-build.md
new file mode 100644
index 00000000..2feeb938
--- /dev/null
+++ b/change-logs/2026/04/11/fix-stale-cef-build.md
@@ -0,0 +1 @@
+Stale CEF build detection: if a prior worktree session used CDP mode (.dev3_cdp), the CEF binary with a baked-in remote-debugging-port was cloned into subsequent worktrees. When those worktrees ran without .dev3_cdp, the stale CEF grabbed DEV3_PORT0 before the remote access server could bind to it. Fixed by adding a check in the devScript that removes the app bundle when a CEF build is detected without a .dev3_cdp sentinel.
\ No newline at end of file
diff --git a/decisions/032-dev3-automation-bridge.md b/decisions/032-dev3-automation-bridge.md
new file mode 100644
index 00000000..4284905d
--- /dev/null
+++ b/decisions/032-dev3-automation-bridge.md
@@ -0,0 +1,27 @@
+# 032 — `window.__dev3` automation bridge for CDP testing
+
+## Context
+
+AI agents verifying UI changes via CDP (agent-electrobun) spent 80% of their effort navigating to the right view. Click-based navigation is fragile: task card clicks behave differently based on task status, refs invalidate on every DOM change, and there's no deterministic way to reach a specific screen.
+
+## Decision
+
+Added `window.__dev3` global in `src/mainview/App.tsx` with two methods:
+- `navigate(route)` — programmatic navigation using the same Route type as the reducer
+- `getState()` — returns current AppState (route, projects, tasks)
+
+The bridge is guarded by `globalThis.__DEV3_AUTOMATION`, a Vite `define` flag set in `vite.config.ts`. It defaults to `true` and is only `false` when `DEV3_PROD=1` is set (which `bun run build:prod` does). This means the bridge is available in all dev/staging builds (including bundled assets), but tree-shaken out of production. Set up in a `useEffect` that depends on `[navigate, state]` and cleans up on unmount.
+
+`dispatch()` was intentionally excluded — it's a foot-gun that would let CDP evals wipe UI state with no guardrails.
+
+## Risks
+
+- `getState()` returns a snapshot that may be stale by the time the eval result is processed. Fine for navigation and ID discovery, not for real-time assertions.
+- The `state` dependency on the useEffect means the bridge object is recreated on every state change. This is cheap (object creation) and ensures `getState()` always returns current state.
+- `getState()` exposes the internal `AppState` shape. If fields are renamed in a refactor, automation scripts break. Stable fields: `route`, `projects`, `currentProjectTasks`.
+
+## Alternatives considered
+
+- **Custom events only** (like existing `rpc:navigateToSettings`): Would require a new event per route type. The bridge is more flexible.
+- **URL-based routing**: The app uses React state routing, not URL hash routing. Adding URL routing would be a larger change with broader impact.
+- **Including `dispatch()`**: Rejected — too much power with no safety. An agent could dispatch `{ type: "setTasks", tasks: [] }` and wipe the board. `navigate` + `getState` covers all known automation needs.
diff --git a/decisions/033-dual-mode-dev-server.md b/decisions/033-dual-mode-dev-server.md
new file mode 100644
index 00000000..c614a604
--- /dev/null
+++ b/decisions/033-dual-mode-dev-server.md
@@ -0,0 +1,30 @@
+# 033 — Dual-mode dev server (browser vs native CDP)
+
+## Context
+
+Agent UI verification used agent-electrobun via CDP, which required bundling CEF in dev builds. This added build time, binary size, and complexity (per-task CFBundleIdentifier hack). Meanwhile, the remote access server already serves the full UI to any browser — agent-browser (Playwright-based) could test the same React components without CEF.
+
+## Decision
+
+Two platform changes enable any project to use agent-browser for UI testing:
+
+1. **Fixed port for remote access server:** When `DEV3_RPC_PORT` is set in the environment, the remote access server listens on that port instead of a random one. The devScript sets this from `DEV3_PORT0` (the port pool allocation).
+2. **Localhost auth bypass:** When `DEV3_RPC_PORT` is set, localhost connections skip JWT authentication, allowing agent-browser to connect without a QR token.
+
+Mode switching is handled entirely in the project's devScript (`.dev3/config.json`), not in the dev3 platform. The dev-3.0 project's devScript branches on whether the `.dev3_cdp` sentinel file exists in the worktree:
+
+- **Browser mode (default, no `.dev3_cdp`):** `DEV3_RPC_PORT=$DEV3_PORT0 bun run dev` — remote access server on a known port, no CEF.
+- **Native/CDP mode (`.dev3_cdp` present):** `DEV3_CDP_PORT=$DEV3_PORT0 bun run dev` — bundles CEF for agent-electrobun.
+
+Code paths: `remote-access-server.ts` (fixed port + auth bypass). No changes to the generic `runDevServer`, CLI, or socket handler.
+
+## Risks
+
+- If `DEV3_RPC_PORT` is already in use (stale process), the server fails to start. Random port (old default) never had this problem.
+- Localhost auth bypass means any local process can connect to RPC in dev mode. Acceptable for local development.
+
+## Alternatives considered
+
+- **`--native` CLI flag:** Would pass a mode toggle through the generic dev3 CLI → socket → runDevServer pipeline. Rejected — bakes a project-specific concept (CEF/CDP) into the platform. Mode switching belongs in the devScript.
+- **Vite proxy approach:** Add WebSocket proxy in vite.config.ts to forward /rpc to the backend. Rejected — adds a proxy hop and the Vite dev server isn't running per-task anyway (each task runs `vite build`, not `vite serve`).
+- **Per-task Vite port allocation:** Allocate a second port for the Vite dev server per task. Rejected — unnecessary since the remote access server already serves built assets.
diff --git a/docs/agent-electrobun-setup.md b/docs/agent-electrobun-setup.md
new file mode 100644
index 00000000..82b50f52
--- /dev/null
+++ b/docs/agent-electrobun-setup.md
@@ -0,0 +1,61 @@
+# agent-electrobun Setup
+
+[agent-electrobun](https://github.com/nichochar/agent-electrobun) is a CDP (Chrome DevTools Protocol) automation CLI that lets AI agents interact with the dev-3.0 app — clicking buttons, filling forms, taking screenshots, and verifying UI changes.
+
+## Install the CLI
+
+Build from the vetted commit:
+
+```bash
+# Clone the repo
+git clone https://github.com/nichochar/agent-electrobun.git
+cd agent-electrobun
+
+# Check out the vetted commit
+git checkout c0bc0c04ff8e7a1231709e36bfab45076d7bc27a
+
+# Build and install globally
+bun install
+bun build src/agent-electrobun.ts --compile --outfile agent-electrobun
+sudo mv agent-electrobun /usr/local/bin/
+```
+
+Verify: `agent-electrobun --help` should print the usage info.
+
+## Install the Claude Code skill
+
+The `agent-electrobun` skill teaches Claude Code the generic CDP commands. Install it from the same repo:
+
+```bash
+mkdir -p ~/.claude/skills/agent-electrobun
+cp SKILL.md ~/.claude/skills/agent-electrobun/SKILL.md
+
+# If the repo has a references/ directory, copy that too
+cp -r references ~/.claude/skills/agent-electrobun/references 2>/dev/null || true
+```
+
+## dev3-specific skill
+
+The project includes a **project-level** skill at `.claude/skills/dev3-ui-control/` with dev3-specific recipes. It defaults to agent-browser and falls back to `agent-electrobun` for terminal/native testing; its `native-cdp.md` covers the agent-electrobun flow (CDP port discovery, React input workarounds), and the main `SKILL.md` covers the UI flow guides. This skill is automatically available to all agents working in the repo — no extra installation needed.
+
+## Quick verification
+
+```bash
+# Enable CDP mode (sentinel file) and start a dev server for your task
+touch .dev3_cdp && dev3 dev-server start
+
+# Get the CDP port
+dev3 dev-server status
+# Look for: Assigned Ports: DEV3_PORT0=NNNNN
+
+# Test connectivity
+QUIVER_CDP_PORT=NNNNN agent-electrobun list
+# Expected: [shell] dev-3.0 vX.Y.Z
+
+# Take a snapshot
+QUIVER_CDP_PORT=NNNNN agent-electrobun --target shell snapshot -i
+```
+
+## Vetted commit
+
+The current vetted commit is `c0bc0c04ff8e7a1231709e36bfab45076d7bc27a`. We pin to a specific commit because agent-electrobun is an external tool that runs arbitrary JS in the app's renderer process. Before updating, review the diff for security implications.
diff --git a/electrobun.config.ts b/electrobun.config.ts
index 2303cb12..088f545a 100644
--- a/electrobun.config.ts
+++ b/electrobun.config.ts
@@ -3,7 +3,9 @@ import type { ElectrobunConfig } from "electrobun";
 export default {
 	app: {
 		name: "dev-3.0",
-		identifier: "dev3.electrobun.dev",
+		identifier: process.env.DEV3_CDP_PORT
+			? `dev3.electrobun.dev.${process.env.DEV3_CDP_PORT}`
+			: "dev3.electrobun.dev",
 		version: "1.6.2",
 	},
 	release: {
@@ -11,10 +13,19 @@ export default {
 	},
 	build: {
 		mac: {
-			bundleCEF: false,
+			bundleCEF: !!process.env.DEV3_CDP_PORT,
 			icons: "icon.iconset",
 			codesign: false,
 			notarize: false,
+			...(process.env.DEV3_CDP_PORT
+				? {
+						defaultRenderer: "cef" as const,
+						chromiumFlags: {
+							"remote-debugging-port":
+								process.env.DEV3_CDP_PORT || "9222",
+						},
+					}
+				: {}),
 			entitlements: {
 				"com.apple.security.device.audio-input":
 					"Required for voice dictation in AI coding assistants",
diff --git a/package.json b/package.json
index 3934115a..9ee0b966 100644
--- a/package.json
+++ b/package.json
@@ -9,7 +9,7 @@
 		"watch": "concurrently --kill-others \"bun run hmr\" \"bun run scripts/watch-main.ts\"",
 		"hmr": "vite --port 5173",
 		"build": "bun scripts/generate-build-info.ts && bun scripts/generate-changelog.ts && vite build && bun run build:cli && electrobun build",
-		"build:prod": "bun scripts/generate-build-info.ts && bun scripts/generate-changelog.ts && vite build && bun run build:cli && electrobun build --channel prod",
+		"build:prod": "bun scripts/generate-build-info.ts && bun scripts/generate-changelog.ts && DEV3_PROD=1 vite build && bun run build:cli && electrobun build --channel prod",
 		"build:cli": "bun build src/cli/main.ts --compile --outfile dist/dev3",
 		"start": "bun scripts/generate-build-info.ts && bun scripts/generate-changelog.ts && bun run build:cli && electrobun build && electrobun dev",
 		"lint": "bun scripts/lint.ts",
diff --git a/src/bun/__tests__/remote-access-server.test.ts b/src/bun/__tests__/remote-access-server.test.ts
index cf8b955d..1be8f2f6 100644
--- a/src/bun/__tests__/remote-access-server.test.ts
+++ b/src/bun/__tests__/remote-access-server.test.ts
@@ -213,6 +213,42 @@ describe("MIME types", () => {
 	});
 });
 
+// ================================================================
+// Localhost dev bypass for auth
+// ================================================================
+
+// Mirrors the intended host check of isLocalhostDevBypass in remote-access-server.ts.
+function isLocalhostDevBypass(hostname: string, hasRpcPort: boolean): boolean {
+	if (!hasRpcPort) return false;
+	return hostname === "localhost" || hostname === "127.0.0.1" || hostname === "[::1]"; // URL.hostname keeps IPv6 brackets
+}
+
+describe("localhost dev auth bypass", () => {
+	it("bypasses auth for localhost when DEV3_RPC_PORT is set", () => {
+		expect(isLocalhostDevBypass("localhost", true)).toBe(true);
+	});
+
+	it("bypasses auth for 127.0.0.1 when DEV3_RPC_PORT is set", () => {
+		expect(isLocalhostDevBypass("127.0.0.1", true)).toBe(true);
+	});
+
+	it("bypasses auth for [::1] (as parsed by URL) when DEV3_RPC_PORT is set", () => {
+		expect(isLocalhostDevBypass(new URL("http://[::1]:3000/").hostname, true)).toBe(true);
+	});
+
+	it("does NOT bypass auth for localhost when DEV3_RPC_PORT is unset", () => {
+		expect(isLocalhostDevBypass("localhost", false)).toBe(false);
+	});
+
+	it("does NOT bypass auth for remote hosts even when DEV3_RPC_PORT is set", () => {
+		expect(isLocalhostDevBypass("192.168.1.100", true)).toBe(false);
+	});
+
+	it("does NOT bypass auth for remote hosts when DEV3_RPC_PORT is unset", () => {
+		expect(isLocalhostDevBypass("10.0.0.5", false)).toBe(false);
+	});
+});
+
 // ================================================================
 // uploadImageBase64 size limit
 // ================================================================
diff --git a/src/bun/remote-access-server.ts b/src/bun/remote-access-server.ts
index 07c7e35f..90116484 100644
--- a/src/bun/remote-access-server.ts
+++ b/src/bun/remote-access-server.ts
@@ -1,8 +1,9 @@
 /**
  * Remote Access Server.
  *
- * A single HTTP + WebSocket server on 0.0.0.0:random that serves the full UI
- * to any browser on the local network. Replaces the previous browser-rpc-server.
+ * A single HTTP + WebSocket server that serves the full UI to any browser on
+ * the local network. Listens on DEV3_RPC_PORT if set (fixed, for agent-browser
+ * testing), otherwise on a random port. Replaces the previous browser-rpc-server.
  *
  * Features:
  *   - Static file serving (built Vite assets from dist/)
@@ -30,7 +31,15 @@ function extractToken(req: Request): string | null {
 	return url.searchParams.get("token") ?? null;
 }
 
+// Dev-mode auth bypass: with DEV3_RPC_PORT set, requests addressed to a loopback host skip token auth.
+// NOTE(review): the host is read from req.url while the server binds 0.0.0.0 — confirm a remote client cannot spoof it via the Host header.
+function isLocalhostDevBypass(req: Request): boolean {
+	if (!process.env.DEV3_RPC_PORT) return false;
+	return ["localhost", "127.0.0.1", "[::1]"].includes(new URL(req.url).hostname); // URL.hostname brackets IPv6 literals
+}
+
 async function isSessionAuthenticated(req: Request): Promise<boolean> {
+	if (isLocalhostDevBypass(req)) return true;
 	const token = extractToken(req);
 	if (!token) return false;
 	return verifySessionToken(token);
@@ -207,7 +216,7 @@ export async function startRemoteAccessServer(options: StartOptions): Promise<vo
 
 	const server = Bun.serve<WsData>({
 		hostname: "0.0.0.0",
-		port: 0, // random
+		port: parseInt(process.env.DEV3_RPC_PORT || "0", 10), // fixed when set, random otherwise
 		async fetch(req, server) {
 			const url = new URL(req.url);
 			const ua = req.headers.get("user-agent")?.slice(0, 80) ?? "unknown";
diff --git a/src/cli/__tests__/dev-server.test.ts b/src/cli/__tests__/dev-server.test.ts
index 72a357d3..dfe0cec5 100644
--- a/src/cli/__tests__/dev-server.test.ts
+++ b/src/cli/__tests__/dev-server.test.ts
@@ -143,6 +143,7 @@ describe("dev-server start/stop/restart", () => {
 		});
 		expect(stdoutOutput).toContain("Restarted dev server");
 	});
+
 });
 
 describe("dev-server errors", () => {
diff --git a/src/mainview/App.tsx b/src/mainview/App.tsx
index 05c873c2..99ecb1aa 100644
--- a/src/mainview/App.tsx
+++ b/src/mainview/App.tsx
@@ -382,6 +382,22 @@ function App() {
 		};
 	}, []);
 
+	// Automation bridge for UI testing (dev3-ui-control).
+	// __DEV3_AUTOMATION is true in dev/staging builds, false in prod (DEV3_PROD=1).
+	// Vite replaces the global at build time so the block is tree-shaken in prod.
+	useEffect(() => {
+		if (!(globalThis as any).__DEV3_AUTOMATION) return;
+		// eslint-disable-next-line @typescript-eslint/no-explicit-any
+		(window as any).__dev3 = {
+			navigate: (route: Route) => navigate(route),
+			getState: () => state,
+		};
+		return () => {
+			// eslint-disable-next-line @typescript-eslint/no-explicit-any
+			delete (window as any).__dev3;
+		};
+	}, [navigate, state]);
+
 	// Listen for Cmd+, (Settings menu item)
 	useEffect(() => {
 		function onNavigateToSettings() {
diff --git a/vite.config.ts b/vite.config.ts
index b6e0f836..99b363b0 100644
--- a/vite.config.ts
+++ b/vite.config.ts
@@ -6,6 +6,7 @@ export default defineConfig({
 	root: "src/mainview",
 	define: {
 		"globalThis.__DEV3_BROWSER_RPC_PORT": JSON.stringify(19191),
+		"globalThis.__DEV3_AUTOMATION": JSON.stringify(process.env.DEV3_PROD !== "1"),
 	},
 	build: {
 		outDir: "../../dist",