From eb7f9b851e83e0bca154358a140e646162600a1d Mon Sep 17 00:00:00 2001
From: Robin Wohlers-Reichel <robin@entire.io>
Date: Tue, 3 Mar 2026 11:19:49 +0100
Subject: [PATCH] cursor: e2e tests

---
 .github/workflows/e2e-isolated.yml   |   5 +-
 .github/workflows/e2e.yml            |   5 +-
 cmd/entire/cli/agent/cursor/AGENT.md | 230 +++++++++++++++++++++++++++
 e2e/agents/cursor_cli.go             | 190 ++++++++++++++++++++++
 4 files changed, 428 insertions(+), 2 deletions(-)
 create mode 100644 cmd/entire/cli/agent/cursor/AGENT.md
 create mode 100644 e2e/agents/cursor_cli.go

diff --git a/.github/workflows/e2e-isolated.yml b/.github/workflows/e2e-isolated.yml
index 164127857..b92baeb68 100644
--- a/.github/workflows/e2e-isolated.yml
+++ b/.github/workflows/e2e-isolated.yml
@@ -8,7 +8,7 @@ on:
         required: true
         default: "gemini-cli"
         type: choice
-        options: [claude-code, opencode, gemini-cli, factoryai-droid]
+        options: [claude-code, opencode, gemini-cli, cursor-cli, factoryai-droid]
       test:
         description: "Test name filter (regex)"
         required: true
@@ -38,6 +38,7 @@ jobs:
             claude-code) curl -fsSL https://claude.ai/install.sh | bash ;;
             opencode)    curl -fsSL https://opencode.ai/install | bash ;;
             gemini-cli)  npm install -g @google/gemini-cli ;;
+            cursor-cli)  curl https://cursor.com/install -fsS | bash ;;
             factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;;
           esac
           echo "$HOME/.local/bin" >> $GITHUB_PATH
@@ -46,6 +47,7 @@ jobs:
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
           FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
         run: go run ./e2e/bootstrap
 
@@ -53,6 +55,7 @@ jobs:
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
           FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
           E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts
           E2E_ENTIRE_BIN: /usr/local/bin/entire
diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index ab661b9e8..b84428bce 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -18,7 +18,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        agent: [claude-code, opencode, gemini-cli, factoryai-droid]
+        agent: [claude-code, opencode, gemini-cli, factoryai-droid, cursor-cli]
 
     steps:
       - name: Checkout repository
@@ -36,6 +36,7 @@ jobs:
             claude-code) curl -fsSL https://claude.ai/install.sh | bash ;;
             opencode)    curl -fsSL https://opencode.ai/install | bash ;;
             gemini-cli)  npm install -g @google/gemini-cli ;;
+            cursor-cli)  curl https://cursor.com/install -fsS | bash ;;
             factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;;
           esac
           echo "$HOME/.local/bin" >> $GITHUB_PATH
@@ -44,6 +45,7 @@ jobs:
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
           FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
         run: go run ./e2e/bootstrap
 
@@ -51,6 +53,7 @@ jobs:
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
           FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
           E2E_CONCURRENT_TEST_LIMIT: ${{ matrix.agent == 'gemini-cli' && '6' || matrix.agent == 'factoryai-droid' && '1' || '' }}
         run: mise run test:e2e --agent ${{ matrix.agent }}
diff --git a/cmd/entire/cli/agent/cursor/AGENT.md b/cmd/entire/cli/agent/cursor/AGENT.md
new file mode 100644
index 000000000..9906897ab
--- /dev/null
+++ b/cmd/entire/cli/agent/cursor/AGENT.md
@@ -0,0 +1,230 @@
+# Cursor CLI (`agent`) — Integration One-Pager
+
+## Verdict: COMPATIBLE
+
+The `agent` binary supports hooks via `.cursor/hooks.json` and stores JSONL transcripts in a predictable location. The existing Cursor agent implementation in this package already handles both IDE and CLI modes. The CLI fires `sessionStart`, `sessionEnd`, `preToolUse`, and `postToolUse` hooks in headless (`-p`) mode. In interactive mode, `beforeSubmitPrompt` and `stop` also fire.
+
+**Key difference from IDE:** In `-p` (headless/print) mode, `beforeSubmitPrompt` and `stop` hooks do **not** fire. Only `sessionStart`, `sessionEnd`, and tool-use hooks fire. This means a headless run will not get `TurnStart`/`TurnEnd` events — only `SessionStart`/`SessionEnd`. The E2E harness (`e2e/agents/cursor_cli.go`) therefore drives `RunPrompt` through an interactive tmux session, which gets the full lifecycle.
+
+## Static Checks
+
+| Check | Result | Notes |
+|-------|--------|-------|
+| Binary present | PASS | `/Users/robin/.local/bin/agent` |
+| Help available | PASS | Full CLI help with subcommands |
+| Version info | PASS | `2026.02.13-41ac335` |
+| Hook keywords | PASS | `session`, `resume`, `continue` in help |
+| Session keywords | PASS | `--resume`, `--continue`, `ls` (list sessions) |
+| Config directory | PASS | `~/.cursor/`, `.cursor/` (project-local) |
+| Documentation | PASS | https://cursor.com/docs/agent/hooks, https://cursor.com/docs/cli/using |
+
+## Binary
+
+- Name: `agent`
+- Version: `2026.02.13-41ac335`
+- Install: `curl https://cursor.com/install -fsS | bash` (or via Cursor IDE: install shell integration)
+- Also accessible as: `cursor agent` (when Cursor IDE is installed)
+
+## Hook Mechanism
+
+- Config file: `.cursor/hooks.json` (project-local) or `~/.cursor/hooks.json` (user-global)
+- Config format: JSON
+- Hook registration: Array of `{"command": "...", "matcher": "..."}` entries per hook type (matcher is optional, used for tool-use hooks)
+
+### Hook Names and When They Fire
+
+| Native Hook Name | When It Fires | Entire EventType | Fires in `-p` mode? |
+|-----------------|---------------|-----------------|---------------------|
+| `sessionStart` | New conversation created | `SessionStart` | Yes |
+| `beforeSubmitPrompt` | After user presses send, before backend request | `TurnStart` | **No** |
+| `stop` | Agent loop ends (one turn completes) | `TurnEnd` | **No** |
+| `sessionEnd` | Conversation ends | `SessionEnd` | Yes |
+| `preCompact` | Before context compaction | `Compaction` | Needs long context |
+| `subagentStart` | Before spawning a subagent (Task tool) | `SubagentStart` | Expected when a subagent is used (not captured in probe) |
+| `subagentStop` | Subagent completes | `SubagentEnd` | Expected when a subagent is used (not captured in probe) |
+| `preToolUse` | Before any tool execution | *(not mapped — informational)* | Yes |
+| `postToolUse` | After tool execution | *(not mapped — informational)* | Yes |
+
+### Hook Input (stdin JSON)
+
+All hooks share these common fields:
+
+```json
+{
+  "conversation_id": "uuid",
+  "generation_id": "uuid",
+  "model": "gpt-5.2-codex-xhigh-fast",
+  "hook_event_name": "sessionStart",
+  "cursor_version": "2026.02.13-41ac335",
+  "workspace_roots": ["/path/to/repo"],
+  "user_email": "user@example.com",
+  "transcript_path": null
+}
+```
+
+**Important:** `transcript_path` is **always `null`** in CLI mode. The existing cursor agent handles this via `resolveTranscriptRef()` which computes the path dynamically from the repo root.
+
+#### sessionStart additional fields
+
+```json
+{
+  "session_id": "uuid",
+  "is_background_agent": false
+}
+```
+
+Note: IDE also sends `composer_mode: "agent"` — CLI omits this field.
+
+#### sessionEnd additional fields
+
+```json
+{
+  "session_id": "uuid",
+  "reason": "completed",
+  "duration_ms": 5505,
+  "is_background_agent": false,
+  "final_status": "completed"
+}
+```
+
+#### beforeSubmitPrompt additional fields (interactive mode only)
+
+```json
+{
+  "prompt": "user prompt text"
+}
+```
+
+#### stop additional fields (interactive mode only)
+
+```json
+{
+  "status": "completed",
+  "loop_count": 0
+}
+```
+
+#### subagentStart additional fields
+
+```json
+{
+  "subagent_id": "uuid",
+  "subagent_type": "generalPurpose",
+  "subagent_model": "model-name",
+  "task": "task description",
+  "parent_conversation_id": "uuid",
+  "tool_call_id": "id",
+  "is_parallel_worker": false
+}
+```
+
+#### subagentStop additional fields
+
+```json
+{
+  "subagent_id": "uuid",
+  "subagent_type": "generalPurpose",
+  "status": "completed",
+  "duration_ms": 5000,
+  "summary": "result text",
+  "parent_conversation_id": "uuid",
+  "message_count": 10,
+  "tool_call_count": 3,
+  "modified_files": ["file.txt"],
+  "loop_count": 1,
+  "task": "task description",
+  "description": "...",
+  "agent_transcript_path": "/path/to/transcript"
+}
+```
+
+#### preToolUse additional fields
+
+```json
+{
+  "tool_name": "Write",
+  "tool_input": {"file_path": "/path", "content": "..."},
+  "tool_use_id": "call_xxx\nctc_xxx"
+}
+```
+
+#### postToolUse additional fields
+
+```json
+{
+  "tool_name": "Write",
+  "tool_input": {"file_path": "/path", "content": "..."},
+  "tool_output": "{\"success\":true}",
+  "duration": 36.841,
+  "tool_use_id": "call_xxx\nctc_xxx"
+}
+```
+
+## Transcript
+
+- Location: `~/.cursor/projects/<sanitized-repo-path>/agent-transcripts/<conversation-id>.jsonl`
+  - CLI uses flat layout: `<dir>/<id>.jsonl`
+  - IDE uses nested layout: `<dir>/<id>/<id>.jsonl`
+  - The existing `ResolveSessionFile()` handles both
+- Path sanitization: leading `/` stripped, all non-alphanumeric chars replaced with `-`
+- Format: JSONL (one JSON object per line)
+- Session ID extraction: `conversation_id` field from hook payload (same value as `session_id`)
+- Example entries:
+
+```jsonl
+{"role":"user","message":{"content":[{"type":"text","text":"<user_query>\ncreate a file\n</user_query>"}]}}
+{"role":"assistant","message":{"content":[{"type":"text","text":"Created the file."}]}}
+```
+
+- Note: Transcript does NOT contain tool_use blocks — file detection relies on git status
+- Override for testing: set `ENTIRE_TEST_CURSOR_PROJECT_DIR` env var to override the transcript directory
+
+## Config Preservation
+
+- `.cursor/hooks.json`: Read-modify-write using `map[string]json.RawMessage` to preserve unknown fields
+- `~/.cursor/cli-config.json`: User-level config — do not modify (contains auth, permissions, model settings)
+- Keys to preserve: `version`, any unknown hook types, user's custom hooks
+
+## CLI Flags
+
+- Non-interactive prompt: `agent -p "prompt text" --force --trust --workspace <dir>`
+  - `-p` / `--print`: Headless mode, prints response to stdout
+  - `--force` / `--yolo`: Auto-approve all tool use
+  - `--trust`: Trust workspace without prompting (headless only)
+  - `--workspace <path>`: Set working directory
+  - `--model <model>`: Model override (e.g., `sonnet-4`, `gpt-5`)
+  - `--output-format <fmt>`: `text` (default), `json`, `stream-json`
+- Interactive mode: `agent --force` (launches TUI)
+  - Prompt pattern for TUI ready: `/ commands` (the value `PromptPattern()` returns in `e2e/agents/cursor_cli.go`)
+  - `--resume [chatId]`: Resume specific session
+  - `--continue`: Resume most recent session
+- Relevant env vars:
+  - `CURSOR_API_KEY`: API key for authentication
+  - `ENTIRE_TEST_CURSOR_PROJECT_DIR`: Override transcript directory (for testing)
+  - `ENTIRE_TEST_TTY=0`: Disable TTY detection in Entire hooks
+
+## Gaps & Limitations
+
+1. **`beforeSubmitPrompt` and `stop` don't fire in `-p` mode**: This is the main limitation. In headless mode, Entire won't get TurnStart/TurnEnd events, so checkpoints can only be created via the sessionStart/sessionEnd flow. A headless run won't trigger the normal TurnStart→TurnEnd checkpoint flow, which is why the E2E `RunPrompt` implementation uses an interactive tmux session instead of `-p`.
+2. **`transcript_path` is always `null` in CLI mode**: Handled by existing `resolveTranscriptRef()` which computes the path dynamically.
+3. **No `composer_mode` field in CLI**: IDE sends `"agent"`, CLI omits it. Not impactful.
+4. **Transcript lacks tool_use blocks**: Modified file detection relies on git status (already handled).
+5. **`tool_use_id` format**: Contains an embedded newline (`call_xxx\nctc_xxx`) — may need sanitization before being used as an identifier.
+
+## Captured Payloads
+
+Probe run on 2026-03-02 using `agent -p` in a temp git repo.
+
+Hooks captured in headless (`-p`) mode:
+- `sessionStart` (1 capture)
+- `sessionEnd` (1 capture)
+- `preToolUse` (2 captures: Read, Write)
+- `postToolUse` (1 capture: Write)
+
+Hooks NOT captured in headless mode:
+- `beforeSubmitPrompt` — does not fire in `-p` mode
+- `stop` — does not fire in `-p` mode
+- `preCompact` — requires long context (not triggered by short prompt)
+- `subagentStart` / `subagentStop` — require subagent usage
+
+See `.entire/tmp/probe-cursor-cli-*/captures/` for raw JSON captures.
\ No newline at end of file
diff --git a/e2e/agents/cursor_cli.go b/e2e/agents/cursor_cli.go
new file mode 100644
index 000000000..5c3e1d039
--- /dev/null
+++ b/e2e/agents/cursor_cli.go
@@ -0,0 +1,190 @@
+package agents
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"time"
+)
+
+func init() { // registers CursorCLI unless E2E_AGENT names a different agent
+	if env := os.Getenv("E2E_AGENT"); env != "" && env != "cursor-cli" {
+		return // E2E_AGENT is set to some other agent name; skip registration
+	}
+	Register(&CursorCLI{}) // reached when E2E_AGENT is unset, empty, or "cursor-cli"
+}
+
+// CursorCLI implements the E2E Agent interface for the Cursor Agent CLI binary.
+// The CLI binary is called "agent" and uses Cursor's hooks system via
+// .cursor/hooks.json. It maps to the same Entire agent as Cursor IDE ("cursor").
+//
+// All E2E interactions use interactive (tmux) mode so that the full hook
+// lifecycle fires (sessionStart, beforeSubmitPrompt, stop, sessionEnd).
+// Headless (-p) mode skips beforeSubmitPrompt and stop hooks.
+type CursorCLI struct{}
+
+func (a *CursorCLI) Name() string               { return "cursor-cli" } // same value init compares E2E_AGENT against
+func (a *CursorCLI) Binary() string             { return "agent" }      // executable name resolved via exec.LookPath
+func (a *CursorCLI) EntireAgent() string        { return "cursor" }     // Entire-side agent shared with Cursor IDE
+func (a *CursorCLI) TimeoutMultiplier() float64 { return 1.5 }          // NOTE(review): presumably scales harness timeouts — confirm with caller
+
+// PromptPattern returns the regex the harness waits for to detect that the TUI is ready for input.
+// It matches the literal "/ commands", presumably part of the input box's placeholder text.
+func (a *CursorCLI) PromptPattern() string { return `/ commands` }
+
+func (a *CursorCLI) IsTransientError(out Output, err error) bool { // reports whether a failed run looks transient
+	if err == nil {
+		return false // no failure, so nothing to classify
+	}
+	if errors.Is(err, context.DeadlineExceeded) {
+		return true // deadline expiry anywhere in the error chain counts as transient
+	}
+	combined := out.Stdout + out.Stderr // search both streams for the markers below
+	for _, p := range []string{
+		"overloaded",
+		"rate limit",
+		"429",
+		"503",
+		"529",
+		"ECONNRESET",
+		"ETIMEDOUT",
+		"server error",
+		"Internal Server Error",
+	} {
+		if strings.Contains(combined, p) { // case-sensitive substring match, so bare numbers match anywhere in the output
+			return true
+		}
+	}
+	return false
+}
+
+func (a *CursorCLI) Bootstrap() error { // fails fast when CI lacks credentials; otherwise does nothing
+	// The Cursor CLI authenticates via CURSOR_API_KEY env var or OAuth.
+	// On CI (CI env var non-empty), CURSOR_API_KEY must be set. Locally, OAuth/keychain works.
+	if os.Getenv("CI") != "" && os.Getenv("CURSOR_API_KEY") == "" {
+		return errors.New("CURSOR_API_KEY must be set on CI for cursor-cli E2E tests")
+	}
+	return nil
+}
+
+// RunPrompt sends prompt to a fresh interactive Cursor CLI tmux session in dir and returns what the
+// final WaitFor captured. ctx is not passed to the waits; it is only inspected after that wait fails.
+func (a *CursorCLI) RunPrompt(ctx context.Context, dir string, prompt string, opts ...Option) (Output, error) {
+	cfg := &runConfig{}
+	for _, o := range opts {
+		o(cfg)
+	}
+
+	timeout := 90 * time.Second // default wait for the agent to finish; a positive PromptTimeout overrides it
+	if cfg.PromptTimeout > 0 {
+		timeout = cfg.PromptTimeout
+	}
+
+	displayCmd := a.Binary() + " --force --workspace " + dir + " (interactive prompt: " + prompt + ")"
+
+	// Use an interactive tmux session so all hooks fire (beforeSubmitPrompt and stop don't fire in headless -p mode).
+	s, err := a.startInteractiveSession(dir)
+	if err != nil {
+		return Output{Command: displayCmd, ExitCode: -1},
+			fmt.Errorf("start interactive session: %w", err)
+	}
+	defer func() { _ = s.Close() }() // best-effort cleanup; the close result is deliberately ignored
+
+	// Wait for trust dialog and accept it.
+	if err := a.acceptTrustDialogIfNeeded(s); err != nil {
+		return Output{Command: displayCmd, Stdout: s.Capture(), ExitCode: -1}, err
+	}
+
+	// Wait for the TUI to be ready.
+	if _, err := s.WaitFor(a.PromptPattern(), 30*time.Second); err != nil {
+		return Output{Command: displayCmd, Stdout: s.Capture(), ExitCode: -1},
+			fmt.Errorf("waiting for startup prompt: %w", err)
+	}
+
+	// Send the prompt.
+	if err := s.Send(prompt); err != nil {
+		return Output{Command: displayCmd, Stdout: s.Capture(), ExitCode: -1},
+			fmt.Errorf("sending prompt: %w", err)
+	}
+
+	// Wait for the prompt pattern to reappear (agent finished processing).
+	content, waitErr := s.WaitFor(a.PromptPattern(), timeout)
+	if waitErr != nil {
+		if errors.Is(ctx.Err(), context.DeadlineExceeded) { // surface ctx expiry so IsTransientError treats it as transient
+			waitErr = fmt.Errorf("%w: %w", waitErr, context.DeadlineExceeded)
+		}
+		return Output{Command: displayCmd, Stdout: content, ExitCode: -1}, waitErr
+	}
+
+	return Output{Command: displayCmd, Stdout: content, ExitCode: 0}, nil
+}
+
+func (a *CursorCLI) StartSession(ctx context.Context, dir string) (Session, error) { // ctx is currently unused
+	s, err := a.startInteractiveSession(dir)
+	if err != nil {
+		return nil, err
+	}
+
+	if err := a.acceptTrustDialogIfNeeded(s); err != nil {
+		_ = s.Close() // best-effort cleanup; the original error is what gets returned
+		return nil, err
+	}
+
+	// Wait up to 30s for the TUI's input prompt before handing the session to the caller.
+	if _, err := s.WaitFor(a.PromptPattern(), 30*time.Second); err != nil {
+		_ = s.Close() // best-effort cleanup; the original error is what gets returned
+		return nil, fmt.Errorf("waiting for startup prompt: %w", err)
+	}
+	s.stableAtSend = "" // NOTE(review): presumably clears TmuxSession's send baseline after startup — confirm
+
+	return s, nil
+}
+
+// startInteractiveSession creates a new tmux session running the Cursor CLI
+// in interactive mode (no -p flag) so all hooks fire.
+func (a *CursorCLI) startInteractiveSession(dir string) (*TmuxSession, error) {
+	// Resolve to absolute path so tmux can find the binary even if its
+	// shell doesn't inherit the test process's PATH (common on CI).
+	bin, err := exec.LookPath(a.Binary())
+	if err != nil {
+		return nil, fmt.Errorf("agent binary not found: %w", err)
+	}
+
+	// Wrap the binary in `env KEY=VALUE ...` so the tmux session sees these vars even if
+	// tmux's shell lacks them. NOTE(review): this places CURSOR_API_KEY in the command's argv.
+	var envArgs []string
+	for _, key := range []string{"CURSOR_API_KEY", "PATH", "HOME", "TERM"} {
+		if v := os.Getenv(key); v != "" { // unset or empty variables are not forwarded
+			envArgs = append(envArgs, key+"="+v)
+		}
+	}
+
+	args := append([]string{"env"}, envArgs...)
+	args = append(args, bin, "--force", "--workspace", dir)
+
+	name := fmt.Sprintf("cursor-cli-test-%d", time.Now().UnixNano()) // nanosecond timestamp keeps session names distinct
+	unset := []string{"CI"}                                          // NOTE(review): presumably env vars NewTmuxSession removes — confirm
+	return NewTmuxSession(name, dir, unset, args[0], args[1:]...)
+}
+
+// acceptTrustDialogIfNeeded checks whether the workspace trust dialog appears
+// and presses "a" to accept it. The dialog only shows on the first launch in
+// a workspace — subsequent sessions in the same directory skip it.
+func (a *CursorCLI) acceptTrustDialogIfNeeded(s *TmuxSession) error {
+	// Either the trust dialog or the input prompt may appear first, so wait
+	// (up to 30 seconds) on a pattern that matches both. An already-trusted
+	// workspace is then detected via its input prompt rather than a timeout.
+	content, err := s.WaitFor(`Trust this workspace|`+a.PromptPattern(), 30*time.Second)
+	if err != nil {
+		return fmt.Errorf("waiting for trust dialog or prompt: %w", err)
+	}
+	if strings.Contains(content, "Trust this workspace") { // dialog is on screen; otherwise nothing to accept
+		if err := s.SendKeys("a"); err != nil {
+			return fmt.Errorf("accepting trust dialog: %w", err)
+		}
+	}
+	return nil
+}