From eb7f9b851e83e0bca154358a140e646162600a1d Mon Sep 17 00:00:00 2001 From: Robin Wohlers-Reichel Date: Tue, 3 Mar 2026 11:19:49 +0100 Subject: [PATCH] cursor: e2e tests --- .github/workflows/e2e-isolated.yml | 5 +- .github/workflows/e2e.yml | 5 +- cmd/entire/cli/agent/cursor/AGENT.md | 230 +++++++++++++++++++++++++++ e2e/agents/cursor_cli.go | 190 ++++++++++++++++++++++ 4 files changed, 428 insertions(+), 2 deletions(-) create mode 100644 cmd/entire/cli/agent/cursor/AGENT.md create mode 100644 e2e/agents/cursor_cli.go diff --git a/.github/workflows/e2e-isolated.yml b/.github/workflows/e2e-isolated.yml index 164127857..b92baeb68 100644 --- a/.github/workflows/e2e-isolated.yml +++ b/.github/workflows/e2e-isolated.yml @@ -8,7 +8,7 @@ on: required: true default: "gemini-cli" type: choice - options: [claude-code, opencode, gemini-cli, factoryai-droid] + options: [claude-code, opencode, gemini-cli, cursor-cli, factoryai-droid] test: description: "Test name filter (regex)" required: true @@ -38,6 +38,7 @@ jobs: claude-code) curl -fsSL https://claude.ai/install.sh | bash ;; opencode) curl -fsSL https://opencode.ai/install | bash ;; gemini-cli) npm install -g @google/gemini-cli ;; + cursor-cli) curl https://cursor.com/install -fsS | bash ;; factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;; esac echo "$HOME/.local/bin" >> $GITHUB_PATH @@ -46,6 +47,7 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }} run: go run ./e2e/bootstrap @@ -53,6 +55,7 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }} E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts E2E_ENTIRE_BIN: /usr/local/bin/entire diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml 
index ab661b9e8..b84428bce 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - agent: [claude-code, opencode, gemini-cli, factoryai-droid] + agent: [claude-code, opencode, gemini-cli, factoryai-droid, cursor-cli] steps: - name: Checkout repository @@ -36,6 +36,7 @@ jobs: claude-code) curl -fsSL https://claude.ai/install.sh | bash ;; opencode) curl -fsSL https://opencode.ai/install | bash ;; gemini-cli) npm install -g @google/gemini-cli ;; + cursor-cli) curl https://cursor.com/install -fsS | bash ;; factoryai-droid) curl -fsSL https://app.factory.ai/cli | sh ;; esac echo "$HOME/.local/bin" >> $GITHUB_PATH @@ -44,6 +45,7 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }} run: go run ./e2e/bootstrap @@ -51,6 +53,7 @@ jobs: env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }} E2E_CONCURRENT_TEST_LIMIT: ${{ matrix.agent == 'gemini-cli' && '6' || matrix.agent == 'factoryai-droid' && '1' || '' }} run: mise run test:e2e --agent ${{ matrix.agent }} diff --git a/cmd/entire/cli/agent/cursor/AGENT.md b/cmd/entire/cli/agent/cursor/AGENT.md new file mode 100644 index 000000000..9906897ab --- /dev/null +++ b/cmd/entire/cli/agent/cursor/AGENT.md @@ -0,0 +1,230 @@ +# Cursor CLI (`agent`) — Integration One-Pager + +## Verdict: COMPATIBLE + +The `agent` binary supports hooks via `.cursor/hooks.json` and stores JSONL transcripts in a predictable location. The existing Cursor agent implementation in this package already handles both IDE and CLI modes. The CLI fires `sessionStart`, `sessionEnd`, `preToolUse`, and `postToolUse` hooks in headless (`-p`) mode. 
In interactive mode, `beforeSubmitPrompt` and `stop` also fire. + +**Key difference from IDE:** In `-p` (headless/print) mode, `beforeSubmitPrompt` and `stop` hooks do **not** fire. Only `sessionStart`, `sessionEnd`, and tool-use hooks fire. This means E2E tests using `RunPrompt` (headless) will not get `TurnStart`/`TurnEnd` events — only `SessionStart`/`SessionEnd`. Interactive tmux-based tests get the full lifecycle. + +## Static Checks + +| Check | Result | Notes | +|-------|--------|-------| +| Binary present | PASS | `/Users/robin/.local/bin/agent` | +| Help available | PASS | Full CLI help with subcommands | +| Version info | PASS | `2026.02.13-41ac335` | +| Hook keywords | PASS | `session`, `resume`, `continue` in help | +| Session keywords | PASS | `--resume`, `--continue`, `ls` (list sessions) | +| Config directory | PASS | `~/.cursor/`, `.cursor/` (project-local) | +| Documentation | PASS | https://cursor.com/docs/agent/hooks, https://cursor.com/docs/cli/using | + +## Binary + +- Name: `agent` +- Version: `2026.02.13-41ac335` +- Install: `curl https://cursor.com/install -fsS | bash` (or via Cursor IDE: install shell integration) +- Also accessible as: `cursor agent` (when Cursor IDE is installed) + +## Hook Mechanism + +- Config file: `.cursor/hooks.json` (project-local) or `~/.cursor/hooks.json` (user-global) +- Config format: JSON +- Hook registration: Array of `{"command": "...", "matcher": "..."}` entries per hook type (matcher is optional, used for tool-use hooks) + +### Hook Names and When They Fire + +| Native Hook Name | When It Fires | Entire EventType | Fires in `-p` mode? 
| +|-----------------|---------------|-----------------|---------------------| +| `sessionStart` | New conversation created | `SessionStart` | Yes | +| `beforeSubmitPrompt` | After user presses send, before backend request | `TurnStart` | **No** | +| `stop` | Agent loop ends (one turn completes) | `TurnEnd` | **No** | +| `sessionEnd` | Conversation ends | `SessionEnd` | Yes | +| `preCompact` | Before context compaction | `Compaction` | Needs long context | +| `subagentStart` | Before spawning a subagent (Task tool) | `SubagentStart` | Yes (when subagent used) | +| `subagentStop` | Subagent completes | `SubagentEnd` | Yes (when subagent used) | +| `preToolUse` | Before any tool execution | *(not mapped — informational)* | Yes | +| `postToolUse` | After tool execution | *(not mapped — informational)* | Yes | + +### Hook Input (stdin JSON) + +All hooks share these common fields: + +```json +{ + "conversation_id": "uuid", + "generation_id": "uuid", + "model": "gpt-5.2-codex-xhigh-fast", + "hook_event_name": "sessionStart", + "cursor_version": "2026.02.13-41ac335", + "workspace_roots": ["/path/to/repo"], + "user_email": "user@example.com", + "transcript_path": null +} +``` + +**Important:** `transcript_path` is **always `null`** in CLI mode. The existing cursor agent handles this via `resolveTranscriptRef()` which computes the path dynamically from the repo root. + +#### sessionStart additional fields + +```json +{ + "session_id": "uuid", + "is_background_agent": false +} +``` + +Note: IDE also sends `composer_mode: "agent"` — CLI omits this field. 
+ +#### sessionEnd additional fields + +```json +{ + "session_id": "uuid", + "reason": "completed", + "duration_ms": 5505, + "is_background_agent": false, + "final_status": "completed" +} +``` + +#### beforeSubmitPrompt additional fields (interactive mode only) + +```json +{ + "prompt": "user prompt text" +} +``` + +#### stop additional fields (interactive mode only) + +```json +{ + "status": "completed", + "loop_count": 0 +} +``` + +#### subagentStart additional fields + +```json +{ + "subagent_id": "uuid", + "subagent_type": "generalPurpose", + "subagent_model": "model-name", + "task": "task description", + "parent_conversation_id": "uuid", + "tool_call_id": "id", + "is_parallel_worker": false +} +``` + +#### subagentStop additional fields + +```json +{ + "subagent_id": "uuid", + "subagent_type": "generalPurpose", + "status": "completed", + "duration_ms": 5000, + "summary": "result text", + "parent_conversation_id": "uuid", + "message_count": 10, + "tool_call_count": 3, + "modified_files": ["file.txt"], + "loop_count": 1, + "task": "task description", + "description": "...", + "agent_transcript_path": "/path/to/transcript" +} +``` + +#### preToolUse additional fields + +```json +{ + "tool_name": "Write", + "tool_input": {"file_path": "/path", "content": "..."}, + "tool_use_id": "call_xxx\nctc_xxx" +} +``` + +#### postToolUse additional fields + +```json +{ + "tool_name": "Write", + "tool_input": {"file_path": "/path", "content": "..."}, + "tool_output": "{\"success\":true}", + "duration": 36.841, + "tool_use_id": "call_xxx\nctc_xxx" +} +``` + +## Transcript + +- Location: `~/.cursor/projects/<sanitized-repo-path>/agent-transcripts/<session-id>.jsonl` + - CLI uses flat layout: `<dir>/<session-id>.jsonl` + - IDE uses nested layout: `<dir>/<session-id>/<session-id>.jsonl` + - The existing `ResolveSessionFile()` handles both +- Path sanitization: leading `/` stripped, all non-alphanumeric chars replaced with `-` +- Format: JSONL (one JSON object per line) +- Session ID extraction: `conversation_id` field from hook payload (same value as 
`session_id`) +- Example entries: + +```jsonl +{"role":"user","message":{"content":[{"type":"text","text":"<user_query>\ncreate a file\n</user_query>"}]}} +{"role":"assistant","message":{"content":[{"type":"text","text":"Created the file."}]}} +``` + +- Note: Transcript does NOT contain tool_use blocks — file detection relies on git status +- Override for testing: set `ENTIRE_TEST_CURSOR_PROJECT_DIR` env var to override the transcript directory + +## Config Preservation + +- `.cursor/hooks.json`: Read-modify-write using `map[string]json.RawMessage` to preserve unknown fields +- `~/.cursor/cli-config.json`: User-level config — do not modify (contains auth, permissions, model settings) +- Keys to preserve: `version`, any unknown hook types, user's custom hooks + +## CLI Flags + +- Non-interactive prompt: `agent -p "prompt text" --force --trust --workspace <path>` + - `-p` / `--print`: Headless mode, prints response to stdout + - `--force` / `--yolo`: Auto-approve all tool use + - `--trust`: Trust workspace without prompting (headless only) + - `--workspace <path>`: Set working directory + - `--model <model>`: Model override (e.g., `sonnet-4`, `gpt-5`) + - `--output-format <format>`: `text` (default), `json`, `stream-json` +- Interactive mode: `agent --force` (launches TUI) + - Prompt pattern for TUI ready: TBD (needs interactive probe) + - `--resume [chatId]`: Resume specific session + - `--continue`: Resume most recent session +- Relevant env vars: + - `CURSOR_API_KEY`: API key for authentication + - `ENTIRE_TEST_CURSOR_PROJECT_DIR`: Override transcript directory (for testing) + - `ENTIRE_TEST_TTY=0`: Disable TTY detection in Entire hooks + +## Gaps & Limitations + +1. **`beforeSubmitPrompt` and `stop` don't fire in `-p` mode**: This is the main limitation. In headless mode, Entire won't get TurnStart/TurnEnd events. Checkpoints can only be created via sessionStart/sessionEnd flow. E2E tests using `RunPrompt` won't trigger the normal TurnStart→TurnEnd checkpoint flow. +2. 
**`transcript_path` is always `null` in CLI mode**: Handled by existing `resolveTranscriptRef()` which computes the path dynamically. +3. **No `composer_mode` field in CLI**: IDE sends `"agent"`, CLI omits it. Not impactful. +4. **Transcript lacks tool_use blocks**: Modified file detection relies on git status (already handled). +5. **`tool_use_id` format**: Contains newline (`call_xxx\nctc_xxx`) — may need sanitization if used as identifiers. + +## Captured Payloads + +Probe run on 2026-03-02 using `agent -p` in a temp git repo. + +Hooks captured in headless (`-p`) mode: +- `sessionStart` (1 capture) +- `sessionEnd` (1 capture) +- `preToolUse` (2 captures: Read, Write) +- `postToolUse` (1 capture: Write) + +Hooks NOT captured in headless mode: +- `beforeSubmitPrompt` — does not fire in `-p` mode +- `stop` — does not fire in `-p` mode +- `preCompact` — requires long context (not triggered by short prompt) +- `subagentStart/Stop` — requires subagent usage + +See `.entire/tmp/probe-cursor-cli-*/captures/` for raw JSON captures. \ No newline at end of file diff --git a/e2e/agents/cursor_cli.go b/e2e/agents/cursor_cli.go new file mode 100644 index 000000000..5c3e1d039 --- /dev/null +++ b/e2e/agents/cursor_cli.go @@ -0,0 +1,190 @@ +package agents + +import ( + "context" + "errors" + "fmt" + "os" + "os/exec" + "strings" + "time" +) + +func init() { + if env := os.Getenv("E2E_AGENT"); env != "" && env != "cursor-cli" { + return + } + Register(&CursorCLI{}) +} + +// CursorCLI implements the E2E Agent interface for the Cursor Agent CLI binary. +// The CLI binary is called "agent" and uses Cursor's hooks system via +// .cursor/hooks.json. It maps to the same Entire agent as Cursor IDE ("cursor"). +// +// All E2E interactions use interactive (tmux) mode so that the full hook +// lifecycle fires (sessionStart, beforeSubmitPrompt, stop, sessionEnd). +// Headless (-p) mode skips beforeSubmitPrompt and stop hooks. 
+type CursorCLI struct{} + +func (a *CursorCLI) Name() string { return "cursor-cli" } +func (a *CursorCLI) Binary() string { return "agent" } +func (a *CursorCLI) EntireAgent() string { return "cursor" } +func (a *CursorCLI) TimeoutMultiplier() float64 { return 1.5 } + +// PromptPattern returns a regex matching the Cursor CLI's TUI input prompt. +// The CLI shows a styled input box with placeholder text when ready for input. +func (a *CursorCLI) PromptPattern() string { return `/ commands` } + +func (a *CursorCLI) IsTransientError(out Output, err error) bool { + if err == nil { + return false + } + if errors.Is(err, context.DeadlineExceeded) { + return true + } + combined := out.Stdout + out.Stderr + for _, p := range []string{ + "overloaded", + "rate limit", + "429", + "503", + "529", + "ECONNRESET", + "ETIMEDOUT", + "server error", + "Internal Server Error", + } { + if strings.Contains(combined, p) { + return true + } + } + return false +} + +func (a *CursorCLI) Bootstrap() error { + // The Cursor CLI authenticates via CURSOR_API_KEY env var or OAuth. + // On CI, ensure CURSOR_API_KEY is set. Locally, OAuth/keychain works. + if os.Getenv("CI") != "" && os.Getenv("CURSOR_API_KEY") == "" { + return errors.New("CURSOR_API_KEY must be set on CI for cursor-cli E2E tests") + } + return nil +} + +func (a *CursorCLI) RunPrompt(ctx context.Context, dir string, prompt string, opts ...Option) (Output, error) { + cfg := &runConfig{} + for _, o := range opts { + o(cfg) + } + + timeout := 90 * time.Second + if cfg.PromptTimeout > 0 { + timeout = cfg.PromptTimeout + } + + displayCmd := a.Binary() + " --force --workspace " + dir + " (interactive prompt: " + prompt + ")" + + // Start an interactive tmux session so all hooks fire + // (beforeSubmitPrompt and stop don't fire in headless -p mode). 
+ s, err := a.startInteractiveSession(dir) + if err != nil { + return Output{Command: displayCmd, ExitCode: -1}, + fmt.Errorf("start interactive session: %w", err) + } + defer s.Close() + + // Wait for trust dialog and accept it. + if err := a.acceptTrustDialogIfNeeded(s); err != nil { + return Output{Command: displayCmd, Stdout: s.Capture(), ExitCode: -1}, err + } + + // Wait for the TUI to be ready. + if _, err := s.WaitFor(a.PromptPattern(), 30*time.Second); err != nil { + return Output{Command: displayCmd, Stdout: s.Capture(), ExitCode: -1}, + fmt.Errorf("waiting for startup prompt: %w", err) + } + + // Send the prompt. + if err := s.Send(prompt); err != nil { + return Output{Command: displayCmd, Stdout: s.Capture(), ExitCode: -1}, + fmt.Errorf("sending prompt: %w", err) + } + + // Wait for the prompt pattern to reappear (agent finished processing). + content, waitErr := s.WaitFor(a.PromptPattern(), timeout) + if waitErr != nil { + // Check for deadline exceeded to allow transient error detection. + if ctx.Err() == context.DeadlineExceeded { + waitErr = fmt.Errorf("%w: %w", waitErr, context.DeadlineExceeded) + } + return Output{Command: displayCmd, Stdout: content, ExitCode: -1}, waitErr + } + + return Output{Command: displayCmd, Stdout: content, ExitCode: 0}, nil +} + +func (a *CursorCLI) StartSession(ctx context.Context, dir string) (Session, error) { + s, err := a.startInteractiveSession(dir) + if err != nil { + return nil, err + } + + if err := a.acceptTrustDialogIfNeeded(s); err != nil { + _ = s.Close() + return nil, err + } + + // Wait for the TUI to be ready (input prompt). + if _, err := s.WaitFor(a.PromptPattern(), 30*time.Second); err != nil { + _ = s.Close() + return nil, fmt.Errorf("waiting for startup prompt: %w", err) + } + s.stableAtSend = "" + + return s, nil +} + +// startInteractiveSession creates a new tmux session running the Cursor CLI +// in interactive mode (no -p flag) so all hooks fire. 
+func (a *CursorCLI) startInteractiveSession(dir string) (*TmuxSession, error) { + // Resolve to absolute path so tmux can find the binary even if its + // shell doesn't inherit the test process's PATH (common on CI). + bin, err := exec.LookPath(a.Binary()) + if err != nil { + return nil, fmt.Errorf("agent binary not found: %w", err) + } + + // Build env-wrapped command so the tmux session inherits critical env vars. + // tmux starts a new shell that doesn't inherit Go's os.Environ(). + var envArgs []string + for _, key := range []string{"CURSOR_API_KEY", "PATH", "HOME", "TERM"} { + if v := os.Getenv(key); v != "" { + envArgs = append(envArgs, key+"="+v) + } + } + + args := append([]string{"env"}, envArgs...) + args = append(args, bin, "--force", "--workspace", dir) + + name := fmt.Sprintf("cursor-cli-test-%d", time.Now().UnixNano()) + unset := []string{"CI"} + return NewTmuxSession(name, dir, unset, args[0], args[1:]...) +} + +// acceptTrustDialogIfNeeded checks whether the workspace trust dialog appears +// and presses "a" to accept it. The dialog only shows on the first launch in +// a workspace — subsequent sessions in the same directory skip it. +func (a *CursorCLI) acceptTrustDialogIfNeeded(s *TmuxSession) error { + // Race: either the trust dialog or the input prompt will appear first. + // Use a short timeout to check for the trust dialog without blocking + // too long if the workspace is already trusted. + content, err := s.WaitFor(`Trust this workspace|`+a.PromptPattern(), 30*time.Second) + if err != nil { + return fmt.Errorf("waiting for trust dialog or prompt: %w", err) + } + if strings.Contains(content, "Trust this workspace") { + if err := s.SendKeys("a"); err != nil { + return fmt.Errorf("accepting trust dialog: %w", err) + } + } + return nil +}