diff --git a/.gitignore b/.gitignore index 28b5b26..5e30fb8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Binary -bin/ +/bin/ +/browser/bin/agent-browser-* cdp # Go diff --git a/.goreleaser.yml b/.goreleaser.yml index e9af4f5..0a7d2e5 100644 --- a/.goreleaser.yml +++ b/.goreleaser.yml @@ -2,6 +2,10 @@ version: 2 project_name: tap +before: + hooks: + - scripts/prepare-agent-browser.sh + builds: - main: ./cmd/tap binary: tap @@ -14,6 +18,9 @@ builds: goarch: - amd64 - arm64 + ignore: + - goos: windows + goarch: arm64 ldflags: - -s -w -X main.version={{.Version}} diff --git a/AGENTS.md b/AGENTS.md index bea295f..8b78a68 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -2,11 +2,11 @@ ## Project -Go CLI and library for running JS scripts against websites (QuickJS + Chrome CDP fallback) and extracting clean content from URLs via go-defuddle. +Go CLI and library for running JS scripts against websites (QuickJS + agent-browser fallback) and extracting clean content from URLs via go-defuddle. ## Stack -Go 1.26+, urfave/cli v3, QuickJS (fastschema/qjs), chromedp, go-defuddle, mise. +Go 1.26+, urfave/cli v3, QuickJS (fastschema/qjs), agent-browser, go-defuddle, mise. ## Commands @@ -35,8 +35,8 @@ Emoji-prefixed Conventional Commits: `✨ feat:`, `🐛 fix:`, `♻️ refactor: ``` tap.go / options.go → Client API + functional options -transport/ → Shared HTTP + CDP browser layer -browser/ → Persistent sessions, tabs, network interception +transport/ → Shared HTTP + agent-browser bridge +browser/ → agent-browser adapter, binary install, pass-through types engine/ → QuickJS + browser fallback fetch/ → URL → clean content (go-defuddle) cmd/tap/ → CLI (site, fetch, sync, browser) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8c5a57..608d3a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/). ## [Unreleased] +### Changed + +- **Replaced browser backend with embedded agent-browser** — removed ~4,000 lines of hand-rolled chromedp/CDP code and switched to [agent-browser](https://github.com/vercel-labs/agent-browser) as the sole browser backend. Tap embeds the native agent-browser binary for supported release platforms and lets agent-browser manage Chrome/browser installation. +- **Removed chromedp dependency** — `go.mod` no longer depends on `chromedp/chromedp`, `chromedp/cdproto`, or `chromedp/sysutil`. + +### Added + +- **New browser pass-through commands** — `tap browser set` (viewport, device, geo, offline, headers, credentials, media), `tap browser storage` (local/session), `tap browser state` (save/load/list/show/clear), `tap browser auth` (save/login/list/show/delete), `tap browser get` (text/html/value/attr/title/url/count/box/styles), `tap browser vitals`, `tap browser diff` (snapshot/screenshot/url). +- **Global Lightpanda engine flag** — `--lightpanda` / `--lp` selects Lightpanda as the agent-browser engine for browser-backed commands. + ## [0.4.8] - 2026-05-18 ### Added diff --git a/README.md b/README.md index ac823a2..3f8fb04 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Upgrade later with: tap upgrade ``` -Browser features use Chrome by default. Check dependencies with `tap doctor`. +Browser features use the embedded agent-browser backend. Run `tap doctor --install` to let agent-browser install browser dependencies. ## Quick start @@ -54,7 +54,7 @@ tap fetch -b https://github.com/notifications ### Reuse your existing Chrome -Chrome must already expose DevTools. +agent-browser manages Chrome automatically. To attach to an existing Chrome with DevTools enabled: ```bash tap attach chrome @@ -70,7 +70,6 @@ You can also attach explicitly: ```bash tap attach chrome --browser-url http://127.0.0.1:9222 -tap attach chrome --port-file ~/Library/Application\ Support/Google/Chrome/DevToolsActivePort ``` ### Browser workflow @@ -79,7 +78,7 @@ tap attach chrome --port-file ~/Library/Application\ Support/Google/Chrome/DevTo tap browser open https://news.ycombinator.com tap browser open https://github.com --new-tab tap browser tabs -tap browser switch tab-2 +tap browser switch t2 tap browser screenshot --output github.png tap browser status ``` @@ -152,9 +151,9 @@ These show up on the relevant commands instead of only in global help: | `--wait-selector` | Wait for a CSS selector | | `--wait-js` | Wait for a JS expression | | `--timeout` | Set execution timeout | -| `--browser-url` | One-shot DevTools override | +| `--browser-url` | One-shot agent-browser/DevTools connection override | | `--profile-dir` | One-shot profile override | -| `--lightpanda`, `--lp` | Use Lightpanda instead of Chrome | +| `--lightpanda`, `--lp` | Use Lightpanda as the browser engine | Compatibility aliases still work: - `--ws-url` -> `--browser-url` @@ -171,16 +170,18 @@ tap browser snapshot tap browser forms tap browser cookies ... tap browser network ... +tap browser set ... +tap browser storage ... +tap browser state ... +tap browser auth ... +tap browser get ... +tap browser vitals +tap browser diff ... ``` -## Lightpanda +## Browser backend -| Backend | Platforms | Best for | -| --- | --- | --- | -| Chrome | macOS, Linux, Windows | Full browser automation, auth, network interception | -| Lightpanda | macOS, Linux | Fast headless rendering without auth-heavy flows | - -Install or update Lightpanda with: +Tap embeds agent-browser as the single browser backend. Chrome is the default engine; pass `--lightpanda`/`--lp` to use Lightpanda for fast browser-backed rendering. agent-browser manages browser installation for full automation, auth, screenshots, and network workflows. Install browser dependencies with: ```bash tap doctor --install diff --git a/browser/agentbrowser.go b/browser/agentbrowser.go new file mode 100644 index 0000000..93e8f39 --- /dev/null +++ b/browser/agentbrowser.go @@ -0,0 +1,140 @@ +package browser + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os/exec" + "slices" + "strings" +) + +const DefaultAgentBrowserSession = "default" + +type AgentBrowser struct { + Path string + SessionName string + ProfileDir string + Headed bool + Attached bool + Engine string +} + +type OpenOpts struct { + Headed bool + Headers map[string]string + InitScript string +} + +type ExecResult struct { + Stdout json.RawMessage + Stderr string +} + +func NewAgentBrowser(path string) (*AgentBrowser, error) { + if path == "" { + resolved, err := ResolveAgentBrowserPath() + if err != nil { + return nil, err + } + path = resolved + } + return &AgentBrowser{Path: path, SessionName: DefaultAgentBrowserSession}, nil +} + +func (a *AgentBrowser) Exec(ctx context.Context, args ...string) (json.RawMessage, string, error) { + return a.exec(ctx, nil, args...) +} + +func (a *AgentBrowser) exec(ctx context.Context, stdin []byte, args ...string) (json.RawMessage, string, error) { + cmdArgs := a.commandArgs(args...) + cmd := exec.CommandContext(ctx, a.Path, cmdArgs...) + if stdin != nil { + cmd.Stdin = bytes.NewReader(stdin) + } + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + return nil, stderr.String(), fmt.Errorf("agent-browser %s: %w: %s", strings.Join(cmdArgs, " "), err, strings.TrimSpace(stderr.String())) + } + return json.RawMessage(bytes.TrimSpace(stdout.Bytes())), stderr.String(), nil +} + +func (a *AgentBrowser) commandArgs(args ...string) []string { + out := make([]string, 0, len(args)+8) + out = append(out, args...) + if !slices.Contains(out, "--json") { + out = append(out, "--json") + } + if !a.Attached && a.SessionName != "" && !hasFlag(out, "--session-name") { + out = append(out, "--session-name", a.SessionName) + } + if a.ProfileDir != "" && !hasFlag(out, "--profile") { + out = append(out, "--profile", a.ProfileDir) + } + if a.Headed && !slices.Contains(out, "--headed") { + out = append(out, "--headed") + } + if a.Engine != "" && !hasFlag(out, "--engine") { + out = append(out, "--engine", a.Engine) + } + return out +} + +func hasFlag(args []string, flag string) bool { + for _, arg := range args { + if arg == flag || strings.HasPrefix(arg, flag+"=") { + return true + } + } + return false +} + +func (a *AgentBrowser) Open(ctx context.Context, url string, opts OpenOpts) error { + args := []string{"open", url} + if opts.Headed { + args = append(args, "--headed") + } + if opts.InitScript != "" { + args = append(args, "--init-script", opts.InitScript) + } + for name, value := range opts.Headers { + args = append(args, "--headers", name+": "+value) + } + _, _, err := a.Exec(ctx, args...) + return err +} + +func (a *AgentBrowser) Eval(ctx context.Context, js string) (any, error) { + out, stderr, err := a.exec(ctx, []byte(js), "eval", "--stdin") + if err != nil { + return nil, err + } + var envelope AgentBrowserEnvelope[map[string]any] + if err := json.Unmarshal(out, &envelope); err == nil && envelope.Success { + return envelope.Data["result"], nil + } + var value any + if err := json.Unmarshal(out, &value); err != nil { + return nil, fmt.Errorf("parse eval JSON: %w: %s", err, stderr) + } + return value, nil +} + +func (a *AgentBrowser) GetHTML(ctx context.Context) (string, error) { + value, err := a.Eval(ctx, "document.documentElement.outerHTML") + if err != nil { + return "", err + } + if s, ok := value.(string); ok { + return s, nil + } + return "", fmt.Errorf("agent-browser html result is %T, not string", value) +} + +func (a *AgentBrowser) Close(ctx context.Context) error { + _, _, err := a.Exec(ctx, "close") + return err +} diff --git a/browser/agentbrowser_test.go b/browser/agentbrowser_test.go new file mode 100644 index 0000000..ec7179a --- /dev/null +++ b/browser/agentbrowser_test.go @@ -0,0 +1,55 @@ +package browser + +import ( + "slices" + "testing" +) + +func TestAgentBrowserCommandArgs(t *testing.T) { + ab := &AgentBrowser{Path: "agent-browser", SessionName: "dev", ProfileDir: "/tmp/profile", Headed: true} + args := ab.commandArgs("open", "https://example.com") + for _, want := range []string{"open", "https://example.com", "--json", "--session-name", "dev", "--profile", "/tmp/profile", "--headed"} { + if !slices.Contains(args, want) { + t.Fatalf("args %v missing %q", args, want) + } + } +} + +func TestAgentBrowserCommandArgsIncludesEngine(t *testing.T) { + ab := &AgentBrowser{Path: "agent-browser", SessionName: "dev", Engine: "lightpanda"} + args := ab.commandArgs("open", "https://example.com") + for _, want := range []string{"--engine", "lightpanda"} { + if !slices.Contains(args, want) { + t.Fatalf("args %v missing %q", args, want) + } + } +} + +func TestAgentBrowserCommandArgsAttachedSkipsSession(t *testing.T) { + ab := &AgentBrowser{Path: "agent-browser", SessionName: "dev", Attached: true} + args := ab.commandArgs("get", "url") + if slices.Contains(args, "--session-name") { + t.Fatalf("attached args included session name: %v", args) + } +} + +func TestAgentBrowserCommandArgsPreservesExplicitJSONAndSession(t *testing.T) { + ab := &AgentBrowser{Path: "agent-browser", SessionName: "dev"} + args := ab.commandArgs("session", "--json", "--session-name", "other") + if got := count(args, "--json"); got != 1 { + t.Fatalf("--json count = %d, want 1 in %v", got, args) + } + if got := count(args, "--session-name"); got != 1 { + t.Fatalf("--session-name count = %d, want 1 in %v", got, args) + } +} + +func count(values []string, target string) int { + var n int + for _, value := range values { + if value == target { + n++ + } + } + return n +} diff --git a/browser/cdp.go b/browser/cdp.go deleted file mode 100644 index 08dcb22..0000000 --- a/browser/cdp.go +++ /dev/null @@ -1,752 +0,0 @@ -package browser - -import ( - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "time" - - "github.com/chromedp/cdproto/cdp" - "github.com/chromedp/cdproto/dom" - "github.com/chromedp/cdproto/input" - "github.com/chromedp/cdproto/network" - "github.com/chromedp/cdproto/page" - "github.com/chromedp/cdproto/runtime" - "github.com/chromedp/cdproto/target" - "github.com/chromedp/chromedp" -) - -const TargetTypePage = "page" - -// TargetInfo holds metadata about a CDP target (browser tab). -type TargetInfo struct { - TargetID string - Title string - URL string - Type string -} - -// ListTargets enumerates page targets in a browser reachable at debugURL. -func ListTargets(ctx context.Context, debugURL string) ([]TargetInfo, error) { - bctx, cancel := withBrowser(ctx, debugURL) - defer cancel() - - var out []TargetInfo - err := chromedp.Run(bctx, chromedp.ActionFunc(func(ctx context.Context) error { - infos, err := target.GetTargets().Do(ctx) - if err != nil { - return err - } - for _, ti := range infos { - if ti.Type != TargetTypePage { - continue - } - out = append(out, TargetInfo{ - TargetID: string(ti.TargetID), - Title: ti.Title, - URL: ti.URL, - Type: ti.Type, - }) - } - return nil - })) - if err != nil { - return nil, fmt.Errorf("list targets: %w", err) - } - return out, nil -} - -// ListTargetsHTTP enumerates page targets via the HTTP /json/list endpoint. -// This is more compatible with Electron apps and CEF-based browsers that may -// not support the Target.getTargets CDP command over the browser WebSocket. -func ListTargetsHTTP(ctx context.Context, debugURL string) ([]TargetInfo, error) { - httpBase, err := debugURLToHTTP(debugURL) - if err != nil { - return nil, fmt.Errorf("list targets: %w", err) - } - req, err := http.NewRequestWithContext(ctx, http.MethodGet, httpBase+"/json/list", nil) - if err != nil { - return nil, fmt.Errorf("list targets: build request: %w", err) - } - client := &http.Client{Timeout: 10 * time.Second} - resp, err := client.Do(req) - if err != nil { - return nil, fmt.Errorf("list targets: %w", err) - } - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() - - var raw []struct { - ID string `json:"id"` - Title string `json:"title"` - URL string `json:"url"` - Type string `json:"type"` - } - if err := json.NewDecoder(resp.Body).Decode(&raw); err != nil { - return nil, fmt.Errorf("list targets: parse response: %w", err) - } - - out := make([]TargetInfo, 0, len(raw)) - for _, t := range raw { - if t.Type != TargetTypePage { - continue - } - out = append(out, TargetInfo{ - TargetID: t.ID, - Title: t.Title, - URL: t.URL, - Type: t.Type, - }) - } - return out, nil -} - -// CreateTarget creates a new browser tab navigated to url and returns its target ID. -func CreateTarget(ctx context.Context, debugURL string, url string) (string, error) { - bctx, cancel := withBrowser(ctx, debugURL) - defer cancel() - - var id target.ID - err := chromedp.Run(bctx, chromedp.ActionFunc(func(ctx context.Context) error { - var err error - id, err = target.CreateTarget(url).WithBackground(true).Do(ctx) - return err - })) - if err != nil { - return "", fmt.Errorf("create target: %w", err) - } - return string(id), nil -} - -// CloseTarget closes the browser tab identified by targetID. -func CloseTarget(ctx context.Context, debugURL string, targetID string) error { - bctx, cancel := withBrowser(ctx, debugURL) - defer cancel() - - // Use the browser-level executor because chromedp's tab-level Target.Execute - // intercepts and rejects CloseTarget commands. - return chromedp.Run(bctx, chromedp.ActionFunc(func(ctx context.Context) error { - // Use bctx (the outer chromedp context) to reach the Browser executor. - // The inner ctx from ActionFunc is bound to the tab-level Target executor - // which intercepts CloseTarget, so we need the browser-level one. - c := chromedp.FromContext(bctx) - if c == nil || c.Browser == nil { - return fmt.Errorf("close target: no browser connection") - } - browserCtx := cdp.WithExecutor(ctx, c.Browser) - if err := target.CloseTarget(target.ID(targetID)).Do(browserCtx); err != nil { - return fmt.Errorf("close target: %w", err) - } - return nil - })) -} - -// NavigateTarget navigates an existing browser tab to url and waits for the body to be ready. -func NavigateTarget(ctx context.Context, debugURL string, targetID string, url string) error { - if err := withTarget(ctx, debugURL, targetID, - chromedp.Navigate(url), - chromedp.WaitReady("body"), - ); err != nil { - return fmt.Errorf("navigate target: %w", err) - } - return nil -} - -// EvalTarget evaluates JavaScript in the context of an existing browser tab -// and returns the result. -func EvalTarget(ctx context.Context, debugURL string, targetID string, js string) (any, error) { - var result any - err := withTarget(ctx, debugURL, targetID, - chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { - return p.WithReturnByValue(true).WithAwaitPromise(true) - }), - ) - if err != nil { - return nil, fmt.Errorf("eval target: %w", err) - } - return result, nil -} - -// ScreenshotTarget captures a full-page screenshot of an existing browser tab -// and returns the PNG bytes. -func ScreenshotTarget(ctx context.Context, debugURL string, targetID string) ([]byte, error) { - var buf []byte - err := withTarget(ctx, debugURL, targetID, - chromedp.FullScreenshot(&buf, 90), - ) - if err != nil { - return nil, fmt.Errorf("screenshot target: %w", err) - } - return buf, nil -} - -// FormField describes a fillable form element on the page. -type FormField struct { - Selector string `json:"selector"` - Tag string `json:"tag"` - Type string `json:"type,omitempty"` - Name string `json:"name,omitempty"` - ID string `json:"id,omitempty"` - Placeholder string `json:"placeholder,omitempty"` - Value string `json:"value,omitempty"` - Label string `json:"label,omitempty"` - Required bool `json:"required,omitempty"` - Disabled bool `json:"disabled,omitempty"` - Role string `json:"role,omitempty"` -} - -// FillField pairs a CSS selector with the value to fill. -type FillField struct { - Selector string - Value string -} - -// FormsTarget discovers fillable form elements in a browser tab. -func FormsTarget(ctx context.Context, debugURL string, targetID string) ([]FormField, error) { - js := ` -(() => { - const els = document.querySelectorAll('input, textarea, select, button[type="submit"], button:not([type]), input[type="submit"]'); - return [...els].map(el => { - const tag = el.tagName.toLowerCase(); - let selector = ""; - if (el.id) { - selector = "#" + CSS.escape(el.id); - } else if (el.name) { - selector = tag + "[name=" + JSON.stringify(el.name) + "]"; - // Disambiguate when multiple elements share the same name (radio groups, checkboxes) - const dupes = document.querySelectorAll(selector); - if (dupes.length > 1) { - if (el.value) { - selector += "[value=" + JSON.stringify(el.value) + "]"; - } else { - const idx = [...dupes].indexOf(el); - if (idx > 0) { - // nth-of-type won't work here; use a parent-scoped index - const parent = el.parentElement; - if (parent) { - const siblings = [...parent.querySelectorAll(selector)]; - const sIdx = siblings.indexOf(el); - if (sIdx >= 0) { - let parentSel = parent.id ? "#" + CSS.escape(parent.id) : parent.tagName.toLowerCase(); - selector = parentSel + " " + selector + ":nth-of-type(" + (sIdx + 1) + ")"; - } - } - } - } - } - } else if (el.placeholder) { - selector = tag + "[placeholder=" + JSON.stringify(el.placeholder) + "]"; - } else if (el.type === "submit" || (tag === "button" && !el.type)) { - const parent = el.parentElement; - if (parent) { - const siblings = [...parent.children].filter(c => c.tagName.toLowerCase() === tag); - const idx = siblings.indexOf(el); - if (idx >= 0) { - // Build a unique path: parent selector + child nth-of-type - let parentSel = ""; - if (parent.id) { - parentSel = "#" + CSS.escape(parent.id); - } else { - parentSel = parent.tagName.toLowerCase(); - const gp = parent.parentElement; - if (gp) { - const pSiblings = [...gp.children].filter(c => c.tagName.toLowerCase() === parent.tagName.toLowerCase()); - const pIdx = pSiblings.indexOf(parent); - if (pIdx >= 0) parentSel += ":nth-of-type(" + (pIdx + 1) + ")"; - } - } - selector = parentSel + " > " + tag + ":nth-of-type(" + (idx + 1) + ")"; - } - } - } - - let label = ""; - if (el.id) { - const lbl = document.querySelector("label[for=" + JSON.stringify(el.id) + "]"); - if (lbl) label = lbl.textContent.trim(); - } - if (!label && el.closest("label")) { - label = el.closest("label").textContent.trim(); - } - if (!label && el.getAttribute("aria-label")) { - label = el.getAttribute("aria-label"); - } - - let role = ""; - if (tag === "button" || el.type === "submit") role = "submit"; - else if (tag === "select") role = "select"; - else if (tag === "textarea") role = "text"; - else if (["text","email","password","search","tel","url","number"].includes(el.type)) role = "text"; - else if (["checkbox","radio"].includes(el.type)) role = "toggle"; - else if (el.type === "hidden") role = "hidden"; - else if (el.type === "file") role = "file"; - - return { - selector: selector, - tag: tag, - type: el.type || "", - name: el.name || "", - id: el.id || "", - placeholder: el.placeholder || "", - value: tag === "select" ? el.value || "" : el.value || "", - label: label, - required: el.required || false, - disabled: el.disabled || false, - role: role, - }; - }).filter(f => f.role !== "hidden" && f.selector !== ""); -})() -` - var fields []FormField - err := withTarget(ctx, debugURL, targetID, - chromedp.Evaluate(js, &fields, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { - return p.WithReturnByValue(true).WithAwaitPromise(true) - }), - ) - if err != nil { - return nil, fmt.Errorf("forms target: %w", err) - } - return fields, nil -} - -// FillTarget fills form fields in a browser tab using React-compatible value setting. -func FillTarget(ctx context.Context, debugURL string, targetID string, fields []FillField, submitSelector string) error { - var actions []chromedp.Action - - for _, f := range fields { - sel := f.Selector - val := f.Value - // Use a JS snippet that sets value via native setter and dispatches events. - // This works with React, Vue, Angular, and vanilla HTML forms. - js := fmt.Sprintf(` -(() => { - const el = document.querySelector(%q); - if (!el) throw new Error("element not found: " + %q); - el.focus(); - const tag = el.tagName.toLowerCase(); - if (tag === "select") { - el.value = %q; - el.dispatchEvent(new Event("change", { bubbles: true })); - } else if (el.type === "checkbox" || el.type === "radio") { - const want = %q; - if (want === "true" || want === "1" || want === "on") { - if (!el.checked) el.click(); - } else { - if (el.checked) el.click(); - } - } else { - const setter = (tag === "textarea") - ? Object.getOwnPropertyDescriptor(window.HTMLTextAreaElement.prototype, "value")?.set - : Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, "value")?.set; - if (setter) { - setter.call(el, %q); - } else { - el.value = %q; - } - el.dispatchEvent(new Event("input", { bubbles: true })); - el.dispatchEvent(new Event("change", { bubbles: true })); - } - return true; -})() -`, sel, sel, val, val, val, val) - - var ok bool - actions = append(actions, chromedp.Evaluate(js, &ok, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { - return p.WithReturnByValue(true).WithAwaitPromise(true) - })) - } - - if submitSelector != "" { - submitJS := fmt.Sprintf(` -(() => { - const el = document.querySelector(%q); - if (!el) throw new Error("submit element not found: " + %q); - el.click(); - return true; -})() -`, submitSelector, submitSelector) - var ok bool - actions = append(actions, chromedp.Evaluate(submitJS, &ok, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { - return p.WithReturnByValue(true).WithAwaitPromise(true) - })) - } - - if err := withTarget(ctx, debugURL, targetID, actions...); err != nil { - return fmt.Errorf("fill target: %w", err) - } - return nil -} - -// GetHTMLTarget returns the outerHTML of the element matching sel, or the full -// page HTML if sel is empty. -func GetHTMLTarget(ctx context.Context, debugURL string, targetID string, sel string) (string, string, error) { - if sel == "" { - sel = "html" - } - js := fmt.Sprintf(` -(() => { - const el = document.querySelector(%q); - if (!el) return {html: "", url: location.href}; - return {html: el.outerHTML, url: location.href}; -})() -`, sel) - var result struct { - HTML string `json:"html"` - URL string `json:"url"` - } - err := withTarget(ctx, debugURL, targetID, - chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { - return p.WithReturnByValue(true).WithAwaitPromise(true) - }), - ) - if err != nil { - return "", "", fmt.Errorf("get html target: %w", err) - } - return result.HTML, result.URL, nil -} - -// KeypressTarget sends key events to the page (not a specific element). -// The keys string uses chromedp/kb constants: "\r" for Enter, "\t" for Tab, -// "\u001b" for Escape, etc. Regular characters are sent as-is. -func KeypressTarget(ctx context.Context, debugURL string, targetID string, keys string) error { - if err := withTarget(ctx, debugURL, targetID, - chromedp.KeyEvent(keys), - ); err != nil { - return fmt.Errorf("keypress target: %w", err) - } - return nil -} - -// DialogTarget accepts or dismisses a pending JavaScript dialog (alert/confirm/prompt). -// For prompt dialogs, promptText is entered before accepting. -func DialogTarget(ctx context.Context, debugURL string, targetID string, accept bool, promptText string) error { - err := withTarget(ctx, debugURL, targetID, - chromedp.ActionFunc(func(ctx context.Context) error { - p := page.HandleJavaScriptDialog(accept) - if promptText != "" { - p = p.WithPromptText(promptText) - } - return p.Do(ctx) - }), - ) - if err != nil { - return fmt.Errorf("dialog target: %w", err) - } - return nil -} - -// CookieEntry represents a browser cookie for JSON output. -type CookieEntry struct { - Name string `json:"name"` - Value string `json:"value"` - Domain string `json:"domain"` - Path string `json:"path"` - Expires float64 `json:"expires"` - HTTPOnly bool `json:"httpOnly"` - Secure bool `json:"secure"` - Session bool `json:"session"` - SameSite string `json:"sameSite,omitempty"` -} - -// GetCookiesTarget returns all cookies for the current page. -func GetCookiesTarget(ctx context.Context, debugURL string, targetID string) ([]CookieEntry, error) { - var cookies []CookieEntry - err := withTarget(ctx, debugURL, targetID, - chromedp.ActionFunc(func(ctx context.Context) error { - raw, err := network.GetCookies().Do(ctx) - if err != nil { - return err - } - for _, c := range raw { - cookies = append(cookies, CookieEntry{ - Name: c.Name, - Value: c.Value, - Domain: c.Domain, - Path: c.Path, - Expires: c.Expires, - HTTPOnly: c.HTTPOnly, - Secure: c.Secure, - Session: c.Session, - SameSite: string(c.SameSite), - }) - } - return nil - }), - ) - if err != nil { - return nil, fmt.Errorf("get cookies target: %w", err) - } - return cookies, nil -} - -// SetCookieTarget sets a cookie on the page. -func SetCookieTarget(ctx context.Context, debugURL string, targetID string, name, value, domain, path string) error { - err := withTarget(ctx, debugURL, targetID, - chromedp.ActionFunc(func(ctx context.Context) error { - p := network.SetCookie(name, value) - if domain != "" { - p = p.WithDomain(domain) - } - if path != "" { - p = p.WithPath(path) - } - return p.Do(ctx) - }), - ) - if err != nil { - return fmt.Errorf("set cookie target: %w", err) - } - return nil -} - -// ClearCookiesTarget deletes all cookies for the current page. -func ClearCookiesTarget(ctx context.Context, debugURL string, targetID string) error { - err := withTarget(ctx, debugURL, targetID, - chromedp.ActionFunc(func(ctx context.Context) error { - raw, err := network.GetCookies().Do(ctx) - if err != nil { - return err - } - for _, c := range raw { - if err := network.DeleteCookies(c.Name).WithDomain(c.Domain).WithPath(c.Path).Do(ctx); err != nil { - return fmt.Errorf("delete cookie %q: %w", c.Name, err) - } - } - return nil - }), - ) - if err != nil { - return fmt.Errorf("clear cookies target: %w", err) - } - return nil -} - -// PDFTarget saves the current page as PDF and returns the bytes. -func PDFTarget(ctx context.Context, debugURL string, targetID string, landscape bool, printBackground bool, scale float64) ([]byte, error) { - var buf []byte - err := withTarget(ctx, debugURL, targetID, - chromedp.ActionFunc(func(ctx context.Context) error { - params := page.PrintToPDF(). - WithLandscape(landscape). - WithPrintBackground(printBackground) - if scale > 0 { - params = params.WithScale(scale) - } - data, _, err := params.Do(ctx) - if err != nil { - return err - } - buf = data - return nil - }), - ) - if err != nil { - return nil, fmt.Errorf("pdf target: %w", err) - } - return buf, nil -} - -// ClickTarget dispatches a real mouse click on the first element matching sel. -// chromedp.Click sends the full mouseMoved → mousePressed → mouseReleased sequence. -func ClickTarget(ctx context.Context, debugURL string, targetID string, sel string) error { - if err := withTarget(ctx, debugURL, targetID, - chromedp.Click(sel, chromedp.ByQuery, chromedp.NodeVisible), - ); err != nil { - return fmt.Errorf("click target: %w", err) - } - return nil -} - -// TypeTarget sends individual key events to the element matching sel. -// Unlike FillTarget which sets .value directly, this dispatches keyDown/keyUp -// per character — behaving like a real user typing. -func TypeTarget(ctx context.Context, debugURL string, targetID string, sel string, text string) error { - if err := withTarget(ctx, debugURL, targetID, - chromedp.Click(sel, chromedp.ByQuery, chromedp.NodeVisible), - chromedp.SendKeys(sel, text, chromedp.ByQuery), - ); err != nil { - return fmt.Errorf("type target: %w", err) - } - return nil -} - -// HoverTarget moves the mouse to the center of the element matching sel, -// dispatching mouseMoved events that trigger CSS :hover states and mouseenter listeners. -func HoverTarget(ctx context.Context, debugURL string, targetID string, sel string) error { - var nodes []*cdp.Node - if err := withTarget(ctx, debugURL, targetID, - chromedp.Nodes(sel, &nodes, chromedp.ByQuery, chromedp.NodeVisible), - chromedp.ActionFunc(func(ctx context.Context) error { - if len(nodes) == 0 { - return fmt.Errorf("no element matching %q", sel) - } - box, err := dom.GetBoxModel().WithNodeID(nodes[0].NodeID).Do(ctx) - if err != nil { - return fmt.Errorf("get box model: %w", err) - } - c := box.Content - x := (c[0] + c[2] + c[4] + c[6]) / 4 - y := (c[1] + c[3] + c[5] + c[7]) / 4 - return chromedp.MouseEvent(input.MouseMoved, x, y).Do(ctx) - }), - ); err != nil { - return fmt.Errorf("hover target: %w", err) - } - return nil -} - -// ScrollTarget scrolls the element matching sel into view. If sel is empty, -// scrolls to the given x,y pixel coordinates. -func ScrollTarget(ctx context.Context, debugURL string, targetID string, sel string, x, y float64) error { - var actions []chromedp.Action - if sel != "" { - actions = append(actions, chromedp.ScrollIntoView(sel, chromedp.ByQuery)) - } else { - js := fmt.Sprintf("window.scrollTo(%f, %f)", x, y) - var ignore any - actions = append(actions, chromedp.Evaluate(js, &ignore)) - } - if err := withTarget(ctx, debugURL, targetID, actions...); err != nil { - return fmt.Errorf("scroll target: %w", err) - } - return nil -} - -// SelectTarget selects an option in a "); - el.focus(); - el.value = %q; - el.dispatchEvent(new Event("input", { bubbles: true })); - el.dispatchEvent(new Event("change", { bubbles: true })); - return true; -})() -`, sel, sel, value) - var ok bool - if err := withTarget(ctx, debugURL, targetID, - chromedp.Evaluate(js, &ok, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { - return p.WithReturnByValue(true).WithAwaitPromise(true) - }), - ); err != nil { - return fmt.Errorf("select target: %w", err) - } - return nil -} - -// WaitForTarget waits until the element matching sel is visible in the tab. -// The caller controls the deadline via ctx. -func WaitForTarget(ctx context.Context, debugURL string, targetID string, sel string, timeout time.Duration) error { - waitCtx := ctx - if timeout > 0 { - var cancel context.CancelFunc - waitCtx, cancel = context.WithTimeout(ctx, timeout) - defer cancel() - } - if err := withTarget(waitCtx, debugURL, targetID, - chromedp.WaitVisible(sel, chromedp.ByQuery), - ); err != nil { - return fmt.Errorf("wait for target: %w", err) - } - return nil -} - -// BackTarget navigates the tab backwards in history. -func BackTarget(ctx context.Context, debugURL string, targetID string) error { - if err := withTarget(ctx, debugURL, targetID, - chromedp.NavigateBack(), - ); err != nil { - return fmt.Errorf("back target: %w", err) - } - return nil -} - -// ForwardTarget navigates the tab forwards in history. -func ForwardTarget(ctx context.Context, debugURL string, targetID string) error { - if err := withTarget(ctx, debugURL, targetID, - chromedp.NavigateForward(), - ); err != nil { - return fmt.Errorf("forward target: %w", err) - } - return nil -} - -// ReloadTarget reloads the current page in the tab. -func ReloadTarget(ctx context.Context, debugURL string, targetID string) error { - if err := withTarget(ctx, debugURL, targetID, - chromedp.Reload(), - ); err != nil { - return fmt.Errorf("reload target: %w", err) - } - return nil -} - -// withTargetListen connects to debugURL, attaches to the specific target, and -// returns a long-lived context suitable for event listening. Unlike withTarget, -// it does NOT run actions or detach automatically — the caller controls the -// session lifetime. -// -// Usage: -// 1. Call withTargetListen to get taskCtx. -// 2. Register event listeners with chromedp.ListenTarget(taskCtx, ...). -// 3. Enable the domain with chromedp.Run(taskCtx, ...). -// 4. Wait/process events. -// 5. Call cancel — clears TargetID first (detach-without-close). -func withTargetListen(ctx context.Context, debugURL string, targetID string) (context.Context, func(), error) { - allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, debugURL, chromedp.NoModifyURL) - taskCtx, taskCancel := chromedp.NewContext(allocCtx, chromedp.WithTargetID(target.ID(targetID))) - - // Run an empty action to force the CDP session to attach to the target. - if err := chromedp.Run(taskCtx); err != nil { - taskCancel() - allocCancel() - return nil, nil, fmt.Errorf("attach target for listen: %w", err) - } - - cancel := func() { - // Clear TargetID BEFORE cancel so chromedp's cancel handler does not - // close the tab. Same detach-without-close trick as withTarget. - if c := chromedp.FromContext(taskCtx); c != nil && c.Target != nil { - c.Target.TargetID = "" - } - taskCancel() - allocCancel() - } - - return taskCtx, cancel, nil -} - -// withBrowser connects to debugURL at the browser level and returns contexts for CDP commands. -func withBrowser(ctx context.Context, debugURL string) (context.Context, context.CancelFunc) { - allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, debugURL, chromedp.NoModifyURL) - taskCtx, taskCancel := chromedp.NewContext(allocCtx) - return taskCtx, func() { taskCancel(); allocCancel() } -} - -// withTarget connects to debugURL, attaches to the specific target, runs the actions, and cleans up. -// It detaches from the target without closing it so the tab survives across calls. -func withTarget(ctx context.Context, debugURL string, targetID string, actions ...chromedp.Action) error { - allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, debugURL, chromedp.NoModifyURL) - - taskCtx, taskCancel := chromedp.NewContext(allocCtx, chromedp.WithTargetID(target.ID(targetID))) - - err := chromedp.Run(taskCtx, actions...) - - // Clear TargetID BEFORE cancel so chromedp's cancel handler does not - // close the tab. We attach to an existing tab we don't own. - if c := chromedp.FromContext(taskCtx); c != nil && c.Target != nil { - c.Target.TargetID = "" - } - taskCancel() - allocCancel() - - return err -} diff --git a/browser/cdp_snapshot.go b/browser/cdp_snapshot.go deleted file mode 100644 index f725d5e..0000000 --- a/browser/cdp_snapshot.go +++ /dev/null @@ -1,272 +0,0 @@ -package browser - -import ( - "context" - "encoding/json" - "fmt" - "strings" - - "github.com/chromedp/cdproto/accessibility" - "github.com/chromedp/cdproto/cdp" - "github.com/chromedp/cdproto/dom" - "github.com/chromedp/cdproto/input" - "github.com/chromedp/cdproto/runtime" - "github.com/chromedp/chromedp" -) - -// SnapshotTarget builds an AX-powered semantic snapshot for a target tab. -func SnapshotTarget(ctx context.Context, debugURL, targetID string, opts SnapshotOptions) (*SnapshotResult, error) { - if opts.Mode == "" { - opts.Mode = "auto" - } - - out := &SnapshotResult{Mode: "ax"} - err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { - if err := accessibility.Enable().Do(ctx); err != nil { - return fmt.Errorf("enable accessibility: %w", err) - } - defer func() { _ = accessibility.Disable().Do(ctx) }() - - var nodes []*accessibility.Node - var err error - if opts.Depth > 0 { - nodes, err = accessibility.GetFullAXTree().WithDepth(opts.Depth).Do(ctx) - } else { - nodes, err = accessibility.GetFullAXTree().Do(ctx) - } - if err != nil { - return fmt.Errorf("get full ax tree: %w", err) - } - meta, err := EvalTarget(ctx, debugURL, targetID, `({ - url: location.href, - title: document.title || "", - key: [location.href, performance.timeOrigin || 0, document.body ? document.body.childElementCount : 0].join("|") -})`) - if err != nil { - return fmt.Errorf("snapshot metadata: %w", err) - } - if m, ok := meta.(map[string]any); ok { - if v, ok := m["url"].(string); ok { - out.URL = v - } - if v, ok := m["title"].(string); ok { - out.Title = v - } - if v, ok := m["key"].(string); ok { - out.DocumentKey = v - } - } - buildSnapshotTree(out, nodes, opts) - return nil - })) - if err != nil { - return nil, err - } - return out, nil -} - -func buildSnapshotTree(out *SnapshotResult, axNodes []*accessibility.Node, opts SnapshotOptions) { - idToNode := make(map[accessibility.NodeID]*accessibility.Node, len(axNodes)) - for _, n := range axNodes { - idToNode[n.NodeID] = n - } - - interactiveRoles := map[string]bool{ - "button": true, "link": true, "textbox": true, "searchbox": true, "checkbox": true, - "radio": true, "combobox": true, "option": true, "menuitem": true, "tab": true, - } - structuralRoles := map[string]bool{ - "rootwebarea": true, "document": true, "main": true, "article": true, - "heading": true, "list": true, "listitem": true, "dialog": true, - } - - axToIndex := make(map[accessibility.NodeID]int, len(axNodes)) - nextRef := 1 - for _, n := range axNodes { - role := normalizeAXValue(n.Role) - name := normalizeAXValue(n.Name) - if n.Ignored && !interactiveRoles[role] { - continue - } - keep := structuralRoles[role] || interactiveRoles[role] || name != "" - if opts.InteractiveOnly && !interactiveRoles[role] { - keep = false - } - if !keep { - continue - } - - node := SnapshotNode{ - Role: role, - Name: name, - Value: normalizeAXValue(n.Value), - Description: normalizeAXValue(n.Description), - States: axStates(n.Properties), - } - if interactiveRoles[role] { - node.Ref = fmt.Sprintf("@e%d", nextRef) - nextRef++ - out.Refs = append(out.Refs, SnapshotRef{ - Ref: node.Ref, - BackendDOMNodeID: int64(n.BackendDOMNodeID), - AXNodeID: string(n.NodeID), - FrameID: string(n.FrameID), - Role: role, - Name: name, - }) - } - axToIndex[n.NodeID] = len(out.Nodes) - out.Nodes = append(out.Nodes, node) - } - - for _, n := range axNodes { - parentIdx, ok := axToIndex[n.NodeID] - if !ok { - continue - } - for _, childID := range n.ChildIDs { - if childIdx, ok := axToIndex[childID]; ok { - out.Nodes[parentIdx].Children = append(out.Nodes[parentIdx].Children, childIdx) - } - } - } -} - -func normalizeAXValue(v *accessibility.Value) string { - if v == nil { - return "" - } - var anyv any - if err := json.Unmarshal(v.Value, &anyv); err != nil { - return "" - } - switch tv := anyv.(type) { - case string: - return strings.TrimSpace(tv) - case bool, float64: - return fmt.Sprintf("%v", tv) - default: - return "" - } -} - -func axStates(props []*accessibility.Property) []string { - out := make([]string, 0, len(props)) - for _, p := range props { - name := string(p.Name) - if name == "disabled" || name == "checked" || name == "expanded" || name == "selected" { - val := normalizeAXValue(p.Value) - if val == "" { - val = "true" - } - out = append(out, fmt.Sprintf("%s=%s", name, val)) - } - } - return out -} - -func ClickTargetByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) error { - return withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { - if err := dom.ScrollIntoViewIfNeeded().WithBackendNodeID(backendNodeID).Do(ctx); err != nil { - return fmt.Errorf("scroll into view: %w", err) - } - box, err := dom.GetBoxModel().WithBackendNodeID(backendNodeID).Do(ctx) - if err != nil { - return fmt.Errorf("get box model: %w", err) - } - q := box.Content - x := (q[0] + q[2] + q[4] + q[6]) / 4 - y := (q[1] + q[3] + q[5] + q[7]) / 4 - if err := input.DispatchMouseEvent(input.MouseMoved, x, y).Do(ctx); err != nil { - return err - } - if err := input.DispatchMouseEvent(input.MousePressed, x, y).WithButton(input.Left).WithClickCount(1).Do(ctx); err != nil { - return err - } - return input.DispatchMouseEvent(input.MouseReleased, x, y).WithButton(input.Left).WithClickCount(1).Do(ctx) - })) -} - -func FillTargetByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID, value string) error { - return withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { - obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) - if err != nil { - return fmt.Errorf("resolve node: %w", err) - } - fn := `function(v){ - this.focus(); - const tag = this.tagName ? this.tagName.toLowerCase() : ""; - if (tag === "select") { - this.value = v; - this.dispatchEvent(new Event("change", { bubbles: true })); - return true; - } - const setter = (tag === "textarea") - ? Object.getOwnPropertyDescriptor(window.HTMLTextAreaElement.prototype, "value")?.set - : Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, "value")?.set; - if (setter) setter.call(this, v); else this.value = v; - this.dispatchEvent(new Event("input", { bubbles: true })); - this.dispatchEvent(new Event("change", { bubbles: true })); - return true; -}` - _, ex, err := runtime.CallFunctionOn(fmt.Sprintf(`function(){ return (%s).call(this, %q) }`, fn, value)). - WithObjectID(obj.ObjectID). - WithAwaitPromise(true). - WithReturnByValue(true). - Do(ctx) - if err != nil { - return fmt.Errorf("fill by backend node: %w", err) - } - if ex != nil { - return fmt.Errorf("fill by backend node: javascript exception") - } - return nil - })) -} - -func TypeTargetByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID, text string) error { - return withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { - obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) - if err != nil { - return fmt.Errorf("resolve node: %w", err) - } - if _, ex, err := runtime.CallFunctionOn(`function(){ this.focus(); return true; }`). - WithObjectID(obj.ObjectID). - WithReturnByValue(true). - Do(ctx); err != nil { - return err - } else if ex != nil { - return fmt.Errorf("focus by backend node: javascript exception") - } - return input.InsertText(text).Do(ctx) - })) -} - -func SelectTargetByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID, value string) error { - return withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { - obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) - if err != nil { - return fmt.Errorf("resolve node: %w", err) - } - fn := `function(v){ - if (!this.tagName || this.tagName.toLowerCase() !== "select") throw new Error("element is not select"); - this.focus(); - this.value = v; - this.dispatchEvent(new Event("input", { bubbles: true })); - this.dispatchEvent(new Event("change", { bubbles: true })); - return true; -}` - _, ex, err := runtime.CallFunctionOn(fmt.Sprintf(`function(){ return (%s).call(this, %q) }`, fn, value)). - WithObjectID(obj.ObjectID). - WithAwaitPromise(true). - WithReturnByValue(true). - Do(ctx) - if err != nil { - return err - } - if ex != nil { - return fmt.Errorf("select by backend node: javascript exception") - } - return nil - })) -} diff --git a/browser/cdp_test.go b/browser/cdp_test.go deleted file mode 100644 index 4cc77c3..0000000 --- a/browser/cdp_test.go +++ /dev/null @@ -1,9 +0,0 @@ -package browser - -import "testing" - -func TestTargetTypePageConstant(t *testing.T) { - if TargetTypePage != "page" { - t.Fatalf("TargetTypePage = %q, want %q", TargetTypePage, "page") - } -} diff --git a/browser/chrome.go b/browser/chrome.go deleted file mode 100644 index 83a0f75..0000000 --- a/browser/chrome.go +++ /dev/null @@ -1,41 +0,0 @@ -package browser - -import ( - "os/exec" - "strings" -) - -// ChromeInfo holds information about an installed Chrome browser. -type ChromeInfo struct { - Path string - Version string -} - -// DetectChrome finds a Chrome/Chromium installation and returns its path and version. -// It reuses the same discovery logic as the browser launcher (findChrome). -// Returns nil if no Chrome is found. -func DetectChrome() *ChromeInfo { - path, err := findChrome() - if err != nil { - return nil - } - - out, err := exec.Command(path, "--version").Output() - if err != nil { - return &ChromeInfo{Path: path} - } - return &ChromeInfo{ - Path: path, - Version: parseVersion(string(out)), - } -} - -func parseVersion(output string) string { - // "Google Chrome 125.0.6422.141" → "125.0.6422.141" - output = strings.TrimSpace(output) - parts := strings.Fields(output) - if len(parts) == 0 { - return "" - } - return parts[len(parts)-1] -} diff --git a/browser/daemon.go b/browser/daemon.go deleted file mode 100644 index e15c76a..0000000 --- a/browser/daemon.go +++ /dev/null @@ -1,198 +0,0 @@ -package browser - -import ( - "context" - "crypto/rand" - "encoding/hex" - "encoding/json" - "errors" - "fmt" - "io" - "net" - "net/http" - "strings" - "time" -) - -const ( - AttachStateDetached = "detached" - AttachStateAttachedReady = "attached-ready" - AttachStateAttachedStale = "attached-stale" -) - -type ProxyDaemonHealth struct { - CheckedAt time.Time `json:"checked_at"` - PIDAlive bool `json:"pid_alive"` - DaemonReachable bool `json:"daemon_reachable"` - UpstreamReachable bool `json:"upstream_reachable"` - Healthy bool `json:"healthy"` - Status string `json:"status"` - Reason string `json:"reason,omitempty"` -} - -func GenerateOwnershipToken() (string, error) { - buf := make([]byte, 16) - if _, err := rand.Read(buf); err != nil { - return "", fmt.Errorf("generate ownership token: %w", err) - } - return hex.EncodeToString(buf), nil -} - -func ProxyEndpointForListenAddr(listenAddr string) string { - return fmt.Sprintf("ws://%s/devtools/browser/proxy", listenAddr) -} - -func ProxyHTTPBaseForListenAddr(listenAddr string) string { - return fmt.Sprintf("http://%s", listenAddr) -} - -func CheckProxyDaemon(ctx context.Context, record *ProxyDaemonRecord) ProxyDaemonHealth { - health := ProxyDaemonHealth{CheckedAt: time.Now().UTC(), Status: AttachStateDetached} - if record == nil { - health.Reason = "no proxy daemon recorded" - return health - } - - var reasons []string - health.Status = AttachStateAttachedStale - if record.PID > 0 && isProcessAlive(record.PID) { - health.PIDAlive = true - } else { - reasons = append(reasons, "daemon pid is not alive") - } - - client := &http.Client{Timeout: 3 * time.Second} - if strings.TrimSpace(record.ListenAddr) != "" { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, ProxyHTTPBaseForListenAddr(record.ListenAddr)+"/healthz", nil) - if err == nil { - resp, err := client.Do(req) - if err == nil { - var payload struct { - UpstreamReady bool `json:"upstreamReady"` - } - body, _ := io.ReadAll(resp.Body) - _ = resp.Body.Close() - _ = json.Unmarshal(body, &payload) - if resp.StatusCode == http.StatusOK { - health.DaemonReachable = true - } - if payload.UpstreamReady { - health.UpstreamReachable = true - } - } else { - reasons = append(reasons, fmt.Sprintf("daemon health check failed: %v", err)) - } - } else { - reasons = append(reasons, fmt.Sprintf("daemon health request failed: %v", err)) - } - } else { - reasons = append(reasons, "daemon listen address is missing") - } - - if !health.DaemonReachable { - reasons = append(reasons, "daemon endpoint is unreachable") - } - if !health.UpstreamReachable { - reasons = append(reasons, "upstream Chrome is unreachable") - } - if health.PIDAlive && health.DaemonReachable && health.UpstreamReachable { - health.Healthy = true - health.Status = AttachStateAttachedReady - health.Reason = "" - return health - } - if len(reasons) == 0 { - reasons = append(reasons, "proxy daemon is unhealthy") - } - health.Reason = strings.Join(uniqueStrings(reasons), "; ") - return health -} - -func ShouldReuseProxyDaemon(record *ProxyDaemonRecord, discoveredUpstreamWSURL string, health ProxyDaemonHealth) bool { - return record != nil && record.UpstreamWSURL == strings.TrimSpace(discoveredUpstreamWSURL) && health.Healthy -} - -func ShutdownProxyDaemon(ctx context.Context, record *ProxyDaemonRecord) error { - if record == nil { - return nil - } - if strings.TrimSpace(record.ListenAddr) == "" { - return errors.New("proxy daemon listen address is missing") - } - if strings.TrimSpace(record.OwnershipToken) == "" { - return errors.New("proxy daemon ownership token is missing") - } - req, err := http.NewRequestWithContext(ctx, http.MethodPost, ProxyHTTPBaseForListenAddr(record.ListenAddr)+"/shutdown", nil) - if err != nil { - return fmt.Errorf("build proxy shutdown request: %w", err) - } - req.Header.Set("X-Tap-Ownership-Token", record.OwnershipToken) - resp, err := (&http.Client{Timeout: 3 * time.Second}).Do(req) - if err != nil { - return fmt.Errorf("request proxy shutdown: %w", err) - } - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("proxy shutdown returned status %d", resp.StatusCode) - } - return nil -} - -func WaitForProxyDaemonExit(ctx context.Context, record *ProxyDaemonRecord) error { - if record == nil || record.PID <= 0 { - return nil - } - ticker := time.NewTicker(100 * time.Millisecond) - defer ticker.Stop() - for { - if !isProcessAlive(record.PID) { - return nil - } - select { - case <-ctx.Done(): - return ctx.Err() - case <-ticker.C: - } - } -} - -func WaitForProxyDaemonHealth(ctx context.Context, record *ProxyDaemonRecord) (ProxyDaemonHealth, error) { - ticker := time.NewTicker(100 * time.Millisecond) - defer ticker.Stop() - for { - health := CheckProxyDaemon(ctx, record) - if health.Healthy { - return health, nil - } - select { - case <-ctx.Done(): - return health, ctx.Err() - case <-ticker.C: - } - } -} - -func IsListenAddrInUse(err error) bool { - var opErr *net.OpError - return errors.As(err, &opErr) && strings.Contains(strings.ToLower(opErr.Err.Error()), "address already in use") || strings.Contains(strings.ToLower(err.Error()), "address already in use") -} - -func uniqueStrings(items []string) []string { - seen := make(map[string]struct{}, len(items)) - out := make([]string, 0, len(items)) - for _, item := range items { - item = strings.TrimSpace(item) - if item == "" { - continue - } - if _, ok := seen[item]; ok { - continue - } - seen[item] = struct{}{} - out = append(out, item) - } - return out -} diff --git a/browser/daemon_test.go b/browser/daemon_test.go deleted file mode 100644 index 0f02bc0..0000000 --- a/browser/daemon_test.go +++ /dev/null @@ -1,167 +0,0 @@ -package browser - -import ( - "context" - "net" - "net/http" - "net/http/httptest" - "os" - "strings" - "testing" - "time" - - "github.com/gorilla/websocket" -) - -func startFakeUpstreamWS(t *testing.T) (string, func()) { - t.Helper() - upgrader := websocket.Upgrader{CheckOrigin: func(_ *http.Request) bool { return true }} - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path != "/devtools/browser/test" { - http.NotFound(w, r) - return - } - conn, err := upgrader.Upgrade(w, r, nil) - if err != nil { - return - } - defer func() { _ = conn.Close() }() - for { - if _, _, err := conn.NextReader(); err != nil { - return - } - } - })) - wsURL := "ws" + strings.TrimPrefix(srv.URL, "http") + "/devtools/browser/test" - return wsURL, srv.Close -} - -func startProxyForTest(t *testing.T, upstream, token string) (*ProxyDaemonRecord, func()) { - t.Helper() - ln, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - record := &ProxyDaemonRecord{ - PID: os.Getpid(), - ListenAddr: ln.Addr().String(), - Endpoint: ProxyEndpointForListenAddr(ln.Addr().String()), - UpstreamWSURL: upstream, - OwnershipToken: token, - } - proxy := NewProxy(ProxyConfig{ListenAddr: record.ListenAddr, Upstream: upstream, OwnershipToken: token}) - errCh := make(chan error, 1) - go func() { - errCh <- proxy.ServeListener(t.Context(), ln) - }() - ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second) - defer cancel() - if _, err := WaitForProxyDaemonHealth(ctx, record); err != nil { - t.Fatalf("WaitForProxyDaemonHealth failed: %v", err) - } - cleanup := func() { - shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - _ = ShutdownProxyDaemon(shutdownCtx, record) - select { - case <-time.After(250 * time.Millisecond): - case <-errCh: - } - } - return record, cleanup -} - -func TestStorePersistsProxyDaemonState(t *testing.T) { - store := testStore(t) - now := time.Date(2026, 4, 7, 12, 0, 0, 0, time.UTC) - want := &ProxyDaemonRecord{ - PID: 42, - ListenAddr: "127.0.0.1:12345", - Endpoint: "ws://127.0.0.1:12345/devtools/browser/proxy", - UpstreamWSURL: "ws://127.0.0.1:9222/devtools/browser/test", - OwnershipToken: "token-1", - State: AttachStateAttachedReady, - Status: AttachStateAttachedReady, - LastHealthCheckAt: now, - LastHealthyAt: now, - StartedAt: now, - UpdatedAt: now, - } - if err := store.Update(func(state *State) error { - state.ProxyDaemon = want - return nil - }); err != nil { - t.Fatalf("Update failed: %v", err) - } - reloaded, err := store.Load() - if err != nil { - t.Fatalf("Load failed: %v", err) - } - if reloaded.ProxyDaemon == nil { - t.Fatal("ProxyDaemon = nil, want record") - } - if reloaded.ProxyDaemon.Endpoint != want.Endpoint { - t.Fatalf("Endpoint = %q, want %q", reloaded.ProxyDaemon.Endpoint, want.Endpoint) - } - if reloaded.ProxyDaemon.OwnershipToken != want.OwnershipToken { - t.Fatalf("OwnershipToken = %q, want %q", reloaded.ProxyDaemon.OwnershipToken, want.OwnershipToken) - } -} - -func TestShouldReuseProxyDaemon(t *testing.T) { - record := &ProxyDaemonRecord{UpstreamWSURL: "ws://127.0.0.1:9222/devtools/browser/test"} - health := ProxyDaemonHealth{Healthy: true} - if !ShouldReuseProxyDaemon(record, record.UpstreamWSURL, health) { - t.Fatal("ShouldReuseProxyDaemon = false, want true") - } - if ShouldReuseProxyDaemon(record, "ws://127.0.0.1:9223/devtools/browser/test", health) { - t.Fatal("ShouldReuseProxyDaemon should be false for changed upstream") - } - if ShouldReuseProxyDaemon(record, record.UpstreamWSURL, ProxyDaemonHealth{}) { - t.Fatal("ShouldReuseProxyDaemon should be false for unhealthy daemon") - } -} - -func TestCheckProxyDaemonDetectsStaleReasons(t *testing.T) { - health := CheckProxyDaemon(t.Context(), &ProxyDaemonRecord{ - PID: 999999, - ListenAddr: "127.0.0.1:1", - Endpoint: "ws://127.0.0.1:1/devtools/browser/proxy", - UpstreamWSURL: "ws://127.0.0.1:2/devtools/browser/test", - OwnershipToken: "token", - }) - if health.Healthy { - t.Fatal("health.Healthy = true, want false") - } - if !strings.Contains(health.Reason, "daemon pid is not alive") { - t.Fatalf("health.Reason = %q, want pid reason", health.Reason) - } - if !strings.Contains(health.Reason, "daemon endpoint is unreachable") { - t.Fatalf("health.Reason = %q, want endpoint reason", health.Reason) - } -} - -func TestShutdownProxyDaemonRequiresOwnershipToken(t *testing.T) { - upstream, closeUpstream := startFakeUpstreamWS(t) - defer closeUpstream() - record, cleanup := startProxyForTest(t, upstream, "correct-token") - defer cleanup() - - wrong := *record - wrong.OwnershipToken = "wrong-token" - shutdownCtx, cancel := context.WithTimeout(t.Context(), 2*time.Second) - defer cancel() - if err := ShutdownProxyDaemon(shutdownCtx, &wrong); err == nil { - t.Fatal("ShutdownProxyDaemon with wrong token should fail") - } - health := CheckProxyDaemon(t.Context(), record) - if !health.Healthy { - t.Fatalf("health after wrong-token shutdown = %+v, want healthy", health) - } - - shutdownCtx, cancel = context.WithTimeout(t.Context(), 2*time.Second) - defer cancel() - if err := ShutdownProxyDaemon(shutdownCtx, record); err != nil { - t.Fatalf("ShutdownProxyDaemon failed: %v", err) - } -} diff --git a/browser/discover.go b/browser/discover.go deleted file mode 100644 index 22880af..0000000 --- a/browser/discover.go +++ /dev/null @@ -1,92 +0,0 @@ -package browser - -import ( - "fmt" - "os" - "path/filepath" - "runtime" - "strconv" - "strings" -) - -// DiscoverUserChromeDebugURL resolves the browser WebSocket URL for an already -// running user Chrome/Chromium instance that has remote debugging enabled. -// It reads the DevToolsActivePort file from common user-data directories. -func DiscoverUserChromeDebugURL() (string, string, error) { - for _, path := range devToolsActivePortCandidates() { - url, err := ResolveDebugURLFromDevToolsFile(path) - if err == nil { - return url, path, nil - } - if !os.IsNotExist(err) { - return "", path, err - } - } - return "", "", fmt.Errorf("could not find DevToolsActivePort for a running user Chrome; enable remote debugging on your existing browser or pass --upstream") -} - -// ResolveDebugURLFromDevToolsFile parses a DevToolsActivePort file and returns -// the browser WebSocket URL. -func ResolveDebugURLFromDevToolsFile(path string) (string, error) { - content, err := os.ReadFile(path) - if err != nil { - return "", err - } - return parseDevToolsActivePort(string(content)) -} - -func parseDevToolsActivePort(content string) (string, error) { - lines := strings.Split(strings.TrimSpace(content), "\n") - if len(lines) < 2 { - return "", fmt.Errorf("invalid DevToolsActivePort content") - } - - port, err := strconv.Atoi(strings.TrimSpace(lines[0])) - if err != nil || port <= 0 { - return "", fmt.Errorf("invalid DevToolsActivePort port") - } - - wsPath := strings.TrimSpace(lines[1]) - if !strings.HasPrefix(wsPath, "/") { - return "", fmt.Errorf("invalid DevToolsActivePort websocket path") - } - - return fmt.Sprintf("ws://127.0.0.1:%d%s", port, wsPath), nil -} - -func devToolsActivePortCandidates() []string { - home, _ := os.UserHomeDir() - xdgConfig := os.Getenv("XDG_CONFIG_HOME") - if xdgConfig == "" && home != "" { - xdgConfig = filepath.Join(home, ".config") - } - - switch runtime.GOOS { - case "darwin": - return existingCandidates(home, - filepath.Join(home, "Library", "Application Support", "Google", "Chrome", "DevToolsActivePort"), - filepath.Join(home, "Library", "Application Support", "Google", "Chrome Beta", "DevToolsActivePort"), - filepath.Join(home, "Library", "Application Support", "Chromium", "DevToolsActivePort"), - ) - case "windows": - localAppData := os.Getenv("LOCALAPPDATA") - return existingCandidates(localAppData, - filepath.Join(localAppData, "Google", "Chrome", "User Data", "DevToolsActivePort"), - filepath.Join(localAppData, "Google", "Chrome Beta", "User Data", "DevToolsActivePort"), - filepath.Join(localAppData, "Chromium", "User Data", "DevToolsActivePort"), - ) - default: - return existingCandidates(xdgConfig, - filepath.Join(xdgConfig, "google-chrome", "DevToolsActivePort"), - filepath.Join(xdgConfig, "google-chrome-beta", "DevToolsActivePort"), - filepath.Join(xdgConfig, "chromium", "DevToolsActivePort"), - ) - } -} - -func existingCandidates(root string, paths ...string) []string { - if strings.TrimSpace(root) == "" { - return nil - } - return paths -} diff --git a/browser/discover_test.go b/browser/discover_test.go deleted file mode 100644 index 412073f..0000000 --- a/browser/discover_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package browser - -import "testing" - -func TestParseDevToolsActivePort(t *testing.T) { - t.Run("valid content", func(t *testing.T) { - got, err := parseDevToolsActivePort("9222\n/devtools/browser/abc\n") - if err != nil { - t.Fatalf("parseDevToolsActivePort returned error: %v", err) - } - want := "ws://127.0.0.1:9222/devtools/browser/abc" - if got != want { - t.Fatalf("parseDevToolsActivePort = %q, want %q", got, want) - } - }) - - t.Run("invalid port", func(t *testing.T) { - if _, err := parseDevToolsActivePort("abc\n/devtools/browser/abc\n"); err == nil { - t.Fatal("parseDevToolsActivePort should reject invalid ports") - } - }) - - t.Run("invalid websocket path", func(t *testing.T) { - if _, err := parseDevToolsActivePort("9222\ndevtools/browser/abc\n"); err == nil { - t.Fatal("parseDevToolsActivePort should reject invalid websocket paths") - } - }) -} diff --git a/browser/doc.go b/browser/doc.go index d956f1f..c5126c4 100644 --- a/browser/doc.go +++ b/browser/doc.go @@ -1,21 +1,21 @@ -// Package browser defines the persistent browser session metadata model used by -// the planned `tap browser ...` workflow. +// Package browser provides the agent-browser adapter for tap. // -// Phase 1 establishes the durable state contract before the runtime is added: +// It extracts tap's embedded agent-browser binary and provides a thin Go wrapper around the agent-browser CLI with --json +// output. The adapter handles --session-name injection, stdin-based eval, +// and basic typed wrappers for common commands. // -// - A session is one persistent browser instance, either local or remote. -// - A tab is a named tracked browser target within a session. -// - Stored metadata is the source of truth for names and selection defaults. -// - Each command must reconcile metadata against live targets before acting. -// - Untracked live browser tabs are ignored by default. -// - Missing tracked targets become stale until they are recreated or removed. +// Key types: // -// The package also defines the initial local-vs-remote capability matrix used -// by CLI help text and README documentation: +// - AgentBrowser: the core adapter with Exec(), Open(), Eval(), GetHTML(), Close() +// - AgentBrowserInstall: extracts and manages the embedded agent-browser binary +// - AgentBrowserEnvelope[T]: generic JSON response envelope // -// - Local sessions own their browser process and profile directory. -// - Remote sessions bind metadata to an explicit CDP WebSocket endpoint. -// - Remote session creation validates the endpoint up front. -// - Remote session close removes tap metadata only and never kills the remote -// browser process. +// Binary resolution order: +// 1. $TAP_AGENT_BROWSER env var +// 2. embedded binary extracted to ~/.cache/tap/agent-browser/agent-browser +// 3. agent-browser on $PATH +// +// Session model: +// - Default sessions use --session-name default for durable persistence +// - Attached mode uses agent-browser connect (no --session-name) package browser diff --git a/browser/electron.go b/browser/electron.go deleted file mode 100644 index ddc01ee..0000000 --- a/browser/electron.go +++ /dev/null @@ -1,104 +0,0 @@ -package browser - -import ( - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "os/exec" - "time" -) - -// ElectronProcess describes a running process that exposes a CDP debug port. -type ElectronProcess struct { - PID int - Name string // basename of the binary - Port int -} - -// ScanElectronProcesses returns all running processes that have -// --remote-debugging-port=PORT in their command line. It works for Electron -// apps, CEF-based apps, and any Chromium-derived binary launched with a debug -// port. Returns an empty slice (not an error) when no matching processes exist. -func ScanElectronProcesses(ctx context.Context) ([]ElectronProcess, error) { - return scanElectronProcesses(ctx) -} - -// ResolveElectronDebugURL queries /json/version at the given port and returns -// the WebSocket debug URL. Electron (and Chrome) publish the exact WS URL -// including the browser UUID path in the webSocketDebuggerUrl field. -func ResolveElectronDebugURL(ctx context.Context, port int) (string, error) { - httpURL := fmt.Sprintf("http://127.0.0.1:%d/json/version", port) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, httpURL, nil) - if err != nil { - return "", fmt.Errorf("build request: %w", err) - } - client := &http.Client{Timeout: 10 * time.Second} - resp, err := client.Do(req) - if err != nil { - return "", fmt.Errorf("query /json/version at port %d: %w", port, err) - } - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() - - var info struct { - WebSocketDebuggerURL string `json:"webSocketDebuggerUrl"` - } - if err := json.NewDecoder(resp.Body).Decode(&info); err != nil { - return "", fmt.Errorf("parse /json/version: %w", err) - } - if info.WebSocketDebuggerURL == "" { - return "", fmt.Errorf("no webSocketDebuggerUrl in /json/version response at port %d", port) - } - return info.WebSocketDebuggerURL, nil -} - -// LaunchElectronApp starts an Electron (or Electron-based) binary with -// --remote-debugging-port=0 prepended to the argument list. The OS assigns a -// free port; the debug WebSocket URL is extracted from stderr exactly as with -// Chrome. extra holds any additional arguments to pass to the binary. -// -// The returned ProcessRecord contains the PID and debug URL. The process is -// released (detached) so it survives tap exit; callers are responsible for -// tracking lifecycle via the PID and debug URL. -func LaunchElectronApp(ctx context.Context, binaryPath string, extra []string) (*ProcessRecord, error) { - args := make([]string, 0, 1+len(extra)) - args = append(args, "--remote-debugging-port=0") - args = append(args, extra...) - - // Use exec.Command (not CommandContext) so the Electron process is not - // killed when the CLI context is cancelled — same pattern as LaunchBrowser. - cmd := exec.Command(binaryPath, args...) - cmd.SysProcAttr = platformSysProcAttr() - - stderrPipe, err := cmd.StderrPipe() - if err != nil { - return nil, fmt.Errorf("pipe electron stderr: %w", err) - } - if err := cmd.Start(); err != nil { - return nil, fmt.Errorf("start electron app: %w", err) - } - - // parseDebugURL reuses the same logic as Chrome (both emit the same line). - debugURL, err := parseDebugURL(stderrPipe, 15*time.Second) - if c, ok := stderrPipe.(io.Closer); ok { - _ = c.Close() - } - if err != nil { - _ = cmd.Process.Kill() - _ = cmd.Wait() - return nil, fmt.Errorf("parse electron debug URL: %w", err) - } - - pid := cmd.Process.Pid - _ = cmd.Process.Release() - - return &ProcessRecord{ - PID: pid, - DebugURL: debugURL, - StartedAt: time.Now().UTC(), - }, nil -} diff --git a/browser/electron_unix.go b/browser/electron_unix.go deleted file mode 100644 index 27cb117..0000000 --- a/browser/electron_unix.go +++ /dev/null @@ -1,65 +0,0 @@ -//go:build !windows - -package browser - -import ( - "context" - "os/exec" - "path/filepath" - "strconv" - "strings" -) - -// scanElectronProcesses uses ps to list all processes and filters those that -// carry --remote-debugging-port=PORT in their command line. -func scanElectronProcesses(ctx context.Context) ([]ElectronProcess, error) { - out, err := exec.CommandContext(ctx, "ps", "-ax", "-o", "pid=,args=").Output() - if err != nil { - return nil, nil // ps failure is non-fatal; return empty list - } - - var procs []ElectronProcess - for _, line := range strings.Split(string(out), "\n") { - line = strings.TrimSpace(line) - if line == "" { - continue - } - fields := strings.Fields(line) - if len(fields) < 2 { - continue - } - pid, err := strconv.Atoi(fields[0]) - if err != nil { - continue - } - port := extractDebugPort(strings.Join(fields[1:], " ")) - if port == 0 { - continue - } - procs = append(procs, ElectronProcess{ - PID: pid, - Name: filepath.Base(fields[1]), - Port: port, - }) - } - return procs, nil -} - -// extractDebugPort parses --remote-debugging-port=PORT from a command line -// string. Returns 0 if not found or not a valid port. -func extractDebugPort(cmdline string) int { - const prefix = "--remote-debugging-port=" - idx := strings.Index(cmdline, prefix) - if idx < 0 { - return 0 - } - rest := cmdline[idx+len(prefix):] - if end := strings.IndexAny(rest, " \t"); end >= 0 { - rest = rest[:end] - } - port, _ := strconv.Atoi(rest) - if port <= 0 || port > 65535 { - return 0 - } - return port -} diff --git a/browser/electron_windows.go b/browser/electron_windows.go deleted file mode 100644 index 4fd142a..0000000 --- a/browser/electron_windows.go +++ /dev/null @@ -1,17 +0,0 @@ -//go:build windows - -package browser - -import "context" - -// scanElectronProcesses is not yet implemented on Windows. -// Returns an empty list without error. -func scanElectronProcesses(_ context.Context) ([]ElectronProcess, error) { - return nil, nil -} - -// extractDebugPort parses --remote-debugging-port=PORT from a command line. -// Shared definition for Windows (unix version lives in electron_unix.go). -func extractDebugPort(_ string) int { - return 0 -} diff --git a/browser/embedded_darwin_amd64.go b/browser/embedded_darwin_amd64.go new file mode 100644 index 0000000..2b5a85b --- /dev/null +++ b/browser/embedded_darwin_amd64.go @@ -0,0 +1,8 @@ +//go:build darwin && amd64 + +package browser + +import _ "embed" + +//go:embed bin/agent-browser-darwin-x64 +var embeddedAgentBrowser []byte diff --git a/browser/embedded_darwin_arm64.go b/browser/embedded_darwin_arm64.go new file mode 100644 index 0000000..d838a60 --- /dev/null +++ b/browser/embedded_darwin_arm64.go @@ -0,0 +1,8 @@ +//go:build darwin && arm64 + +package browser + +import _ "embed" + +//go:embed bin/agent-browser-darwin-arm64 +var embeddedAgentBrowser []byte diff --git a/browser/embedded_linux_amd64.go b/browser/embedded_linux_amd64.go new file mode 100644 index 0000000..d3ff00c --- /dev/null +++ b/browser/embedded_linux_amd64.go @@ -0,0 +1,8 @@ +//go:build linux && amd64 && !musl + +package browser + +import _ "embed" + +//go:embed bin/agent-browser-linux-x64 +var embeddedAgentBrowser []byte diff --git a/browser/embedded_linux_arm64.go b/browser/embedded_linux_arm64.go new file mode 100644 index 0000000..1036f41 --- /dev/null +++ b/browser/embedded_linux_arm64.go @@ -0,0 +1,8 @@ +//go:build linux && arm64 && !musl + +package browser + +import _ "embed" + +//go:embed bin/agent-browser-linux-arm64 +var embeddedAgentBrowser []byte diff --git a/browser/embedded_unsupported.go b/browser/embedded_unsupported.go new file mode 100644 index 0000000..85a464c --- /dev/null +++ b/browser/embedded_unsupported.go @@ -0,0 +1,5 @@ +//go:build !(darwin && arm64) && !(darwin && amd64) && !(linux && arm64) && !(linux && amd64) && !(windows && amd64) + +package browser + +var embeddedAgentBrowser []byte diff --git a/browser/embedded_windows_amd64.go b/browser/embedded_windows_amd64.go new file mode 100644 index 0000000..6f84ac6 --- /dev/null +++ b/browser/embedded_windows_amd64.go @@ -0,0 +1,8 @@ +//go:build windows && amd64 + +package browser + +import _ "embed" + +//go:embed bin/agent-browser-win32-x64.exe +var embeddedAgentBrowser []byte diff --git a/browser/endpoint.go b/browser/endpoint.go deleted file mode 100644 index 2886895..0000000 --- a/browser/endpoint.go +++ /dev/null @@ -1,71 +0,0 @@ -package browser - -import ( - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "net/url" - "strings" - "time" -) - -// ResolveDebugURL accepts either a CDP browser WebSocket URL or an HTTP(S) -// DevTools base URL (for example http://127.0.0.1:9222) and returns the browser -// WebSocket URL that chromedp expects. -func ResolveDebugURL(ctx context.Context, endpoint string) (string, error) { - endpoint = strings.TrimSpace(endpoint) - if endpoint == "" { - return "", fmt.Errorf("debug endpoint is empty") - } - - u, err := url.Parse(endpoint) - if err != nil { - return "", fmt.Errorf("parse debug endpoint: %w", err) - } - - switch u.Scheme { - case "ws", "wss": - return endpoint, nil - case "http", "https": - base := strings.TrimRight(endpoint, "/") - if u.Path != "" && u.Path != "/" { - u.Path = "" - u.RawQuery = "" - u.Fragment = "" - base = strings.TrimRight(u.String(), "/") - } - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, base+"/json/version", nil) - if err != nil { - return "", fmt.Errorf("build debug endpoint request: %w", err) - } - - resp, err := (&http.Client{Timeout: 10 * time.Second}).Do(req) - if err != nil { - return "", fmt.Errorf("resolve debug endpoint: %w", err) - } - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("resolve debug endpoint: status %d", resp.StatusCode) - } - - var payload struct { - WebSocketDebuggerURL string `json:"webSocketDebuggerUrl"` - } - if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { - return "", fmt.Errorf("decode debug endpoint response: %w", err) - } - if strings.TrimSpace(payload.WebSocketDebuggerURL) == "" { - return "", fmt.Errorf("debug endpoint response missing webSocketDebuggerUrl") - } - return payload.WebSocketDebuggerURL, nil - default: - return "", fmt.Errorf("unsupported debug endpoint scheme %q", u.Scheme) - } -} diff --git a/browser/endpoint_test.go b/browser/endpoint_test.go deleted file mode 100644 index 78797fd..0000000 --- a/browser/endpoint_test.go +++ /dev/null @@ -1,66 +0,0 @@ -package browser - -import ( - "context" - "fmt" - "net/http" - "net/http/httptest" - "testing" -) - -func TestResolveDebugURLFromEndpoint(t *testing.T) { - t.Run("passes through websocket endpoints", func(t *testing.T) { - got, err := ResolveDebugURL(context.Background(), "ws://127.0.0.1:9222/devtools/browser/abc") - if err != nil { - t.Fatalf("ResolveDebugURL returned error: %v", err) - } - want := "ws://127.0.0.1:9222/devtools/browser/abc" - if got != want { - t.Fatalf("ResolveDebugURL = %q, want %q", got, want) - } - }) - - t.Run("resolves browser websocket from http endpoint", func(t *testing.T) { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path != "/json/version" { - t.Fatalf("unexpected path: %s", r.URL.Path) - } - _, _ = fmt.Fprint(w, `{"webSocketDebuggerUrl":"ws://127.0.0.1:12345/devtools/browser/proxy"}`) - })) - defer server.Close() - - got, err := ResolveDebugURL(context.Background(), server.URL) - if err != nil { - t.Fatalf("ResolveDebugURL returned error: %v", err) - } - want := "ws://127.0.0.1:12345/devtools/browser/proxy" - if got != want { - t.Fatalf("ResolveDebugURL = %q, want %q", got, want) - } - }) - - t.Run("strips extra path from http endpoint", func(t *testing.T) { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path != "/json/version" { - t.Fatalf("unexpected path: %s", r.URL.Path) - } - _, _ = fmt.Fprint(w, `{"webSocketDebuggerUrl":"ws://127.0.0.1:9222/devtools/browser/xyz"}`) - })) - defer server.Close() - - got, err := ResolveDebugURL(context.Background(), server.URL+"/devtools/browser/ignored") - if err != nil { - t.Fatalf("ResolveDebugURL returned error: %v", err) - } - want := "ws://127.0.0.1:9222/devtools/browser/xyz" - if got != want { - t.Fatalf("ResolveDebugURL = %q, want %q", got, want) - } - }) - - t.Run("rejects unsupported schemes", func(t *testing.T) { - if _, err := ResolveDebugURL(context.Background(), "ftp://example.com"); err == nil { - t.Fatal("ResolveDebugURL should reject unsupported schemes") - } - }) -} diff --git a/browser/install.go b/browser/install.go new file mode 100644 index 0000000..88ab1ae --- /dev/null +++ b/browser/install.go @@ -0,0 +1,153 @@ +package browser + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime" + "time" +) + +const ( + AgentBrowserVersion = "0.27.0" + EnvAgentBrowser = "TAP_AGENT_BROWSER" +) + +type AgentBrowserInstall struct { + binDir string + version string +} + +type AgentBrowserMeta struct { + InstalledAt time.Time `json:"installed_at"` + Source string `json:"source"` + Version string `json:"version"` +} + +func NewAgentBrowserInstall(binDir string) *AgentBrowserInstall { + if binDir == "" { + home, _ := os.UserHomeDir() + binDir = filepath.Join(home, ".cache", "tap", "agent-browser") + } + return &AgentBrowserInstall{binDir: binDir, version: AgentBrowserVersion} +} + +func ResolveAgentBrowserPath() (string, error) { + if path := os.Getenv(EnvAgentBrowser); path != "" { + return path, nil + } + install := NewAgentBrowserInstall("") + if err := install.EnsureInstalled(context.Background()); err == nil { + return install.binPath(), nil + } + if path, err := exec.LookPath("agent-browser"); err == nil { + return path, nil + } + return "", errors.New("agent-browser embedded binary is unavailable for this platform; set TAP_AGENT_BROWSER") +} + +func (a *AgentBrowserInstall) Installed() bool { + fi, err := os.Stat(a.binPath()) + return err == nil && fi.Size() > 0 +} + +func (a *AgentBrowserInstall) Update(ctx context.Context) error { + return a.extract(ctx) +} + +func (a *AgentBrowserInstall) EnsureInstalled(ctx context.Context) error { + if a.Installed() { + return nil + } + return a.extract(ctx) +} + +func (a *AgentBrowserInstall) ReadMeta() (*AgentBrowserMeta, error) { + data, err := os.ReadFile(a.metaPath()) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + return nil, err + } + var meta AgentBrowserMeta + if err := json.Unmarshal(data, &meta); err != nil { + return nil, err + } + return &meta, nil +} + +func (a *AgentBrowserInstall) binPath() string { + name := "agent-browser" + if runtime.GOOS == "windows" { + name += ".exe" + } + return filepath.Join(a.binDir, name) +} + +func (a *AgentBrowserInstall) metaPath() string { + return a.binPath() + ".meta.json" +} + +func (a *AgentBrowserInstall) extract(ctx context.Context) error { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + if len(embeddedAgentBrowser) == 0 { + return fmt.Errorf("embedded agent-browser is unavailable for %s/%s", runtime.GOOS, runtime.GOARCH) + } + if err := os.MkdirAll(a.binDir, 0o755); err != nil { + return fmt.Errorf("create dir: %w", err) + } + + bin := a.binPath() + tmp := bin + ".tmp" + if err := os.WriteFile(tmp, embeddedAgentBrowser, 0o755); err != nil { + _ = os.Remove(tmp) + return fmt.Errorf("write embedded binary: %w", err) + } + if err := os.Rename(tmp, bin); err != nil { + _ = os.Remove(tmp) + return fmt.Errorf("rename binary: %w", err) + } + return a.writeMeta("embedded") +} + +func (a *AgentBrowserInstall) writeMeta(source string) error { + meta := AgentBrowserMeta{InstalledAt: time.Now().UTC(), Source: source, Version: a.version} + data, err := json.MarshalIndent(meta, "", " ") + if err != nil { + return err + } + return os.WriteFile(a.metaPath(), data, 0o644) +} + +func agentBrowserPlatform(goos, goarch string) (string, error) { + switch goos { + case "darwin": + if goarch == "arm64" { + return "darwin-arm64", nil + } + if goarch == "amd64" { + return "darwin-x64", nil + } + case "linux": + if goarch == "arm64" { + return "linux-arm64", nil + } + if goarch == "amd64" { + return "linux-x64", nil + } + case "windows": + if goarch == "amd64" { + return "win32-x64", nil + } + } + return "", fmt.Errorf("unsupported platform for agent-browser: %s/%s", goos, goarch) +} diff --git a/browser/install_test.go b/browser/install_test.go new file mode 100644 index 0000000..8c91455 --- /dev/null +++ b/browser/install_test.go @@ -0,0 +1,52 @@ +package browser + +import ( + "context" + "os" + "testing" +) + +func TestAgentBrowserPlatform(t *testing.T) { + tests := []struct { + goos string + arch string + want string + }{ + {"darwin", "arm64", "darwin-arm64"}, + {"darwin", "amd64", "darwin-x64"}, + {"linux", "arm64", "linux-arm64"}, + {"linux", "amd64", "linux-x64"}, + {"windows", "amd64", "win32-x64"}, + } + for _, tt := range tests { + got, err := agentBrowserPlatform(tt.goos, tt.arch) + if err != nil { + t.Fatalf("agentBrowserPlatform(%q, %q): %v", tt.goos, tt.arch, err) + } + if got != tt.want { + t.Fatalf("agentBrowserPlatform(%q, %q) = %q, want %q", tt.goos, tt.arch, got, tt.want) + } + } +} + +func TestAgentBrowserExtract(t *testing.T) { + dir := t.TempDir() + install := NewAgentBrowserInstall(dir) + if err := install.Update(context.Background()); err != nil { + t.Fatal(err) + } + fi, err := os.Stat(install.binPath()) + if err != nil { + t.Fatal(err) + } + if fi.Size() == 0 { + t.Fatal("extracted binary is empty") + } + meta, err := install.ReadMeta() + if err != nil { + t.Fatal(err) + } + if meta == nil || meta.Source != "embedded" || meta.Version != AgentBrowserVersion { + t.Fatalf("meta = %#v", meta) + } +} diff --git a/browser/lightpanda.go b/browser/lightpanda.go deleted file mode 100644 index 2e27c7e..0000000 --- a/browser/lightpanda.go +++ /dev/null @@ -1,317 +0,0 @@ -// Package browser manages alternative browser backends for CDP-based automation. -package browser - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "io" - "log" - "net" - "net/http" - "os" - "os/exec" - "path/filepath" - "runtime" - "time" -) - -const ( - lightpandaReleaseURL = "https://github.com/lightpanda-io/browser/releases/download/nightly" - lightpandaDefaultPort = "9224" - lightpandaTimeout = "180" -) - -// Lightpanda manages the Lightpanda browser process. -type Lightpanda struct { - cmd *exec.Cmd - binDir string - port string -} - -// NewLightpanda creates a new Lightpanda manager. -// binDir is where the binary is stored; if empty, defaults to ~/.cache/tap/lightpanda/. -// port is the CDP port; if empty, defaults to 9222. -func NewLightpanda(binDir, port string) *Lightpanda { - if binDir == "" { - home, _ := os.UserHomeDir() - binDir = filepath.Join(home, ".cache", "tap", "lightpanda") - } - if port == "" { - port = lightpandaDefaultPort - } - return &Lightpanda{binDir: binDir, port: port} -} - -// WSURL returns the WebSocket URL for the running Lightpanda instance. -func (lp *Lightpanda) WSURL() string { - return "ws://127.0.0.1:" + lp.port + "/" -} - -// EnsureInstalled downloads the Lightpanda binary if it doesn't exist or is empty. -func (lp *Lightpanda) EnsureInstalled(ctx context.Context) error { - bin := lp.binPath() - if fi, err := os.Stat(bin); err == nil && fi.Size() > 0 { - return nil // already installed - } - - log.Println("downloading lightpanda browser") - return lp.download(ctx) -} - -// Running reports whether the Lightpanda server is currently running. -func (lp *Lightpanda) Running() bool { - return lp.cmd != nil -} - -// Start launches the Lightpanda server and waits for it to be ready. -// It is safe to call multiple times — subsequent calls are no-ops if already running. -func (lp *Lightpanda) Start(ctx context.Context) error { - if lp.cmd != nil { - return nil - } - - if err := lp.EnsureInstalled(ctx); err != nil { - return fmt.Errorf("ensure installed: %w", err) - } - - // If the configured port is already in use, pick a free one. - if isPortInUse(lp.port) { - free, err := freePort() - if err != nil { - return fmt.Errorf("find free port: %w", err) - } - log.Printf("port %s in use, using %s", lp.port, free) - lp.port = free - } - - bin := lp.binPath() - cmd := exec.CommandContext(ctx, bin, "--port", lp.port, "--timeout", lightpandaTimeout) - cmd.Stdout = io.Discard - cmd.Stderr = io.Discard - - if err := cmd.Start(); err != nil { - return fmt.Errorf("start lightpanda: %w", err) - } - - // Monitor process exit to detect early failures (e.g. port already in use). - exited := make(chan error, 1) - go func() { exited <- cmd.Wait() }() - - if err := waitForPortOrExit(ctx, lp.port, 10*time.Second, exited); err != nil { - _ = cmd.Process.Kill() - <-exited - return err - } - - lp.cmd = cmd - log.Printf("lightpanda browser ready: %s", lp.WSURL()) - return nil -} - -// Stop terminates the Lightpanda process. -func (lp *Lightpanda) Stop() { - if lp.cmd == nil || lp.cmd.Process == nil { - return - } - _ = lp.cmd.Process.Kill() - _ = lp.cmd.Wait() - lp.cmd = nil - log.Println("lightpanda stopped") -} - -// Cleanup removes the downloaded binary and its metadata. -func (lp *Lightpanda) Cleanup() error { - bin := lp.binPath() - if err := os.Remove(bin); err != nil && !errors.Is(err, os.ErrNotExist) { - return fmt.Errorf("remove lightpanda binary: %w", err) - } - // Best-effort removal of metadata file. - _ = os.Remove(lp.metaPath()) - return nil -} - -// Meta holds metadata about the installed Lightpanda binary. -type Meta struct { - DownloadedAt time.Time `json:"downloaded_at"` - URL string `json:"url"` -} - -// Installed reports whether the Lightpanda binary exists and is non-empty. -func (lp *Lightpanda) Installed() bool { - fi, err := os.Stat(lp.binPath()) - return err == nil && fi.Size() > 0 -} - -// ReadMeta reads the metadata file for the installed binary. -// Returns nil if no metadata file exists. -func (lp *Lightpanda) ReadMeta() (*Meta, error) { - data, err := os.ReadFile(lp.metaPath()) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - return nil, nil - } - return nil, err - } - var m Meta - if err := json.Unmarshal(data, &m); err != nil { - return nil, err - } - return &m, nil -} - -// Update re-downloads the Lightpanda binary regardless of whether it exists. -func (lp *Lightpanda) Update(ctx context.Context) error { - log.Println("updating lightpanda browser") - return lp.download(ctx) -} - -func (lp *Lightpanda) writeMeta(url string) error { - m := Meta{ - DownloadedAt: time.Now().UTC(), - URL: url, - } - data, err := json.MarshalIndent(m, "", " ") - if err != nil { - return err - } - return os.WriteFile(lp.metaPath(), data, 0o644) -} - -func (lp *Lightpanda) metaPath() string { - return lp.binPath() + ".meta.json" -} - -func (lp *Lightpanda) binPath() string { - return filepath.Join(lp.binDir, "lightpanda") -} - -func (lp *Lightpanda) download(ctx context.Context) error { - url, err := lightpandaNightlyURL() - if err != nil { - return err - } - - // Create dir if needed. - if err := os.MkdirAll(lp.binDir, 0o755); err != nil { - return fmt.Errorf("create dir: %w", err) - } - - bin := lp.binPath() - tmp := bin + ".tmp" - - f, err := os.OpenFile(tmp, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o755) - if err != nil { - return fmt.Errorf("open file: %w", err) - } - defer func() { - _ = f.Close() - _ = os.Remove(tmp) // clean up on failure; no-op after successful rename - }() - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) - if err != nil { - return fmt.Errorf("create request: %w", err) - } - - client := &http.Client{} - defer client.CloseIdleConnections() - - resp, err := client.Do(req) - if err != nil { - return fmt.Errorf("download: %w", err) - } - defer func() { _ = resp.Body.Close() }() - - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("download failed: HTTP %d", resp.StatusCode) - } - - if _, err := io.Copy(f, resp.Body); err != nil { - return fmt.Errorf("write binary: %w", err) - } - - if err := f.Close(); err != nil { - return fmt.Errorf("close file: %w", err) - } - - if err := os.Rename(tmp, bin); err != nil { - return fmt.Errorf("rename binary: %w", err) - } - - if err := lp.writeMeta(url); err != nil { - log.Printf("warning: failed to write metadata: %v", err) - } - - log.Printf("lightpanda browser downloaded: %s", bin) - return nil -} - -// lightpandaNightlyURL returns the download URL for the current OS/arch. -func lightpandaNightlyURL() (string, error) { - var osName string - switch runtime.GOOS { - case "linux": - osName = "linux" - case "darwin": - osName = "macos" - default: - return "", fmt.Errorf("unsupported OS for lightpanda: %s", runtime.GOOS) - } - - var arch string - switch runtime.GOARCH { - case "amd64": - arch = "x86_64" - case "arm64": - arch = "aarch64" - default: - return "", fmt.Errorf("unsupported arch for lightpanda: %s", runtime.GOARCH) - } - - return fmt.Sprintf("%s/lightpanda-%s-%s", lightpandaReleaseURL, arch, osName), nil -} - -func isPortInUse(port string) bool { - conn, err := net.DialTimeout("tcp", "127.0.0.1:"+port, 500*time.Millisecond) - if err != nil { - return false - } - _ = conn.Close() - return true -} - -func freePort() (string, error) { - ln, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - return "", err - } - defer func() { _ = ln.Close() }() - addr := ln.Addr().(*net.TCPAddr) - return fmt.Sprintf("%d", addr.Port), nil -} - -// waitForPortOrExit polls until the port accepts connections or the process exits. -func waitForPortOrExit(ctx context.Context, port string, timeout time.Duration, exited <-chan error) error { - deadline := time.Now().Add(timeout) - for time.Now().Before(deadline) { - select { - case err := <-exited: - if err != nil { - return fmt.Errorf("lightpanda exited: %w", err) - } - return fmt.Errorf("lightpanda exited unexpectedly") - case <-ctx.Done(): - return ctx.Err() - default: - } - conn, err := net.DialTimeout("tcp", "127.0.0.1:"+port, 500*time.Millisecond) - if err == nil { - _ = conn.Close() - return nil - } - time.Sleep(200 * time.Millisecond) - } - return fmt.Errorf("timeout waiting for port %s", port) -} diff --git a/browser/lock.go b/browser/lock.go deleted file mode 100644 index a5fefe7..0000000 --- a/browser/lock.go +++ /dev/null @@ -1,62 +0,0 @@ -//go:build !windows - -package browser - -import ( - "fmt" - "os" - "path/filepath" - "syscall" - "time" -) - -type fileLock struct { - file *os.File -} - -// lockFile acquires an exclusive file lock. It retries with LOCK_NB to avoid -// blocking indefinitely if another process holds the lock and never releases it. -func lockFile(path string) (*fileLock, error) { - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return nil, fmt.Errorf("create lock dir: %w", err) - } - - file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600) - if err != nil { - return nil, fmt.Errorf("open lock file: %w", err) - } - - // Retry with non-blocking flock to avoid hanging forever. - deadline := time.After(30 * time.Second) - ticker := time.NewTicker(50 * time.Millisecond) - defer ticker.Stop() - - for { - err := syscall.Flock(int(file.Fd()), syscall.LOCK_EX|syscall.LOCK_NB) - if err == nil { - return &fileLock{file: file}, nil - } - if err != syscall.EWOULDBLOCK { - _ = file.Close() - return nil, fmt.Errorf("lock file: %w", err) - } - select { - case <-deadline: - _ = file.Close() - return nil, fmt.Errorf("lock file %s: timed out after 30s", filepath.Base(path)) - case <-ticker.C: - } - } -} - -func (l *fileLock) Unlock() error { - if l == nil || l.file == nil { - return nil - } - err := syscall.Flock(int(l.file.Fd()), syscall.LOCK_UN) - closeErr := l.file.Close() - if err != nil { - return err - } - return closeErr -} diff --git a/browser/lock_test.go b/browser/lock_test.go deleted file mode 100644 index 638a8cc..0000000 --- a/browser/lock_test.go +++ /dev/null @@ -1,54 +0,0 @@ -//go:build !windows - -package browser - -import ( - "testing" - "time" -) - -func TestSessionLockSerializesConcurrentAccess(t *testing.T) { - store, err := NewStore(t.TempDir()) - if err != nil { - t.Fatalf("NewStore failed: %v", err) - } - - firstLocked := make(chan struct{}) - releaseFirst := make(chan struct{}) - secondDone := make(chan struct{}) - - go func() { - if err := store.WithSessionLock("alpha", func() error { - close(firstLocked) - <-releaseFirst - return nil - }); err != nil { - t.Errorf("first WithSessionLock failed: %v", err) - } - }() - - <-firstLocked - - go func() { - if err := store.WithSessionLock("alpha", func() error { - close(secondDone) - return nil - }); err != nil { - t.Errorf("second WithSessionLock failed: %v", err) - } - }() - - select { - case <-secondDone: - t.Fatal("second session lock acquired before first lock released") - case <-time.After(75 * time.Millisecond): - } - - close(releaseFirst) - - select { - case <-secondDone: - case <-time.After(2 * time.Second): - t.Fatal("second session lock did not acquire after release") - } -} diff --git a/browser/lock_windows.go b/browser/lock_windows.go deleted file mode 100644 index bde887f..0000000 --- a/browser/lock_windows.go +++ /dev/null @@ -1,83 +0,0 @@ -//go:build windows - -package browser - -import ( - "errors" - "fmt" - "os" - "path/filepath" - "runtime" - "time" - - "golang.org/x/sys/windows" -) - -type fileLock struct { - file *os.File -} - -// lockFile acquires an exclusive file lock on Windows using LockFileEx. -// It retries with LOCKFILE_FAIL_IMMEDIATELY to avoid blocking indefinitely. -func lockFile(path string) (*fileLock, error) { - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return nil, fmt.Errorf("create lock dir: %w", err) - } - - file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600) - if err != nil { - return nil, fmt.Errorf("open lock file: %w", err) - } - - // Overlay for LockFileEx — lock the entire file. - ol := new(windows.Overlapped) - - deadline := time.After(30 * time.Second) - ticker := time.NewTicker(50 * time.Millisecond) - defer ticker.Stop() - - for { - err := windows.LockFileEx( - windows.Handle(file.Fd()), - windows.LOCKFILE_EXCLUSIVE_LOCK|windows.LOCKFILE_FAIL_IMMEDIATELY, - 0, // reserved - 1, // lock 1 byte - 0, // high-order bytes - ol, - ) - if err == nil { - // Prevent GC from finalizing file before LockFileEx completes. - runtime.KeepAlive(file) - return &fileLock{file: file}, nil - } - - // Only retry on ERROR_LOCK_VIOLATION (lock held by another process). - // Any other error is unexpected and should be returned immediately. - if !errors.Is(err, windows.ERROR_LOCK_VIOLATION) { - _ = file.Close() - return nil, fmt.Errorf("lock file: %w", err) - } - - select { - case <-deadline: - _ = file.Close() - return nil, fmt.Errorf("lock file %s: timed out after 30s", filepath.Base(path)) - case <-ticker.C: - } - } -} - -func (l *fileLock) Unlock() error { - if l == nil || l.file == nil { - return nil - } - ol := new(windows.Overlapped) - err := windows.UnlockFileEx(windows.Handle(l.file.Fd()), 0, 1, 0, ol) - // Prevent GC from finalizing file before UnlockFileEx completes. - runtime.KeepAlive(l.file) - closeErr := l.file.Close() - if err != nil { - return err - } - return closeErr -} diff --git a/browser/manager.go b/browser/manager.go deleted file mode 100644 index a6ef3dc..0000000 --- a/browser/manager.go +++ /dev/null @@ -1,1236 +0,0 @@ -package browser - -import ( - "context" - "fmt" - "os" - "path/filepath" - "sort" - "sync" - "time" - - "github.com/chromedp/cdproto/cdp" -) - -// SessionOptions holds optional settings for session creation. -type SessionOptions struct { - // Headless controls whether a local browser launches headlessly. - Headless bool - // WSURL is the remote browser WebSocket endpoint (remote mode only). - WSURL string -} - -// Manager coordinates browser session lifecycle, tab management, and -// browser actions using the metadata store and CDP transport layer. -type Manager struct { - store *Store - - // interceptMu guards interceptCancel for concurrent access. - interceptMu sync.Mutex - // interceptCancel tracks the active Fetch domain interception cancel func - // per target (keyed by "session:tab"). When new rules are set, the previous - // cancel is called first to prevent goroutine leaks. - interceptCancel map[string]func() -} - -// NewManager creates a session manager backed by the given store. -func NewManager(store *Store) *Manager { - return &Manager{store: store, interceptCancel: make(map[string]func())} -} - -// --------------------------------------------------------------------------- -// Session lifecycle -// --------------------------------------------------------------------------- - -// CreateSession launches or connects to a browser and persists session metadata. -func (m *Manager) CreateSession(ctx context.Context, name string, mode Mode, opts SessionOptions) error { - if err := ValidateSessionName(name); err != nil { - return fmt.Errorf("create session: %w", err) - } - now := time.Now() - - switch mode { - case ModeLocal: - profileDir := filepath.Join(m.store.Root(), "profiles", name) - - proc, err := LaunchBrowser(ctx, LocalConfig{ProfileDir: profileDir, Headless: opts.Headless}) - if err != nil { - return fmt.Errorf("create session: %w", err) - } - - session, err := NewLocalSession(name, profileDir, opts.Headless, now) - if err != nil { - // Best-effort cleanup of the just-launched browser. - _ = KillProcess(proc) - return fmt.Errorf("create session: %w", err) - } - session.Process = proc - - if err := m.store.Update(func(state *State) error { - if err := state.CreateSession(session); err != nil { - return err - } - if name == DefaultSessionName { - if err := state.SetDefaultContext(name, DefaultContextManaged, now); err != nil { - return err - } - } - return nil - }); err != nil { - _ = KillProcess(proc) - return fmt.Errorf("create session: %w", err) - } - - case ModeRemote: - resolvedURL, err := ResolveDebugURL(ctx, opts.WSURL) - if err != nil { - return fmt.Errorf("create session: %w", err) - } - if err := checkDebugEndpoint(ctx, resolvedURL); err != nil { - return fmt.Errorf("create session: %w", err) - } - - session, err := NewRemoteSession(name, resolvedURL, now) - if err != nil { - return fmt.Errorf("create session: %w", err) - } - session.Process = &ProcessRecord{DebugURL: resolvedURL} - - if err := m.store.Update(func(state *State) error { - if err := state.CreateSession(session); err != nil { - return err - } - if name == DefaultSessionName { - if err := state.SetDefaultContext(name, DefaultContextAttached, now); err != nil { - return err - } - } - return nil - }); err != nil { - return fmt.Errorf("create session: %w", err) - } - - default: - return fmt.Errorf("create session: unsupported mode %q", mode) - } - - return nil -} - -// CloseSession terminates a browser session, kills the local process if -// applicable, and removes all related metadata. -func (m *Manager) CloseSession(ctx context.Context, name string) error { - resolved, err := m.resolveSessionName(ctx, name, false) - if err != nil { - return fmt.Errorf("close session: %w", err) - } - name = resolved - return m.store.WithSessionLock(name, func() error { - // Phase 1: read process info under state lock. - var proc *ProcessRecord - var profileDir string - var isLocal bool - err := m.store.Update(func(state *State) error { - session, err := state.ResolveSession(name) - if err != nil { - return err - } - isLocal = session.Mode == ModeLocal - if session.Process != nil { - p := *session.Process - proc = &p - } - if session.Local != nil { - profileDir = session.Local.ProfileDir - } - return nil - }) - if err != nil { - return fmt.Errorf("close session: %w", err) - } - - // Phase 2: kill the local browser process outside the state lock. - if isLocal && proc != nil { - // Best-effort: don't fail if the process is already dead. - _ = KillProcess(proc) - // Only remove the profile after confirming the process is gone, - // so we don't delete files Chrome is still writing to. - if profileDir != "" && (proc.PID <= 0 || !isProcessAlive(proc.PID)) { - removeProfileDir(profileDir) - } - } - - // Phase 3: atomically remove session from state. - return m.store.Update(func(state *State) error { - if err := state.DeleteSession(name); err != nil { - return fmt.Errorf("close session: %w", err) - } - return nil - }) - }) -} - -// SessionList holds the result of listing sessions. -type SessionList struct { - Sessions []*SessionRecord -} - -// ListSessions returns all tracked sessions sorted by name. -func (m *Manager) ListSessions(_ context.Context) (*SessionList, error) { - state, err := m.store.Load() - if err != nil { - return nil, fmt.Errorf("list sessions: %w", err) - } - - sessions := make([]*SessionRecord, 0, len(state.Sessions)) - for _, s := range state.Sessions { - sessions = append(sessions, s) - } - sort.Slice(sessions, func(i, j int) bool { - return sessions[i].Name < sessions[j].Name - }) - return &SessionList{Sessions: sessions}, nil -} - -// DefaultContext returns the persisted default browser context metadata. -func (m *Manager) DefaultContext(_ context.Context) (*DefaultContextRecord, error) { - state, err := m.store.Load() - if err != nil { - return nil, fmt.Errorf("default context: %w", err) - } - return state.DefaultContext, nil -} - -// SetDefaultContext persists the default browser context resolution. -func (m *Manager) SetDefaultContext(_ context.Context, sessionName string, kind DefaultContextKind) error { - return m.store.Update(func(state *State) error { - return state.SetDefaultContext(sessionName, kind, time.Now()) - }) -} - -// ClearDefaultContext removes the persisted default browser context. -func (m *Manager) ClearDefaultContext(_ context.Context) error { - return m.store.Update(func(state *State) error { - state.ClearDefaultContext() - return nil - }) -} - -// GetSession resolves a session by name or the persisted default context. -func (m *Manager) GetSession(_ context.Context, name string) (*SessionRecord, error) { - state, err := m.store.Load() - if err != nil { - return nil, fmt.Errorf("get session: %w", err) - } - - session, err := state.ResolveSessionByPreference(name) - if err != nil { - return nil, fmt.Errorf("get session: %w", err) - } - return session, nil -} - -// --------------------------------------------------------------------------- -// Tab lifecycle -// --------------------------------------------------------------------------- - -// CreateTab opens a new browser tab and tracks it in session metadata. -func (m *Manager) CreateTab(ctx context.Context, sessionName string, tabName string, url string) error { - resolved, err := m.resolveSessionName(ctx, sessionName, true) - if err != nil { - return fmt.Errorf("create tab: %w", err) - } - sessionName = resolved - - // Phase 1: resolve debug URL and check for duplicates under lock. - var debugURL string - err = m.store.UpdateSession(sessionName, func(_ *State, session *SessionRecord) error { - if _, exists := session.Tabs[tabName]; exists { - return fmt.Errorf("create tab: tab %q already exists in session %q", tabName, sessionName) - } - du, err := resolveDebugURL(session) - if err != nil { - return fmt.Errorf("create tab: %w", err) - } - debugURL = du - return nil - }) - if err != nil { - return err - } - - // Phase 2: create CDP target outside the state lock. - targetID, err := CreateTarget(ctx, debugURL, url) - if err != nil { - return fmt.Errorf("create tab: %w", err) - } - - // Phase 3: persist tab metadata under lock. Re-check for duplicates - // since another CreateTab could have raced between Phase 1 and Phase 3. - // On any failure, best-effort close the orphaned CDP target. - err = m.store.UpdateSession(sessionName, func(state *State, session *SessionRecord) error { - if _, exists := session.Tabs[tabName]; exists { - return fmt.Errorf("create tab: tab %q already exists in session %q", tabName, sessionName) - } - now := time.Now() - tab, err := NewTab(tabName, targetID, url, now) - if err != nil { - return fmt.Errorf("create tab: %w", err) - } - - if err := state.UpsertTab(sessionName, tab); err != nil { - return fmt.Errorf("create tab: %w", err) - } - return nil - }) - if err != nil { - // Best-effort cleanup of the orphaned CDP target. - _ = CloseTarget(ctx, debugURL, targetID) - return err - } - return nil -} - -// AdoptTab registers an existing CDP target as a tracked tab without creating -// a new browser target. Use this to import pre-existing Electron windows or -// other targets that were not launched by tap. targetID must be a live target -// reachable through the session's debug URL. -func (m *Manager) AdoptTab(ctx context.Context, sessionName string, tabName string, targetID string, url string) error { - resolved, err := m.resolveSessionName(ctx, sessionName, true) - if err != nil { - return fmt.Errorf("adopt tab: %w", err) - } - sessionName = resolved - - now := time.Now() - return m.store.UpdateSession(sessionName, func(state *State, session *SessionRecord) error { - if _, exists := session.Tabs[tabName]; exists { - return fmt.Errorf("adopt tab: tab %q already exists in session %q", tabName, sessionName) - } - tab, err := NewTab(tabName, targetID, url, now) - if err != nil { - return fmt.Errorf("adopt tab: %w", err) - } - if err := state.UpsertTab(sessionName, tab); err != nil { - return fmt.Errorf("adopt tab: %w", err) - } - return nil - }) -} - -// CloseTab closes a browser tab and removes it from session metadata. -func (m *Manager) CloseTab(ctx context.Context, sessionName string, tabName string) error { - resolved, err := m.resolveSessionName(ctx, sessionName, true) - if err != nil { - return fmt.Errorf("close tab: %w", err) - } - sessionName = resolved - - // Phase 1: resolve tab info under lock. - var debugURL, targetID, resolvedTab string - var isLive bool - err = m.store.UpdateSession(sessionName, func(_ *State, session *SessionRecord) error { - tab, err := session.ResolveTab(tabName) - if err != nil { - return fmt.Errorf("close tab: %w", err) - } - resolvedTab = tab.Name - isLive = tab.Status == TabStatusLive && tab.TargetID != "" - if isLive { - du, err := resolveDebugURL(session) - if err != nil { - return fmt.Errorf("close tab: %w", err) - } - debugURL = du - targetID = tab.TargetID - } - return nil - }) - if err != nil { - return err - } - - // Phase 2: close the CDP target outside the lock. - if isLive { - if err := CloseTarget(ctx, debugURL, targetID); err != nil { - return fmt.Errorf("close tab: %w", err) - } - } - - // Phase 3: remove tab metadata under lock. - return m.store.UpdateSession(sessionName, func(state *State, _ *SessionRecord) error { - if err := state.DeleteTab(sessionName, resolvedTab); err != nil { - return fmt.Errorf("close tab: %w", err) - } - return nil - }) -} - -// TabList holds the result of listing tabs including the current selection. -type TabList struct { - Tabs []*TabRecord - SelectedTab string -} - -// ListTabs returns all tracked tabs for a session sorted by creation time. -func (m *Manager) ListTabs(_ context.Context, sessionName string) (*TabList, error) { - state, err := m.store.Load() - if err != nil { - return nil, fmt.Errorf("list tabs: %w", err) - } - - session, err := state.ResolveSessionByPreference(sessionName) - if err != nil { - return nil, fmt.Errorf("list tabs: %w", err) - } - - tabs := make([]*TabRecord, 0, len(session.Tabs)) - for _, t := range session.Tabs { - tabs = append(tabs, t) - } - sort.Slice(tabs, func(i, j int) bool { - if tabs[i].CreatedAt.Equal(tabs[j].CreatedAt) { - return tabs[i].Name < tabs[j].Name - } - return tabs[i].CreatedAt.Before(tabs[j].CreatedAt) - }) - return &TabList{Tabs: tabs, SelectedTab: session.SelectedTab}, nil -} - -// SelectTab persists the default tab used when --tab is omitted. -func (m *Manager) SelectTab(ctx context.Context, sessionName string, tabName string) error { - resolved, err := m.resolveSessionName(ctx, sessionName, true) - if err != nil { - return fmt.Errorf("select tab: %w", err) - } - sessionName = resolved - return m.store.UpdateSession(sessionName, func(state *State, _ *SessionRecord) error { - if err := state.SelectTab(sessionName, tabName); err != nil { - return fmt.Errorf("select tab: %w", err) - } - return nil - }) -} - -// --------------------------------------------------------------------------- -// Browser actions -// --------------------------------------------------------------------------- - -// Navigate changes the URL of a tracked tab. -func (m *Manager) Navigate(ctx context.Context, sessionName string, tabName string, url string) error { - resolved, err := m.resolveSessionName(ctx, sessionName, true) - if err != nil { - return fmt.Errorf("navigate: %w", err) - } - sessionName = resolved - // Phase 1: resolve session/tab under lock, release before CDP I/O. - var debugURL, targetID, resolvedSession, resolvedTab string - err = m.store.UpdateSession(sessionName, func(_ *State, session *SessionRecord) error { - tab, err := session.ResolveTab(tabName) - if err != nil { - return fmt.Errorf("navigate: %w", err) - } - if err := requireLiveTab(tab); err != nil { - return fmt.Errorf("navigate: %w", err) - } - du, err := resolveDebugURL(session) - if err != nil { - return fmt.Errorf("navigate: %w", err) - } - debugURL = du - targetID = tab.TargetID - resolvedSession = session.Name - resolvedTab = tab.Name - return nil - }) - if err != nil { - return err - } - - // Phase 2: CDP navigation outside any lock. - if err := NavigateTarget(ctx, debugURL, targetID, url); err != nil { - return fmt.Errorf("navigate: %w", err) - } - - // Phase 3: persist URL update under lock. - return m.store.UpdateSession(resolvedSession, func(_ *State, session *SessionRecord) error { - tab, ok := session.Tabs[resolvedTab] - if !ok { - // Tab was deleted between Phase 2 and Phase 3. Navigation - // succeeded in the browser but we can't update metadata. - return fmt.Errorf("navigate: tab %q was removed during navigation", resolvedTab) - } - tab.URL = url - tab.UpdatedAt = time.Now().UTC() - return nil - }) -} - -// Evaluate runs JavaScript in a tracked tab and returns the result. -func (m *Manager) Evaluate(ctx context.Context, sessionName string, tabName string, js string) (any, error) { - // Resolve session/tab under lock, then release before CDP I/O. - rt, err := m.resolveTarget(ctx, sessionName, tabName, "evaluate") - if err != nil { - return nil, err - } - - result, err := EvalTarget(ctx, rt.DebugURL, rt.TargetID, js) - if err != nil { - return nil, fmt.Errorf("evaluate: %w", err) - } - return result, nil -} - -// ScreenshotResult holds the screenshot data and resolved names. -type ScreenshotResult struct { - Data []byte - SessionName string - TabName string -} - -// Screenshot captures a full-page PNG of a tracked tab. -func (m *Manager) Screenshot(ctx context.Context, sessionName string, tabName string) (*ScreenshotResult, error) { - // Resolve session/tab under lock, then release before CDP I/O. - rt, err := m.resolveTarget(ctx, sessionName, tabName, "screenshot") - if err != nil { - return nil, err - } - - buf, err := ScreenshotTarget(ctx, rt.DebugURL, rt.TargetID) - if err != nil { - return nil, fmt.Errorf("screenshot: %w", err) - } - return &ScreenshotResult{Data: buf, SessionName: rt.SessionName, TabName: rt.TabName}, nil -} - -// Forms discovers fillable form elements in a tracked tab. -func (m *Manager) Forms(ctx context.Context, sessionName string, tabName string) ([]FormField, error) { - rt, err := m.resolveTarget(ctx, sessionName, tabName, "forms") - if err != nil { - return nil, err - } - - fields, err := FormsTarget(ctx, rt.DebugURL, rt.TargetID) - if err != nil { - return nil, fmt.Errorf("forms: %w", err) - } - return fields, nil -} - -// Fill sets values in form fields of a tracked tab. -func (m *Manager) Fill(ctx context.Context, sessionName string, tabName string, fields []FillField, submitSelector string) error { - rt, err := m.resolveTarget(ctx, sessionName, tabName, "fill") - if err != nil { - return err - } - - if err := FillTarget(ctx, rt.DebugURL, rt.TargetID, fields, submitSelector); err != nil { - return fmt.Errorf("fill: %w", err) - } - return nil -} - -// FillInput represents a fill target (CSS selector or snapshot ref) and value. -type FillInput struct { - Target string - Value string -} - -// Snapshot captures and persists the latest semantic page snapshot for a tab. -func (m *Manager) Snapshot(ctx context.Context, sessionName string, tabName string, opts SnapshotOptions) (*SnapshotResult, error) { - rt, err := m.resolveTarget(ctx, sessionName, tabName, "snapshot") - if err != nil { - return nil, err - } - result, err := SnapshotTarget(ctx, rt.DebugURL, rt.TargetID, opts) - if err != nil { - return nil, fmt.Errorf("snapshot: %w", err) - } - result.GeneratedAt = time.Now().UTC() - if err := m.saveSnapshot(rt.SessionName, rt.TabName, result); err != nil { - return nil, err - } - return result, nil -} - -// PDFResult holds the PDF data and resolved names. -type PDFResult struct { - Data []byte - SessionName string - TabName string -} - -// PDF saves the current page as PDF from a tracked tab. -func (m *Manager) PDF(ctx context.Context, sessionName string, tabName string, landscape bool, printBackground bool, scale float64) (*PDFResult, error) { - rt, err := m.resolveTarget(ctx, sessionName, tabName, "pdf") - if err != nil { - return nil, err - } - buf, err := PDFTarget(ctx, rt.DebugURL, rt.TargetID, landscape, printBackground, scale) - if err != nil { - return nil, fmt.Errorf("pdf: %w", err) - } - return &PDFResult{Data: buf, SessionName: rt.SessionName, TabName: rt.TabName}, nil -} - -// Click dispatches a real mouse click on the element matching sel in a tracked tab. -func (m *Manager) Click(ctx context.Context, sessionName string, tabName string, sel string) error { - rt, err := m.resolveTarget(ctx, sessionName, tabName, "click") - if err != nil { - return err - } - if err := ClickTarget(ctx, rt.DebugURL, rt.TargetID, sel); err != nil { - return fmt.Errorf("click: %w", err) - } - return nil -} - -// ClickElement clicks an element by CSS selector or snapshot ref (e.g. @e12). -func (m *Manager) ClickElement(ctx context.Context, sessionName, tabName, arg string) error { - rt, err := m.resolveTarget(ctx, sessionName, tabName, "click") - if err != nil { - return err - } - selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) - if err != nil { - return err - } - if backendNodeID > 0 { - if err := ClickTargetByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID); err != nil { - return fmt.Errorf("click: %w", err) - } - return nil - } - if err := ClickTarget(ctx, rt.DebugURL, rt.TargetID, selector); err != nil { - return fmt.Errorf("click: %w", err) - } - return nil -} - -// Type sends individual key events to the element matching sel in a tracked tab. -func (m *Manager) Type(ctx context.Context, sessionName string, tabName string, sel string, text string) error { - rt, err := m.resolveTarget(ctx, sessionName, tabName, "type") - if err != nil { - return err - } - if err := TypeTarget(ctx, rt.DebugURL, rt.TargetID, sel, text); err != nil { - return fmt.Errorf("type: %w", err) - } - return nil -} - -// TypeElement types into an element by CSS selector or snapshot ref. -func (m *Manager) TypeElement(ctx context.Context, sessionName, tabName, arg, text string) error { - rt, err := m.resolveTarget(ctx, sessionName, tabName, "type") - if err != nil { - return err - } - selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) - if err != nil { - return err - } - if backendNodeID > 0 { - if err := TypeTargetByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID, text); err != nil { - return fmt.Errorf("type: %w", err) - } - return nil - } - if err := TypeTarget(ctx, rt.DebugURL, rt.TargetID, selector, text); err != nil { - return fmt.Errorf("type: %w", err) - } - return nil -} - -// Hover moves the mouse to the element matching sel in a tracked tab. -func (m *Manager) Hover(ctx context.Context, sessionName string, tabName string, sel string) error { - rt, err := m.resolveTarget(ctx, sessionName, tabName, "hover") - if err != nil { - return err - } - if err := HoverTarget(ctx, rt.DebugURL, rt.TargetID, sel); err != nil { - return fmt.Errorf("hover: %w", err) - } - return nil -} - -// Scroll scrolls to the element matching sel, or to absolute x,y if sel is empty. -func (m *Manager) Scroll(ctx context.Context, sessionName string, tabName string, sel string, x, y float64) error { - rt, err := m.resolveTarget(ctx, sessionName, tabName, "scroll") - if err != nil { - return err - } - if err := ScrollTarget(ctx, rt.DebugURL, rt.TargetID, sel, x, y); err != nil { - return fmt.Errorf("scroll: %w", err) - } - return nil -} - -// Select selects an option by value in a element", ArgsUsage: " ", Flags: browserActionFlags(false), - Description: `Select an option by value in a