From 91323bef32493f0626507cc65efc9371e2637f34 Mon Sep 17 00:00:00 2001 From: Vaayne Date: Thu, 11 Jun 2026 23:59:26 +0800 Subject: [PATCH 01/12] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20browser=20get/i?= =?UTF-8?q?s=20query=20commands?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get text/html/value/attr/title/url/count/box/styles and is visible/enabled/checked, accepting CSS selectors or @eN refs. --- browser/cdp_query.go | 541 +++++++++++++++++++++++++++++++++++++++ browser/manager_query.go | 261 +++++++++++++++++++ browser/query_test.go | 11 + cmd/tap/browser_get.go | 446 ++++++++++++++++++++++++++++++++ 4 files changed, 1259 insertions(+) create mode 100644 browser/cdp_query.go create mode 100644 browser/manager_query.go create mode 100644 browser/query_test.go create mode 100644 cmd/tap/browser_get.go diff --git a/browser/cdp_query.go b/browser/cdp_query.go new file mode 100644 index 0000000..90f93c6 --- /dev/null +++ b/browser/cdp_query.go @@ -0,0 +1,541 @@ +package browser + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/chromedp/cdproto/cdp" + "github.com/chromedp/cdproto/dom" + "github.com/chromedp/cdproto/runtime" + "github.com/chromedp/chromedp" +) + +// BoundingBox holds the position and dimensions of an element. +type BoundingBox struct { + X float64 `json:"x"` + Y float64 `json:"y"` + Width float64 `json:"width"` + Height float64 `json:"height"` +} + +// QueryTextTarget returns the textContent of the first element matching sel. 
+func QueryTextTarget(ctx context.Context, debugURL, targetID, sel string) (string, error) { + js := fmt.Sprintf(`(() => { + const el = document.querySelector(%q); + if (!el) throw new Error("element not found: " + %q); + return el.textContent; +})()`, sel, sel) + var result string + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true).WithAwaitPromise(true) + }), + ) + if err != nil { + return "", fmt.Errorf("query text target: %w", err) + } + return result, nil +} + +// QueryTextByBackendNodeID returns the textContent of an element by backend node ID. +func QueryTextByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) (string, error) { + var result string + err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + val, ex, err := runtime.CallFunctionOn(`function(){ return this.textContent; }`). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return err + } + if ex != nil { + return fmt.Errorf("query text by backend node: javascript exception") + } + if val != nil && val.Value != nil { + if err := json.Unmarshal(val.Value, &result); err != nil { + return fmt.Errorf("unmarshal result: %w", err) + } + } + return nil + })) + if err != nil { + return "", fmt.Errorf("query text target: %w", err) + } + return result, nil +} + +// QueryHTMLTarget returns the innerHTML of the first element matching sel. 
+func QueryHTMLTarget(ctx context.Context, debugURL, targetID, sel string) (string, error) { + js := fmt.Sprintf(`(() => { + const el = document.querySelector(%q); + if (!el) throw new Error("element not found: " + %q); + return el.innerHTML; +})()`, sel, sel) + var result string + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true).WithAwaitPromise(true) + }), + ) + if err != nil { + return "", fmt.Errorf("query html target: %w", err) + } + return result, nil +} + +// QueryHTMLByBackendNodeID returns the innerHTML of an element by backend node ID. +func QueryHTMLByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) (string, error) { + var result string + err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + val, ex, err := runtime.CallFunctionOn(`function(){ return this.innerHTML; }`). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return err + } + if ex != nil { + return fmt.Errorf("query html by backend node: javascript exception") + } + if val != nil && val.Value != nil { + if err := json.Unmarshal(val.Value, &result); err != nil { + return fmt.Errorf("unmarshal result: %w", err) + } + } + return nil + })) + if err != nil { + return "", fmt.Errorf("query html target: %w", err) + } + return result, nil +} + +// QueryValueTarget returns the value property of the first element matching sel. +func QueryValueTarget(ctx context.Context, debugURL, targetID, sel string) (string, error) { + js := fmt.Sprintf(`(() => { + const el = document.querySelector(%q); + if (!el) throw new Error("element not found: " + %q); + return el.value !== undefined ? 
String(el.value) : ""; +})()`, sel, sel) + var result string + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true).WithAwaitPromise(true) + }), + ) + if err != nil { + return "", fmt.Errorf("query value target: %w", err) + } + return result, nil +} + +// QueryValueByBackendNodeID returns the value property of an element by backend node ID. +func QueryValueByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) (string, error) { + var result string + err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + val, ex, err := runtime.CallFunctionOn(`function(){ return this.value !== undefined ? String(this.value) : ""; }`). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return err + } + if ex != nil { + return fmt.Errorf("query value by backend node: javascript exception") + } + if val != nil && val.Value != nil { + if err := json.Unmarshal(val.Value, &result); err != nil { + return fmt.Errorf("unmarshal result: %w", err) + } + } + return nil + })) + if err != nil { + return "", fmt.Errorf("query value target: %w", err) + } + return result, nil +} + +// QueryAttrTarget returns the value of attr on the first element matching sel. +func QueryAttrTarget(ctx context.Context, debugURL, targetID, sel, attr string) (string, error) { + js := fmt.Sprintf(`(() => { + const el = document.querySelector(%q); + if (!el) throw new Error("element not found: " + %q); + const v = el.getAttribute(%q); + return v !== null ? 
v : ""; +})()`, sel, sel, attr) + var result string + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true).WithAwaitPromise(true) + }), + ) + if err != nil { + return "", fmt.Errorf("query attr target: %w", err) + } + return result, nil +} + +// QueryAttrByBackendNodeID returns the value of attr on an element by backend node ID. +func QueryAttrByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID, attr string) (string, error) { + var result string + err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + fn := fmt.Sprintf(`function(){ const v = this.getAttribute(%q); return v !== null ? v : ""; }`, attr) + val, ex, err := runtime.CallFunctionOn(fn). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return err + } + if ex != nil { + return fmt.Errorf("query attr by backend node: javascript exception") + } + if val != nil && val.Value != nil { + if err := json.Unmarshal(val.Value, &result); err != nil { + return fmt.Errorf("unmarshal result: %w", err) + } + } + return nil + })) + if err != nil { + return "", fmt.Errorf("query attr target: %w", err) + } + return result, nil +} + +// QueryTitleTarget returns the document.title of the current page. 
+func QueryTitleTarget(ctx context.Context, debugURL, targetID string) (string, error) { + var result string + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(`document.title`, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true) + }), + ) + if err != nil { + return "", fmt.Errorf("query title target: %w", err) + } + return result, nil +} + +// QueryURLTarget returns the current location.href of the page. +func QueryURLTarget(ctx context.Context, debugURL, targetID string) (string, error) { + var result string + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(`location.href`, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true) + }), + ) + if err != nil { + return "", fmt.Errorf("query url target: %w", err) + } + return result, nil +} + +// QueryCountTarget returns the number of elements matching sel. +func QueryCountTarget(ctx context.Context, debugURL, targetID, sel string) (int, error) { + js := fmt.Sprintf(`document.querySelectorAll(%q).length`, sel) + var result float64 // JSON numbers unmarshal as float64 + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true) + }), + ) + if err != nil { + return 0, fmt.Errorf("query count target: %w", err) + } + return int(result), nil +} + +// QueryBoxTarget returns the bounding box of the first element matching sel. 
+func QueryBoxTarget(ctx context.Context, debugURL, targetID, sel string) (*BoundingBox, error) { + var nodes []*cdp.Node + var box BoundingBox + err := withTarget(ctx, debugURL, targetID, + chromedp.Nodes(sel, &nodes, chromedp.ByQuery), + chromedp.ActionFunc(func(ctx context.Context) error { + if len(nodes) == 0 { + return fmt.Errorf("no element matching %q", sel) + } + model, err := dom.GetBoxModel().WithNodeID(nodes[0].NodeID).Do(ctx) + if err != nil { + return fmt.Errorf("get box model: %w", err) + } + q := model.Border + // border quad: TL, TR, BR, BL (x,y pairs) + box.X = q[0] + box.Y = q[1] + box.Width = q[2] - q[0] + box.Height = q[7] - q[1] + return nil + }), + ) + if err != nil { + return nil, fmt.Errorf("query box target: %w", err) + } + return &box, nil +} + +// QueryBoxByBackendNodeID returns the bounding box of an element by backend node ID. +func QueryBoxByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) (*BoundingBox, error) { + var box BoundingBox + err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + model, err := dom.GetBoxModel().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("get box model: %w", err) + } + q := model.Border + box.X = q[0] + box.Y = q[1] + box.Width = q[2] - q[0] + box.Height = q[7] - q[1] + return nil + })) + if err != nil { + return nil, fmt.Errorf("query box target: %w", err) + } + return &box, nil +} + +// QueryStylesTarget returns the computed styles of the first element matching sel +// as a map of property name to value. 
+func QueryStylesTarget(ctx context.Context, debugURL, targetID, sel string) (map[string]string, error) { + js := fmt.Sprintf(`(() => { + const el = document.querySelector(%q); + if (!el) throw new Error("element not found: " + %q); + const cs = window.getComputedStyle(el); + const out = {}; + for (let i = 0; i < cs.length; i++) { + const prop = cs[i]; + out[prop] = cs.getPropertyValue(prop); + } + return out; +})()`, sel, sel) + var result map[string]string + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true).WithAwaitPromise(true) + }), + ) + if err != nil { + return nil, fmt.Errorf("query styles target: %w", err) + } + return result, nil +} + +// QueryStylesByBackendNodeID returns the computed styles of an element by backend node ID. +func QueryStylesByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) (map[string]string, error) { + var result map[string]string + err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + fn := `function(){ + const cs = window.getComputedStyle(this); + const out = {}; + for (let i = 0; i < cs.length; i++) { + const prop = cs[i]; + out[prop] = cs.getPropertyValue(prop); + } + return out; +}` + val, ex, err := runtime.CallFunctionOn(fn). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). 
+ Do(ctx) + if err != nil { + return err + } + if ex != nil { + return fmt.Errorf("query styles by backend node: javascript exception") + } + if val != nil && val.Value != nil { + if err := json.Unmarshal(val.Value, &result); err != nil { + return fmt.Errorf("unmarshal result: %w", err) + } + } + return nil + })) + if err != nil { + return nil, fmt.Errorf("query styles target: %w", err) + } + return result, nil +} + +// QueryVisibleTarget returns true if the first element matching sel is visible. +// Visible means: in DOM, not display:none/visibility:hidden, and non-zero dimensions. +func QueryVisibleTarget(ctx context.Context, debugURL, targetID, sel string) (bool, error) { + js := fmt.Sprintf(`(() => { + const el = document.querySelector(%q); + if (!el) return false; + const cs = window.getComputedStyle(el); + if (cs.display === "none" || cs.visibility === "hidden" || cs.opacity === "0") return false; + const r = el.getBoundingClientRect(); + return r.width > 0 && r.height > 0; +})()`, sel) + var result bool + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true).WithAwaitPromise(true) + }), + ) + if err != nil { + return false, fmt.Errorf("query visible target: %w", err) + } + return result, nil +} + +// QueryVisibleByBackendNodeID returns true if the element identified by backend node ID is visible. 
+func QueryVisibleByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) (bool, error) { + var result bool + err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + fn := `function(){ + const cs = window.getComputedStyle(this); + if (cs.display === "none" || cs.visibility === "hidden" || cs.opacity === "0") return false; + const r = this.getBoundingClientRect(); + return r.width > 0 && r.height > 0; +}` + val, ex, err := runtime.CallFunctionOn(fn). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return err + } + if ex != nil { + return fmt.Errorf("query visible by backend node: javascript exception") + } + if val != nil && val.Value != nil { + if err := json.Unmarshal(val.Value, &result); err != nil { + return fmt.Errorf("unmarshal result: %w", err) + } + } + return nil + })) + if err != nil { + return false, fmt.Errorf("query visible target: %w", err) + } + return result, nil +} + +// QueryEnabledTarget returns true if the first element matching sel is not disabled. +func QueryEnabledTarget(ctx context.Context, debugURL, targetID, sel string) (bool, error) { + js := fmt.Sprintf(`(() => { + const el = document.querySelector(%q); + if (!el) return false; + return !el.disabled; +})()`, sel) + var result bool + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true).WithAwaitPromise(true) + }), + ) + if err != nil { + return false, fmt.Errorf("query enabled target: %w", err) + } + return result, nil +} + +// QueryEnabledByBackendNodeID returns true if the element identified by backend node ID is not disabled. 
+func QueryEnabledByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) (bool, error) { + var result bool + err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + val, ex, err := runtime.CallFunctionOn(`function(){ return !this.disabled; }`). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return err + } + if ex != nil { + return fmt.Errorf("query enabled by backend node: javascript exception") + } + if val != nil && val.Value != nil { + if err := json.Unmarshal(val.Value, &result); err != nil { + return fmt.Errorf("unmarshal result: %w", err) + } + } + return nil + })) + if err != nil { + return false, fmt.Errorf("query enabled target: %w", err) + } + return result, nil +} + +// QueryCheckedTarget returns true if the first element matching sel is checked. +func QueryCheckedTarget(ctx context.Context, debugURL, targetID, sel string) (bool, error) { + js := fmt.Sprintf(`(() => { + const el = document.querySelector(%q); + if (!el) return false; + return !!el.checked; +})()`, sel) + var result bool + err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &result, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true).WithAwaitPromise(true) + }), + ) + if err != nil { + return false, fmt.Errorf("query checked target: %w", err) + } + return result, nil +} + +// QueryCheckedByBackendNodeID returns true if the element identified by backend node ID is checked. 
+func QueryCheckedByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) (bool, error) { + var result bool + err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + val, ex, err := runtime.CallFunctionOn(`function(){ return !!this.checked; }`). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return err + } + if ex != nil { + return fmt.Errorf("query checked by backend node: javascript exception") + } + if val != nil && val.Value != nil { + if err := json.Unmarshal(val.Value, &result); err != nil { + return fmt.Errorf("unmarshal result: %w", err) + } + } + return nil + })) + if err != nil { + return false, fmt.Errorf("query checked target: %w", err) + } + return result, nil +} diff --git a/browser/manager_query.go b/browser/manager_query.go new file mode 100644 index 0000000..932af4f --- /dev/null +++ b/browser/manager_query.go @@ -0,0 +1,261 @@ +package browser + +import ( + "context" + "fmt" +) + +// QueryText returns the textContent of the element identified by arg (CSS selector or @eN ref). 
+func (m *Manager) QueryText(ctx context.Context, sessionName, tabName, arg string) (string, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "get text") + if err != nil { + return "", err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return "", err + } + if backendNodeID > 0 { + result, err := QueryTextByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID) + if err != nil { + return "", fmt.Errorf("get text: %w", err) + } + return result, nil + } + result, err := QueryTextTarget(ctx, rt.DebugURL, rt.TargetID, selector) + if err != nil { + return "", fmt.Errorf("get text: %w", err) + } + return result, nil +} + +// QueryHTML returns the innerHTML of the element identified by arg (CSS selector or @eN ref). +func (m *Manager) QueryHTML(ctx context.Context, sessionName, tabName, arg string) (string, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "get html") + if err != nil { + return "", err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return "", err + } + if backendNodeID > 0 { + result, err := QueryHTMLByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID) + if err != nil { + return "", fmt.Errorf("get html: %w", err) + } + return result, nil + } + result, err := QueryHTMLTarget(ctx, rt.DebugURL, rt.TargetID, selector) + if err != nil { + return "", fmt.Errorf("get html: %w", err) + } + return result, nil +} + +// QueryValue returns the value property of the element identified by arg (CSS selector or @eN ref). 
+func (m *Manager) QueryValue(ctx context.Context, sessionName, tabName, arg string) (string, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "get value") + if err != nil { + return "", err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return "", err + } + if backendNodeID > 0 { + result, err := QueryValueByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID) + if err != nil { + return "", fmt.Errorf("get value: %w", err) + } + return result, nil + } + result, err := QueryValueTarget(ctx, rt.DebugURL, rt.TargetID, selector) + if err != nil { + return "", fmt.Errorf("get value: %w", err) + } + return result, nil +} + +// QueryAttr returns the value of attr on the element identified by arg (CSS selector or @eN ref). +func (m *Manager) QueryAttr(ctx context.Context, sessionName, tabName, arg, attr string) (string, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "get attr") + if err != nil { + return "", err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return "", err + } + if backendNodeID > 0 { + result, err := QueryAttrByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID, attr) + if err != nil { + return "", fmt.Errorf("get attr: %w", err) + } + return result, nil + } + result, err := QueryAttrTarget(ctx, rt.DebugURL, rt.TargetID, selector, attr) + if err != nil { + return "", fmt.Errorf("get attr: %w", err) + } + return result, nil +} + +// QueryTitle returns the document.title of the current page. +func (m *Manager) QueryTitle(ctx context.Context, sessionName, tabName string) (string, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "get title") + if err != nil { + return "", err + } + result, err := QueryTitleTarget(ctx, rt.DebugURL, rt.TargetID) + if err != nil { + return "", fmt.Errorf("get title: %w", err) + } + return result, nil +} + +// QueryURL returns the current URL of the tracked tab. 
+func (m *Manager) QueryURL(ctx context.Context, sessionName, tabName string) (string, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "get url") + if err != nil { + return "", err + } + result, err := QueryURLTarget(ctx, rt.DebugURL, rt.TargetID) + if err != nil { + return "", fmt.Errorf("get url: %w", err) + } + return result, nil +} + +// QueryCount returns the number of elements matching sel in the current page. +func (m *Manager) QueryCount(ctx context.Context, sessionName, tabName, sel string) (int, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "get count") + if err != nil { + return 0, err + } + result, err := QueryCountTarget(ctx, rt.DebugURL, rt.TargetID, sel) + if err != nil { + return 0, fmt.Errorf("get count: %w", err) + } + return result, nil +} + +// QueryBox returns the bounding box of the element identified by arg (CSS selector or @eN ref). +func (m *Manager) QueryBox(ctx context.Context, sessionName, tabName, arg string) (*BoundingBox, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "get box") + if err != nil { + return nil, err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return nil, err + } + if backendNodeID > 0 { + result, err := QueryBoxByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID) + if err != nil { + return nil, fmt.Errorf("get box: %w", err) + } + return result, nil + } + result, err := QueryBoxTarget(ctx, rt.DebugURL, rt.TargetID, selector) + if err != nil { + return nil, fmt.Errorf("get box: %w", err) + } + return result, nil +} + +// QueryStyles returns the computed styles of the element identified by arg (CSS selector or @eN ref). 
+func (m *Manager) QueryStyles(ctx context.Context, sessionName, tabName, arg string) (map[string]string, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "get styles") + if err != nil { + return nil, err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return nil, err + } + if backendNodeID > 0 { + result, err := QueryStylesByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID) + if err != nil { + return nil, fmt.Errorf("get styles: %w", err) + } + return result, nil + } + result, err := QueryStylesTarget(ctx, rt.DebugURL, rt.TargetID, selector) + if err != nil { + return nil, fmt.Errorf("get styles: %w", err) + } + return result, nil +} + +// QueryVisible returns true if the element identified by arg (CSS selector or @eN ref) is visible. +func (m *Manager) QueryVisible(ctx context.Context, sessionName, tabName, arg string) (bool, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "is visible") + if err != nil { + return false, err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return false, err + } + if backendNodeID > 0 { + result, err := QueryVisibleByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID) + if err != nil { + return false, fmt.Errorf("is visible: %w", err) + } + return result, nil + } + result, err := QueryVisibleTarget(ctx, rt.DebugURL, rt.TargetID, selector) + if err != nil { + return false, fmt.Errorf("is visible: %w", err) + } + return result, nil +} + +// QueryEnabled returns true if the element identified by arg (CSS selector or @eN ref) is not disabled. 
+func (m *Manager) QueryEnabled(ctx context.Context, sessionName, tabName, arg string) (bool, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "is enabled") + if err != nil { + return false, err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return false, err + } + if backendNodeID > 0 { + result, err := QueryEnabledByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID) + if err != nil { + return false, fmt.Errorf("is enabled: %w", err) + } + return result, nil + } + result, err := QueryEnabledTarget(ctx, rt.DebugURL, rt.TargetID, selector) + if err != nil { + return false, fmt.Errorf("is enabled: %w", err) + } + return result, nil +} + +// QueryChecked returns true if the element identified by arg (CSS selector or @eN ref) is checked. +func (m *Manager) QueryChecked(ctx context.Context, sessionName, tabName, arg string) (bool, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "is checked") + if err != nil { + return false, err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return false, err + } + if backendNodeID > 0 { + result, err := QueryCheckedByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID) + if err != nil { + return false, fmt.Errorf("is checked: %w", err) + } + return result, nil + } + result, err := QueryCheckedTarget(ctx, rt.DebugURL, rt.TargetID, selector) + if err != nil { + return false, fmt.Errorf("is checked: %w", err) + } + return result, nil +} diff --git a/browser/query_test.go b/browser/query_test.go new file mode 100644 index 0000000..b5d18fd --- /dev/null +++ b/browser/query_test.go @@ -0,0 +1,11 @@ +package browser + +import "testing" + +// TestBoundingBox verifies that the BoundingBox struct is exported and usable. 
+func TestBoundingBox(t *testing.T) { + box := &BoundingBox{X: 10, Y: 20, Width: 100, Height: 50} + if box.X != 10 || box.Y != 20 || box.Width != 100 || box.Height != 50 { + t.Fatalf("unexpected BoundingBox values: %+v", box) + } +} diff --git a/cmd/tap/browser_get.go b/cmd/tap/browser_get.go new file mode 100644 index 0000000..57308ca --- /dev/null +++ b/cmd/tap/browser_get.go @@ -0,0 +1,446 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "os" + + "github.com/urfave/cli/v3" +) + +// browserGetCmd returns the `tap browser get` parent command with subcommands +// for querying page/element properties. +func browserGetCmd() *cli.Command { + return &cli.Command{ + Name: "get", + Usage: "Query element and page properties", + Description: `Read-only element and page queries. Accepts CSS selectors or snapshot refs (@eN). + +Examples: + tap browser get text "h1" + tap browser get html "#content" + tap browser get value "input[name=q]" + tap browser get attr "a.logo" href + tap browser get title + tap browser get url + tap browser get count "li.item" + tap browser get box "#sidebar" + tap browser get styles "button.primary"`, + Commands: []*cli.Command{ + browserGetTextCmd(), + browserGetHTMLCmd(), + browserGetValueCmd(), + browserGetAttrCmd(), + browserGetTitleCmd(), + browserGetURLCmd(), + browserGetCountCmd(), + browserGetBoxCmd(), + browserGetStylesCmd(), + }, + } +} + +// browserIsCmd returns the `tap browser is` parent command with subcommands +// for boolean element state checks. +func browserIsCmd() *cli.Command { + return &cli.Command{ + Name: "is", + Usage: "Check element boolean state", + Description: `Boolean element state checks. Prints "true" or "false" and exits 0. +Accepts CSS selectors or snapshot refs (@eN). 
+ +Examples: + tap browser is visible "#modal" + tap browser is enabled "button[type=submit]" + tap browser is checked "input[type=checkbox]" + tap browser is visible @e3`, + Commands: []*cli.Command{ + browserIsVisibleCmd(), + browserIsEnabledCmd(), + browserIsCheckedCmd(), + }, + } +} + +// --------------------------------------------------------------------------- +// get subcommands +// --------------------------------------------------------------------------- + +func browserGetTextCmd() *cli.Command { + return &cli.Command{ + Name: "text", + Usage: "Get the raw textContent of an element", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Print the raw textContent of the first element matching the selector. + +Unlike 'tap browser text' (which runs defuddle for clean markdown), this +returns the unprocessed DOM text — useful for reading specific values. + +Examples: + tap browser get text "h1" + tap browser get text @e2`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("selector required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryText(ctx, cmd.String("session"), cmd.String("tab"), sel) + if err != nil { + return err + } + fmt.Println(result) + return nil + }, + } +} + +func browserGetHTMLCmd() *cli.Command { + return &cli.Command{ + Name: "html", + Usage: "Get the innerHTML of an element", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Print the innerHTML of the first element matching the selector. 
+ +Examples: + tap browser get html "#article" + tap browser get html @e1`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("selector required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryHTML(ctx, cmd.String("session"), cmd.String("tab"), sel) + if err != nil { + return err + } + fmt.Println(result) + return nil + }, + } +} + +func browserGetValueCmd() *cli.Command { + return &cli.Command{ + Name: "value", + Usage: "Get the value property of an input element", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Print the .value property of the first element matching the selector. +Works on input, textarea, and select elements. + +Examples: + tap browser get value "input[name=email]" + tap browser get value @e4`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("selector required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryValue(ctx, cmd.String("session"), cmd.String("tab"), sel) + if err != nil { + return err + } + fmt.Println(result) + return nil + }, + } +} + +func browserGetAttrCmd() *cli.Command { + return &cli.Command{ + Name: "attr", + Usage: "Get an attribute value from an element", + ArgsUsage: " ", + Flags: browserActionFlags(false), + Description: `Print the value of the named attribute on the first element matching the selector. +Returns an empty string if the attribute is absent. 
+ +Examples: + tap browser get attr "a.logo" href + tap browser get attr "img#banner" src + tap browser get attr @e1 aria-label`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + args := cmd.Args().Slice() + if len(args) < 2 { + return fmt.Errorf("usage: tap browser get attr ") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryAttr(ctx, cmd.String("session"), cmd.String("tab"), args[0], args[1]) + if err != nil { + return err + } + fmt.Println(result) + return nil + }, + } +} + +func browserGetTitleCmd() *cli.Command { + return &cli.Command{ + Name: "title", + Usage: "Get the current page title", + Flags: browserActionFlags(false), + Description: `Print the document.title of the current page. + +Examples: + tap browser get title`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryTitle(ctx, cmd.String("session"), cmd.String("tab")) + if err != nil { + return err + } + fmt.Println(result) + return nil + }, + } +} + +func browserGetURLCmd() *cli.Command { + return &cli.Command{ + Name: "url", + Usage: "Get the current page URL", + Flags: browserActionFlags(false), + Description: `Print the current location.href of the tracked tab. + +Examples: + tap browser get url`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryURL(ctx, cmd.String("session"), cmd.String("tab")) + if err != nil { + return err + } + fmt.Println(result) + return nil + }, + } +} + +func browserGetCountCmd() *cli.Command { + return &cli.Command{ + Name: "count", + Usage: "Count elements matching a CSS selector", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Print the number of elements matching the CSS selector. 
+Note: count does not support @eN refs since snapshot refs address single elements. + +Examples: + tap browser get count "li.result" + tap browser get count "input[type=checkbox]"`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("selector required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryCount(ctx, cmd.String("session"), cmd.String("tab"), sel) + if err != nil { + return err + } + fmt.Println(result) + return nil + }, + } +} + +func browserGetBoxCmd() *cli.Command { + return &cli.Command{ + Name: "box", + Usage: "Get the bounding box of an element", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Print the bounding box (x, y, width, height in pixels) of the first element +matching the selector, as JSON. + +Examples: + tap browser get box "#sidebar" + tap browser get box @e2`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("selector required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryBox(ctx, cmd.String("session"), cmd.String("tab"), sel) + if err != nil { + return err + } + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + return enc.Encode(result) + }, + } +} + +func browserGetStylesCmd() *cli.Command { + return &cli.Command{ + Name: "styles", + Usage: "Get the computed CSS styles of an element", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Print the computed style properties of the first element matching the selector, +as a JSON object mapping property names to values. 
+ +Examples: + tap browser get styles "button.primary" + tap browser get styles @e1`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("selector required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryStyles(ctx, cmd.String("session"), cmd.String("tab"), sel) + if err != nil { + return err + } + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + return enc.Encode(result) + }, + } +} + +// --------------------------------------------------------------------------- +// is subcommands +// --------------------------------------------------------------------------- + +func browserIsVisibleCmd() *cli.Command { + return &cli.Command{ + Name: "visible", + Usage: "Check if an element is visible", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Print "true" if the element exists, is not hidden (display/visibility/opacity), +and has non-zero dimensions. Prints "false" otherwise. Always exits 0. + +Examples: + tap browser is visible "#modal" + tap browser is visible @e3`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("selector required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryVisible(ctx, cmd.String("session"), cmd.String("tab"), sel) + if err != nil { + return err + } + fmt.Println(result) + return nil + }, + } +} + +func browserIsEnabledCmd() *cli.Command { + return &cli.Command{ + Name: "enabled", + Usage: "Check if a form element is enabled (not disabled)", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Print "true" if the element exists and its .disabled property is false. +Prints "false" if the element is disabled or not found. Always exits 0. 
+ +Examples: + tap browser is enabled "button[type=submit]" + tap browser is enabled @e5`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("selector required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryEnabled(ctx, cmd.String("session"), cmd.String("tab"), sel) + if err != nil { + return err + } + fmt.Println(result) + return nil + }, + } +} + +func browserIsCheckedCmd() *cli.Command { + return &cli.Command{ + Name: "checked", + Usage: "Check if a checkbox or radio input is checked", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Print "true" if the element exists and its .checked property is true. +Prints "false" otherwise. Always exits 0. + +Examples: + tap browser is checked "input[type=checkbox]#agree" + tap browser is checked @e7`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("selector required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.QueryChecked(ctx, cmd.String("session"), cmd.String("tab"), sel) + if err != nil { + return err + } + fmt.Println(result) + return nil + }, + } +} From f1ebe92cd9be8fb927a6cf3bcbd21b5581163740 Mon Sep 17 00:00:00 2001 From: Vaayne Date: Thu, 11 Jun 2026 23:59:26 +0800 Subject: [PATCH 02/12] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20browser=20inter?= =?UTF-8?q?action=20commands?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dblclick, focus, check/uncheck, scrollintoview, upload, drag, mouse move/down/up/wheel, keyboard type/insert, keydown/keyup. 
--- browser/cdp_interact.go | 399 ++++++++++++++++++++++++++ browser/manager_interact.go | 279 ++++++++++++++++++ cmd/tap/browser_interact.go | 548 ++++++++++++++++++++++++++++++++++++ 3 files changed, 1226 insertions(+) create mode 100644 browser/cdp_interact.go create mode 100644 browser/manager_interact.go create mode 100644 cmd/tap/browser_interact.go diff --git a/browser/cdp_interact.go b/browser/cdp_interact.go new file mode 100644 index 0000000..675d15e --- /dev/null +++ b/browser/cdp_interact.go @@ -0,0 +1,399 @@ +package browser + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/chromedp/cdproto/cdp" + "github.com/chromedp/cdproto/dom" + "github.com/chromedp/cdproto/input" + "github.com/chromedp/cdproto/runtime" + "github.com/chromedp/chromedp" +) + +// DblClickTarget dispatches a double-click (clickCount=2) on the first +// visible element matching sel. +func DblClickTarget(ctx context.Context, debugURL, targetID, sel string) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.DoubleClick(sel, chromedp.ByQuery, chromedp.NodeVisible), + ); err != nil { + return fmt.Errorf("dblclick target: %w", err) + } + return nil +} + +// DblClickTargetByBackendNodeID dispatches a double-click via box model on a +// node resolved by its BackendNodeID. 
+func DblClickTargetByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) error { + return withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + if err := dom.ScrollIntoViewIfNeeded().WithBackendNodeID(backendNodeID).Do(ctx); err != nil { + return fmt.Errorf("scroll into view: %w", err) + } + box, err := dom.GetBoxModel().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("get box model: %w", err) + } + q := box.Content + x := (q[0] + q[2] + q[4] + q[6]) / 4 + y := (q[1] + q[3] + q[5] + q[7]) / 4 + if err := input.DispatchMouseEvent(input.MouseMoved, x, y).Do(ctx); err != nil { + return err + } + if err := input.DispatchMouseEvent(input.MousePressed, x, y). + WithButton(input.Left).WithClickCount(1).Do(ctx); err != nil { + return err + } + if err := input.DispatchMouseEvent(input.MouseReleased, x, y). + WithButton(input.Left).WithClickCount(1).Do(ctx); err != nil { + return err + } + if err := input.DispatchMouseEvent(input.MousePressed, x, y). + WithButton(input.Left).WithClickCount(2).Do(ctx); err != nil { + return err + } + return input.DispatchMouseEvent(input.MouseReleased, x, y). + WithButton(input.Left).WithClickCount(2).Do(ctx) + })) +} + +// FocusTarget focuses the first visible element matching sel. +func FocusTarget(ctx context.Context, debugURL, targetID, sel string) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.Focus(sel, chromedp.ByQuery, chromedp.NodeVisible), + ); err != nil { + return fmt.Errorf("focus target: %w", err) + } + return nil +} + +// FocusTargetByBackendNodeID focuses the node identified by backendNodeID. 
+func FocusTargetByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) error { + return withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + nodeID, err := resolveNodeID(ctx, backendNodeID) + if err != nil { + return err + } + if err := dom.Focus().WithNodeID(nodeID).Do(ctx); err != nil { + return fmt.Errorf("focus by backend node: %w", err) + } + return nil + })) +} + +// checkboxJS is a React-compatible snippet that reads and conditionally +// toggles a checkbox to match the desired state, then dispatches change/input. +const checkboxJS = ` +(function(sel, want) { + var el = document.querySelector(sel); + if (!el) throw new Error("element not found: " + sel); + if (el.type !== "checkbox") throw new Error("element is not a checkbox: " + sel); + if (el.checked !== want) { + el.click(); + } + return el.checked; +})(%q, %v) +` + +// CheckTarget ensures the checkbox matching sel is checked. +func CheckTarget(ctx context.Context, debugURL, targetID, sel string) error { + js := fmt.Sprintf(checkboxJS, sel, true) + var checked bool + if err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &checked, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true).WithAwaitPromise(true) + }), + ); err != nil { + return fmt.Errorf("check target: %w", err) + } + if !checked { + return fmt.Errorf("check target: element is still unchecked after click") + } + return nil +} + +// UncheckTarget ensures the checkbox matching sel is unchecked. 
+func UncheckTarget(ctx context.Context, debugURL, targetID, sel string) error { + js := fmt.Sprintf(checkboxJS, sel, false) + var checked bool + if err := withTarget(ctx, debugURL, targetID, + chromedp.Evaluate(js, &checked, func(p *runtime.EvaluateParams) *runtime.EvaluateParams { + return p.WithReturnByValue(true).WithAwaitPromise(true) + }), + ); err != nil { + return fmt.Errorf("uncheck target: %w", err) + } + if checked { + return fmt.Errorf("uncheck target: element is still checked after click") + } + return nil +} + +// checkboxByNodeJS is a React-compatible snippet invoked via CallFunctionOn. +const checkboxByNodeJS = `function(want){ + if (this.type !== "checkbox") throw new Error("element is not a checkbox"); + if (this.checked !== want) { + this.click(); + } + return this.checked; +}` + +// CheckTargetByBackendNodeID ensures a checkbox is checked. +func CheckTargetByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) error { + return withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + val, ex, err := runtime.CallFunctionOn(fmt.Sprintf(`function(){ return (%s).call(this, true) }`, checkboxByNodeJS)). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return fmt.Errorf("check by backend node: %w", err) + } + if ex != nil { + return fmt.Errorf("check by backend node: javascript exception") + } + var checked bool + if val != nil { + if jsonErr := json.Unmarshal(val.Value, &checked); jsonErr == nil && !checked { + return fmt.Errorf("check by backend node: element is still unchecked after click") + } + } + return nil + })) +} + +// UncheckTargetByBackendNodeID ensures a checkbox is unchecked. 
+func UncheckTargetByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) error { + return withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + val, ex, err := runtime.CallFunctionOn(fmt.Sprintf(`function(){ return (%s).call(this, false) }`, checkboxByNodeJS)). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return fmt.Errorf("uncheck by backend node: %w", err) + } + if ex != nil { + return fmt.Errorf("uncheck by backend node: javascript exception") + } + var checked bool + if val != nil { + if jsonErr := json.Unmarshal(val.Value, &checked); jsonErr == nil && checked { + return fmt.Errorf("uncheck by backend node: element is still checked after click") + } + } + return nil + })) +} + +// ScrollIntoViewTarget scrolls the first element matching sel into view. +func ScrollIntoViewTarget(ctx context.Context, debugURL, targetID, sel string) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.ScrollIntoView(sel, chromedp.ByQuery), + ); err != nil { + return fmt.Errorf("scrollintoview target: %w", err) + } + return nil +} + +// ScrollIntoViewTargetByBackendNodeID scrolls the node identified by +// backendNodeID into view. +func ScrollIntoViewTargetByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID) error { + return withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + if err := dom.ScrollIntoViewIfNeeded().WithBackendNodeID(backendNodeID).Do(ctx); err != nil { + return fmt.Errorf("scrollintoview by backend node: %w", err) + } + return nil + })) +} + +// UploadTarget sets files on the matching sel via +// DOM.setFileInputFiles. 
+func UploadTarget(ctx context.Context, debugURL, targetID, sel string, files []string) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.SetUploadFiles(sel, files, chromedp.ByQuery), + ); err != nil { + return fmt.Errorf("upload target: %w", err) + } + return nil +} + +// UploadTargetByBackendNodeID sets files on a file input node via +// DOM.setFileInputFiles using BackendNodeID. +func UploadTargetByBackendNodeID(ctx context.Context, debugURL, targetID string, backendNodeID cdp.BackendNodeID, files []string) error { + return withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + if err := dom.SetFileInputFiles(files).WithBackendNodeID(backendNodeID).Do(ctx); err != nil { + return fmt.Errorf("upload by backend node: %w", err) + } + return nil + })) +} + +// DragTarget performs a mouse-based drag from the center of srcSel to the +// center of dstSel using a sequence of mouseMoved→mousePressed→moves→mouseReleased. +func DragTarget(ctx context.Context, debugURL, targetID, srcSel, dstSel string) error { + var srcNodes, dstNodes []*cdp.Node + return withTarget(ctx, debugURL, targetID, + chromedp.Nodes(srcSel, &srcNodes, chromedp.ByQuery, chromedp.NodeVisible), + chromedp.Nodes(dstSel, &dstNodes, chromedp.ByQuery, chromedp.NodeVisible), + chromedp.ActionFunc(func(ctx context.Context) error { + if len(srcNodes) == 0 { + return fmt.Errorf("no element matching src selector %q", srcSel) + } + if len(dstNodes) == 0 { + return fmt.Errorf("no element matching dst selector %q", dstSel) + } + srcBox, err := dom.GetBoxModel().WithNodeID(srcNodes[0].NodeID).Do(ctx) + if err != nil { + return fmt.Errorf("get src box model: %w", err) + } + dstBox, err := dom.GetBoxModel().WithNodeID(dstNodes[0].NodeID).Do(ctx) + if err != nil { + return fmt.Errorf("get dst box model: %w", err) + } + sc := srcBox.Content + sx := (sc[0] + sc[2] + sc[4] + sc[6]) / 4 + sy := (sc[1] + sc[3] + sc[5] + sc[7]) / 4 + dc := dstBox.Content + dx := (dc[0] + 
dc[2] + dc[4] + dc[6]) / 4 + dy := (dc[1] + dc[3] + dc[5] + dc[7]) / 4 + + // Move to source, press, interpolate to destination, release. + if err := input.DispatchMouseEvent(input.MouseMoved, sx, sy).Do(ctx); err != nil { + return err + } + if err := input.DispatchMouseEvent(input.MousePressed, sx, sy). + WithButton(input.Left).WithClickCount(1).Do(ctx); err != nil { + return err + } + const steps = 10 + for i := 1; i <= steps; i++ { + t := float64(i) / float64(steps) + mx := sx + (dx-sx)*t + my := sy + (dy-sy)*t + if err := input.DispatchMouseEvent(input.MouseMoved, mx, my). + WithButton(input.Left).WithButtons(1).Do(ctx); err != nil { + return err + } + } + return input.DispatchMouseEvent(input.MouseReleased, dx, dy). + WithButton(input.Left).WithClickCount(1).Do(ctx) + }), + ) +} + +// MouseMoveTarget dispatches a mouseMoved event at (x, y). +func MouseMoveTarget(ctx context.Context, debugURL, targetID string, x, y float64) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.ActionFunc(func(ctx context.Context) error { + return input.DispatchMouseEvent(input.MouseMoved, x, y).Do(ctx) + }), + ); err != nil { + return fmt.Errorf("mouse move target: %w", err) + } + return nil +} + +// MouseDownTarget dispatches a mousePressed event at the current cursor +// position with the given button (left|right|middle). +func MouseDownTarget(ctx context.Context, debugURL, targetID string, button input.MouseButton) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.ActionFunc(func(ctx context.Context) error { + return input.DispatchMouseEvent(input.MousePressed, 0, 0). + WithButton(button).WithClickCount(1).Do(ctx) + }), + ); err != nil { + return fmt.Errorf("mouse down target: %w", err) + } + return nil +} + +// MouseUpTarget dispatches a mouseReleased event with the given button. 
+func MouseUpTarget(ctx context.Context, debugURL, targetID string, button input.MouseButton) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.ActionFunc(func(ctx context.Context) error { + return input.DispatchMouseEvent(input.MouseReleased, 0, 0). + WithButton(button).WithClickCount(1).Do(ctx) + }), + ); err != nil { + return fmt.Errorf("mouse up target: %w", err) + } + return nil +} + +// MouseWheelTarget dispatches a mouseWheel event with the given deltas (dy +// scrolls vertically, dx scrolls horizontally). +func MouseWheelTarget(ctx context.Context, debugURL, targetID string, dy, dx float64) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.ActionFunc(func(ctx context.Context) error { + return input.DispatchMouseEvent(input.MouseWheel, 0, 0). + WithDeltaY(dy).WithDeltaX(dx).Do(ctx) + }), + ); err != nil { + return fmt.Errorf("mouse wheel target: %w", err) + } + return nil +} + +// KeyboardTypeTarget sends per-character keyDown/char/keyUp events for text +// using chromedp.KeyEvent — same as real typing. +func KeyboardTypeTarget(ctx context.Context, debugURL, targetID, text string) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.KeyEvent(text), + ); err != nil { + return fmt.Errorf("keyboard type target: %w", err) + } + return nil +} + +// KeyboardInsertTarget calls Input.insertText — no key events, instant paste. +func KeyboardInsertTarget(ctx context.Context, debugURL, targetID, text string) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.ActionFunc(func(ctx context.Context) error { + return input.InsertText(text).Do(ctx) + }), + ); err != nil { + return fmt.Errorf("keyboard insert target: %w", err) + } + return nil +} + +// KeydownTarget holds a key down (sends a rawKeyDown event). 
+func KeydownTarget(ctx context.Context, debugURL, targetID, key string) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.ActionFunc(func(ctx context.Context) error { + return input.DispatchKeyEvent(input.KeyRawDown).WithKey(key).WithCode(key).Do(ctx) + }), + ); err != nil { + return fmt.Errorf("keydown target: %w", err) + } + return nil +} + +// KeyupTarget releases a held key (sends a keyUp event). +func KeyupTarget(ctx context.Context, debugURL, targetID, key string) error { + if err := withTarget(ctx, debugURL, targetID, + chromedp.ActionFunc(func(ctx context.Context) error { + return input.DispatchKeyEvent(input.KeyUp).WithKey(key).WithCode(key).Do(ctx) + }), + ); err != nil { + return fmt.Errorf("keyup target: %w", err) + } + return nil +} + +// resolveNodeID is a small helper that maps a BackendNodeID to a NodeID via +// DOM.describeNode, which is cheaper than a full DOM.resolveNode. +func resolveNodeID(ctx context.Context, backendNodeID cdp.BackendNodeID) (cdp.NodeID, error) { + node, err := dom.DescribeNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return 0, fmt.Errorf("describe node: %w", err) + } + return node.NodeID, nil +} diff --git a/browser/manager_interact.go b/browser/manager_interact.go new file mode 100644 index 0000000..86000f1 --- /dev/null +++ b/browser/manager_interact.go @@ -0,0 +1,279 @@ +package browser + +import ( + "context" + "fmt" + + "github.com/chromedp/cdproto/cdp" + "github.com/chromedp/cdproto/input" +) + +// DblClickElement double-clicks an element by CSS selector or snapshot ref. 
+func (m *Manager) DblClickElement(ctx context.Context, sessionName, tabName, arg string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "dblclick") + if err != nil { + return err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return err + } + if backendNodeID > 0 { + if err := DblClickTargetByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID); err != nil { + return fmt.Errorf("dblclick: %w", err) + } + return nil + } + if err := DblClickTarget(ctx, rt.DebugURL, rt.TargetID, selector); err != nil { + return fmt.Errorf("dblclick: %w", err) + } + return nil +} + +// FocusElement focuses an element by CSS selector or snapshot ref. +func (m *Manager) FocusElement(ctx context.Context, sessionName, tabName, arg string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "focus") + if err != nil { + return err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return err + } + if backendNodeID > 0 { + if err := FocusTargetByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID); err != nil { + return fmt.Errorf("focus: %w", err) + } + return nil + } + if err := FocusTarget(ctx, rt.DebugURL, rt.TargetID, selector); err != nil { + return fmt.Errorf("focus: %w", err) + } + return nil +} + +// CheckElement ensures a checkbox is checked. Accepts CSS selector or snapshot ref. 
+func (m *Manager) CheckElement(ctx context.Context, sessionName, tabName, arg string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "check") + if err != nil { + return err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return err + } + if backendNodeID > 0 { + if err := CheckTargetByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID); err != nil { + return fmt.Errorf("check: %w", err) + } + return nil + } + if err := CheckTarget(ctx, rt.DebugURL, rt.TargetID, selector); err != nil { + return fmt.Errorf("check: %w", err) + } + return nil +} + +// UncheckElement ensures a checkbox is unchecked. Accepts CSS selector or snapshot ref. +func (m *Manager) UncheckElement(ctx context.Context, sessionName, tabName, arg string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "uncheck") + if err != nil { + return err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return err + } + if backendNodeID > 0 { + if err := UncheckTargetByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID); err != nil { + return fmt.Errorf("uncheck: %w", err) + } + return nil + } + if err := UncheckTarget(ctx, rt.DebugURL, rt.TargetID, selector); err != nil { + return fmt.Errorf("uncheck: %w", err) + } + return nil +} + +// ScrollIntoViewElement scrolls an element into view. Accepts CSS selector or snapshot ref. 
+func (m *Manager) ScrollIntoViewElement(ctx context.Context, sessionName, tabName, arg string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "scrollintoview") + if err != nil { + return err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return err + } + if backendNodeID > 0 { + if err := ScrollIntoViewTargetByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID); err != nil { + return fmt.Errorf("scrollintoview: %w", err) + } + return nil + } + if err := ScrollIntoViewTarget(ctx, rt.DebugURL, rt.TargetID, selector); err != nil { + return fmt.Errorf("scrollintoview: %w", err) + } + return nil +} + +// UploadFiles sets files on a file input element. Accepts CSS selector or snapshot ref. +func (m *Manager) UploadFiles(ctx context.Context, sessionName, tabName, arg string, files []string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "upload") + if err != nil { + return err + } + selector, backendNodeID, err := m.resolveElementArg(ctx, rt, arg) + if err != nil { + return err + } + if backendNodeID > 0 { + if err := UploadTargetByBackendNodeID(ctx, rt.DebugURL, rt.TargetID, backendNodeID, files); err != nil { + return fmt.Errorf("upload: %w", err) + } + return nil + } + if err := UploadTarget(ctx, rt.DebugURL, rt.TargetID, selector, files); err != nil { + return fmt.Errorf("upload: %w", err) + } + return nil +} + +// Drag performs a mouse-based drag from srcArg to dstArg. Both accept CSS +// selectors (snapshot refs are CSS-selector-only for drag since two elements +// are needed). +func (m *Manager) Drag(ctx context.Context, sessionName, tabName, srcArg, dstArg string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "drag") + if err != nil { + return err + } + // Drag always operates on CSS selectors; resolve @eN refs to selector hints. 
+ srcSel, srcNodeID, err := m.resolveElementArg(ctx, rt, srcArg) + if err != nil { + return err + } + dstSel, dstNodeID, err := m.resolveElementArg(ctx, rt, dstArg) + if err != nil { + return err + } + // For drag with backendNodeIDs we fall back to selector hints since + // DragTarget uses CSS selector-based box model resolution. + if srcNodeID > 0 { + srcSel = selectorFromNodeID(srcSel, srcNodeID) + } + if dstNodeID > 0 { + dstSel = selectorFromNodeID(dstSel, dstNodeID) + } + if err := DragTarget(ctx, rt.DebugURL, rt.TargetID, srcSel, dstSel); err != nil { + return fmt.Errorf("drag: %w", err) + } + return nil +} + +// selectorFromNodeID returns the selector hint when available, otherwise falls +// back to a data-attribute pseudo-selector for @eN refs. In practice the +// Manager always populates SelectorHint for resolved refs, so the fallback is +// a safety net. +func selectorFromNodeID(selectorHint string, _ cdp.BackendNodeID) string { + if selectorHint != "" { + return selectorHint + } + return "*" +} + +// MouseMove dispatches a mouseMoved event at (x, y). +func (m *Manager) MouseMove(ctx context.Context, sessionName, tabName string, x, y float64) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "mouse move") + if err != nil { + return err + } + if err := MouseMoveTarget(ctx, rt.DebugURL, rt.TargetID, x, y); err != nil { + return fmt.Errorf("mouse move: %w", err) + } + return nil +} + +// MouseDown dispatches a mousePressed event with the given button. +func (m *Manager) MouseDown(ctx context.Context, sessionName, tabName string, button input.MouseButton) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "mouse down") + if err != nil { + return err + } + if err := MouseDownTarget(ctx, rt.DebugURL, rt.TargetID, button); err != nil { + return fmt.Errorf("mouse down: %w", err) + } + return nil +} + +// MouseUp dispatches a mouseReleased event with the given button. 
+func (m *Manager) MouseUp(ctx context.Context, sessionName, tabName string, button input.MouseButton) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "mouse up") + if err != nil { + return err + } + if err := MouseUpTarget(ctx, rt.DebugURL, rt.TargetID, button); err != nil { + return fmt.Errorf("mouse up: %w", err) + } + return nil +} + +// MouseWheel dispatches a mouseWheel event with the given deltas. +func (m *Manager) MouseWheel(ctx context.Context, sessionName, tabName string, dy, dx float64) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "mouse wheel") + if err != nil { + return err + } + if err := MouseWheelTarget(ctx, rt.DebugURL, rt.TargetID, dy, dx); err != nil { + return fmt.Errorf("mouse wheel: %w", err) + } + return nil +} + +// KeyboardType sends per-character key events for text (like real typing). +func (m *Manager) KeyboardType(ctx context.Context, sessionName, tabName, text string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "keyboard type") + if err != nil { + return err + } + if err := KeyboardTypeTarget(ctx, rt.DebugURL, rt.TargetID, text); err != nil { + return fmt.Errorf("keyboard type: %w", err) + } + return nil +} + +// KeyboardInsert inserts text instantly via Input.insertText (no key events). +func (m *Manager) KeyboardInsert(ctx context.Context, sessionName, tabName, text string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "keyboard insert") + if err != nil { + return err + } + if err := KeyboardInsertTarget(ctx, rt.DebugURL, rt.TargetID, text); err != nil { + return fmt.Errorf("keyboard insert: %w", err) + } + return nil +} + +// Keydown holds a key down. 
+func (m *Manager) Keydown(ctx context.Context, sessionName, tabName, key string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "keydown") + if err != nil { + return err + } + if err := KeydownTarget(ctx, rt.DebugURL, rt.TargetID, key); err != nil { + return fmt.Errorf("keydown: %w", err) + } + return nil +} + +// Keyup releases a held key. +func (m *Manager) Keyup(ctx context.Context, sessionName, tabName, key string) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "keyup") + if err != nil { + return err + } + if err := KeyupTarget(ctx, rt.DebugURL, rt.TargetID, key); err != nil { + return fmt.Errorf("keyup: %w", err) + } + return nil +} diff --git a/cmd/tap/browser_interact.go b/cmd/tap/browser_interact.go new file mode 100644 index 0000000..5938c66 --- /dev/null +++ b/cmd/tap/browser_interact.go @@ -0,0 +1,548 @@ +package main + +import ( + "context" + "fmt" + "os" + "strconv" + + "github.com/chromedp/cdproto/input" + "github.com/urfave/cli/v3" +) + +func browserDblclickCmd() *cli.Command { + return &cli.Command{ + Name: "dblclick", + Usage: "Double-click an element by CSS selector or snapshot ref", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Dispatch a real double-click (clickCount=2) on the first visible element. 
+ +Examples: + tap browser dblclick "td.editable" + tap browser dblclick @e3`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("CSS selector or @eN ref required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.DblClickElement(ctx, cmd.String("session"), cmd.String("tab"), sel); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Double-clicked %s\n", sel) + return nil + }, + } +} + +func browserFocusCmd() *cli.Command { + return &cli.Command{ + Name: "focus", + Usage: "Focus an element by CSS selector or snapshot ref", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Move browser focus to the element matching the selector. + +Examples: + tap browser focus "input[name=email]" + tap browser focus @e1`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("CSS selector or @eN ref required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.FocusElement(ctx, cmd.String("session"), cmd.String("tab"), sel); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Focused %s\n", sel) + return nil + }, + } +} + +func browserCheckCmd() *cli.Command { + return &cli.Command{ + Name: "check", + Usage: "Ensure a checkbox is checked", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Read the current checkbox state and click only if unchecked. +Dispatches React-compatible events on state change. 
+ +Examples: + tap browser check "input[name=agree]" + tap browser check @e4`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("CSS selector or @eN ref required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.CheckElement(ctx, cmd.String("session"), cmd.String("tab"), sel); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Checked %s\n", sel) + return nil + }, + } +} + +func browserUncheckCmd() *cli.Command { + return &cli.Command{ + Name: "uncheck", + Usage: "Ensure a checkbox is unchecked", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Read the current checkbox state and click only if checked. +Dispatches React-compatible events on state change. + +Examples: + tap browser uncheck "input[name=newsletter]" + tap browser uncheck @e5`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("CSS selector or @eN ref required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.UncheckElement(ctx, cmd.String("session"), cmd.String("tab"), sel); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Unchecked %s\n", sel) + return nil + }, + } +} + +func browserScrollIntoViewCmd() *cli.Command { + return &cli.Command{ + Name: "scrollintoview", + Usage: "Scroll an element into the viewport", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Scroll the element matching the selector into view using DOM.scrollIntoViewIfNeeded. 
+ +Examples: + tap browser scrollintoview "#footer" + tap browser scrollintoview @e7`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + sel := cmd.Args().First() + if sel == "" { + return fmt.Errorf("CSS selector or @eN ref required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.ScrollIntoViewElement(ctx, cmd.String("session"), cmd.String("tab"), sel); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Scrolled %s into view\n", sel) + return nil + }, + } +} + +func browserUploadCmd() *cli.Command { + return &cli.Command{ + Name: "upload", + Usage: "Set files on a file input element", + ArgsUsage: " [file...]", + Flags: browserActionFlags(false), + Description: `Set one or more local file paths on a element via +DOM.setFileInputFiles. The files must exist on the local filesystem. + +Examples: + tap browser upload "input[type=file]" /tmp/report.pdf + tap browser upload "#avatar" photo.jpg`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + args := cmd.Args().Slice() + if len(args) < 2 { + return fmt.Errorf("usage: tap browser upload [file...]") + } + sel := args[0] + files := args[1:] + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.UploadFiles(ctx, cmd.String("session"), cmd.String("tab"), sel, files); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Uploaded %d file(s) to %s\n", len(files), sel) + return nil + }, + } +} + +func browserDragCmd() *cli.Command { + return &cli.Command{ + Name: "drag", + Usage: "Mouse-based drag and drop from source to destination", + ArgsUsage: " ", + Flags: browserActionFlags(false), + Description: `Perform a real mouse drag: move→press→interpolate→release. + +Both arguments are CSS selectors (or snapshot refs whose selector hints are used). 
+ +Examples: + tap browser drag ".card" ".dropzone" + tap browser drag @e2 @e8`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + args := cmd.Args().Slice() + if len(args) < 2 { + return fmt.Errorf("usage: tap browser drag ") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.Drag(ctx, cmd.String("session"), cmd.String("tab"), args[0], args[1]); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Dragged %s → %s\n", args[0], args[1]) + return nil + }, + } +} + +// browserMouseCmd returns the "mouse" parent command with move/down/up/wheel sub-commands. +func browserMouseCmd() *cli.Command { + return &cli.Command{ + Name: "mouse", + Usage: "Low-level mouse event dispatch", + Description: `Dispatch individual mouse events. + +Sub-commands: + move Move cursor to absolute position + down [button] Press button (left|right|middle, default left) + up [button] Release button + wheel [dx] Scroll vertically (and optionally horizontally)`, + Commands: []*cli.Command{ + browserMouseMoveCmd(), + browserMouseDownCmd(), + browserMouseUpCmd(), + browserMouseWheelCmd(), + }, + } +} + +func browserMouseMoveCmd() *cli.Command { + return &cli.Command{ + Name: "move", + Usage: "Move the mouse cursor to absolute coordinates", + ArgsUsage: " ", + Flags: browserActionFlags(false), + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + args := cmd.Args().Slice() + if len(args) < 2 { + return fmt.Errorf("usage: tap browser mouse move ") + } + x, err := strconv.ParseFloat(args[0], 64) + if err != nil { + return fmt.Errorf("invalid x: %w", err) + } + y, err := strconv.ParseFloat(args[1], 64) + if err != nil { + return fmt.Errorf("invalid y: %w", err) + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.MouseMove(ctx, cmd.String("session"), cmd.String("tab"), x, y); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Mouse moved to (%s, 
%s)\n", args[0], args[1]) + return nil + }, + } +} + +func resolveMouseButton(name string) (input.MouseButton, error) { + switch name { + case "", "left": + return input.Left, nil + case "right": + return input.Right, nil + case "middle": + return input.Middle, nil + default: + return input.None, fmt.Errorf("unknown button %q: use left, right, or middle", name) + } +} + +func browserMouseDownCmd() *cli.Command { + return &cli.Command{ + Name: "down", + Usage: "Press a mouse button", + ArgsUsage: "[button]", + Flags: browserActionFlags(false), + Description: `Dispatch a mousePressed event. button defaults to left. + +Examples: + tap browser mouse down + tap browser mouse down right`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + btn, err := resolveMouseButton(cmd.Args().First()) + if err != nil { + return err + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.MouseDown(ctx, cmd.String("session"), cmd.String("tab"), btn); err != nil { + return err + } + btnName := string(btn) + if btnName == "" { + btnName = "left" + } + fmt.Fprintf(os.Stderr, "Mouse %s button down\n", btnName) + return nil + }, + } +} + +func browserMouseUpCmd() *cli.Command { + return &cli.Command{ + Name: "up", + Usage: "Release a mouse button", + ArgsUsage: "[button]", + Flags: browserActionFlags(false), + Description: `Dispatch a mouseReleased event. button defaults to left. 
+ +Examples: + tap browser mouse up + tap browser mouse up right`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + btn, err := resolveMouseButton(cmd.Args().First()) + if err != nil { + return err + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.MouseUp(ctx, cmd.String("session"), cmd.String("tab"), btn); err != nil { + return err + } + btnName := string(btn) + if btnName == "" { + btnName = "left" + } + fmt.Fprintf(os.Stderr, "Mouse %s button up\n", btnName) + return nil + }, + } +} + +func browserMouseWheelCmd() *cli.Command { + return &cli.Command{ + Name: "wheel", + Usage: "Dispatch a mouse wheel scroll event", + ArgsUsage: " [dx]", + Flags: browserActionFlags(false), + Description: `Dispatch a mouseWheel event. dy scrolls vertically (positive = down), +dx scrolls horizontally (default 0). + +Examples: + tap browser mouse wheel 300 + tap browser mouse wheel -200 50`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + args := cmd.Args().Slice() + if len(args) < 1 { + return fmt.Errorf("usage: tap browser mouse wheel [dx]") + } + dy, err := strconv.ParseFloat(args[0], 64) + if err != nil { + return fmt.Errorf("invalid dy: %w", err) + } + var dx float64 + if len(args) >= 2 { + dx, err = strconv.ParseFloat(args[1], 64) + if err != nil { + return fmt.Errorf("invalid dx: %w", err) + } + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.MouseWheel(ctx, cmd.String("session"), cmd.String("tab"), dy, dx); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Mouse wheel dy=%s dx=%s\n", args[0], func() string { + if len(args) >= 2 { + return args[1] + } + return "0" + }()) + return nil + }, + } +} + +// browserKeyboardCmd returns the "keyboard" parent command with type/insert sub-commands. 
+func browserKeyboardCmd() *cli.Command { + return &cli.Command{ + Name: "keyboard", + Usage: "Low-level keyboard event dispatch", + Description: `Dispatch keyboard events at the current focus point. + +Sub-commands: + type Send per-character key events (real typing) + insert Paste text instantly via Input.insertText (no key events)`, + Commands: []*cli.Command{ + browserKeyboardTypeCmd(), + browserKeyboardInsertCmd(), + }, + } +} + +func browserKeyboardTypeCmd() *cli.Command { + return &cli.Command{ + Name: "type", + Usage: "Type text at current focus with real key events", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Send per-character keyDown/char/keyUp events for the given text. +Use this when the site validates per-keystroke input. + +Examples: + tap browser keyboard type "hello world"`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + text := cmd.Args().First() + if text == "" { + return fmt.Errorf("text required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.KeyboardType(ctx, cmd.String("session"), cmd.String("tab"), text); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Typed %q\n", text) + return nil + }, + } +} + +func browserKeyboardInsertCmd() *cli.Command { + return &cli.Command{ + Name: "insert", + Usage: "Insert text instantly without key events", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Insert text via Input.insertText — no keyDown/keyUp events are dispatched. +Faster than 'type' but bypasses keystroke listeners. 
+ +Examples: + tap browser keyboard insert "paste this text"`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + text := cmd.Args().First() + if text == "" { + return fmt.Errorf("text required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.KeyboardInsert(ctx, cmd.String("session"), cmd.String("tab"), text); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Inserted %q\n", text) + return nil + }, + } +} + +func browserKeydownCmd() *cli.Command { + return &cli.Command{ + Name: "keydown", + Usage: "Hold a key down", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Dispatch a rawKeyDown event for the given key name. +Use 'keyup' to release. Standard key names: Enter, Tab, Escape, Shift, Control, etc. + +Examples: + tap browser keydown Shift + tap browser keydown Control`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + key := cmd.Args().First() + if key == "" { + return fmt.Errorf("key required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.Keydown(ctx, cmd.String("session"), cmd.String("tab"), key); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Key down: %s\n", key) + return nil + }, + } +} + +func browserKeyupCmd() *cli.Command { + return &cli.Command{ + Name: "keyup", + Usage: "Release a held key", + ArgsUsage: "", + Flags: browserActionFlags(false), + Description: `Dispatch a keyUp event for the given key name. 
+ +Examples: + tap browser keyup Shift + tap browser keyup Control`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + key := cmd.Args().First() + if key == "" { + return fmt.Errorf("key required") + } + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + if err := mgr.Keyup(ctx, cmd.String("session"), cmd.String("tab"), key); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Key up: %s\n", key) + return nil + }, + } +} From 14efaaa6d06c45c2e02122835a5589b05de948d6 Mon Sep 17 00:00:00 2001 From: Vaayne Date: Thu, 11 Jun 2026 23:59:26 +0800 Subject: [PATCH 03/12] =?UTF-8?q?=E2=9C=A8=20feat:=20extend=20browser=20wa?= =?UTF-8?q?it=20with=20text/url/load/fn/state=20modes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wait now supports plain durations, --text substring, --url glob, --load load|domcontentloaded|networkidle, --fn JS polling, and --state visible|hidden|attached|detached. Also replaces the open --wait-selector 2s-sleep stub with a real visibility wait. 
--- browser/cdp_wait.go | 353 +++++++++++++++++++++++++++++++++++ browser/cdp_wait_test.go | 38 ++++ browser/manager_wait.go | 71 +++++++ cmd/tap/browser_action.go | 34 ---- cmd/tap/browser_simple.go | 14 +- cmd/tap/browser_wait.go | 248 ++++++++++++++++++++++++ cmd/tap/browser_wait_test.go | 89 +++++++++ 7 files changed, 806 insertions(+), 41 deletions(-) create mode 100644 browser/cdp_wait.go create mode 100644 browser/cdp_wait_test.go create mode 100644 browser/manager_wait.go create mode 100644 cmd/tap/browser_wait.go create mode 100644 cmd/tap/browser_wait_test.go diff --git a/browser/cdp_wait.go b/browser/cdp_wait.go new file mode 100644 index 0000000..5f0b97c --- /dev/null +++ b/browser/cdp_wait.go @@ -0,0 +1,353 @@ +package browser + +import ( + "context" + "fmt" + "path" + "strings" + "time" + + "github.com/chromedp/chromedp" +) + +// ElementState describes which DOM readiness condition to wait for. +type ElementState int + +const ( + // ElementVisible waits for the element to be present and visible. + ElementVisible ElementState = iota + // ElementHidden waits for the element to be absent from the layout + // (hidden or removed from the DOM). + ElementHidden + // ElementAttached waits for the element to be present in the DOM + // (does not require it to be visible). + ElementAttached + // ElementDetached waits for the element to be absent from the DOM. + ElementDetached +) + +// LoadState names a page-load event to wait for. +type LoadState string + +const ( + LoadStateLoad LoadState = "load" + LoadStateDOMContentLoaded LoadState = "domcontentloaded" + // LoadStateNetworkIdle polls until no network activity for ~500 ms. + // Implemented via JS: checks window.performance.getEntriesByType("resource") + // against a snapshot taken 500 ms earlier. This is a heuristic — it fires + // once the browser has not started any new resource fetches for half a second. 
+ LoadStateNetworkIdle LoadState = "networkidle" +) + +const pollInterval = 100 * time.Millisecond + +// WaitForElementTarget waits for a CSS selector to reach the desired DOM state. +// Supported states: visible, hidden, attached, detached. +func WaitForElementTarget(ctx context.Context, debugURL, targetID, sel string, state ElementState, timeout time.Duration) error { + waitCtx := ctx + if timeout > 0 { + var cancel context.CancelFunc + waitCtx, cancel = context.WithTimeout(ctx, timeout) + defer cancel() + } + + var action chromedp.Action + switch state { + case ElementVisible: + action = chromedp.WaitVisible(sel, chromedp.ByQuery) + case ElementHidden: + action = chromedp.WaitNotPresent(sel, chromedp.ByQuery) + case ElementAttached: + action = chromedp.WaitReady(sel, chromedp.ByQuery) + case ElementDetached: + action = chromedp.WaitNotPresent(sel, chromedp.ByQuery) + default: + return fmt.Errorf("wait element: unknown state %d", state) + } + + if err := withTarget(waitCtx, debugURL, targetID, action); err != nil { + return fmt.Errorf("wait element %q: %w", sel, err) + } + return nil +} + +// WaitForTextTarget polls until document.body.innerText contains the given substring. +// Polling interval: 100 ms. +func WaitForTextTarget(ctx context.Context, debugURL, targetID, text string, timeout time.Duration) error { + waitCtx := ctx + if timeout > 0 { + var cancel context.CancelFunc + waitCtx, cancel = context.WithTimeout(ctx, timeout) + defer cancel() + } + + // Escape the text for embedding in a JS string literal. + escaped := strings.ReplaceAll(text, `\`, `\\`) + escaped = strings.ReplaceAll(escaped, `"`, `\"`) + js := fmt.Sprintf(`document.body && document.body.innerText.includes("%s")`, escaped) + + if err := withTarget(waitCtx, debugURL, targetID, pollUntilTrue(waitCtx, js)); err != nil { + return fmt.Errorf("wait text %q: %w", text, err) + } + return nil +} + +// WaitForURLTarget polls until the page's location.href matches the glob pattern. 
+// Pattern supports * (any non-separator sequence) and ** (any sequence including /). +// Uses path.Match semantics with ** expanded before matching. The comparison is +// against the full URL string. +func WaitForURLTarget(ctx context.Context, debugURL, targetID, glob string, timeout time.Duration) error { + waitCtx := ctx + if timeout > 0 { + var cancel context.CancelFunc + waitCtx, cancel = context.WithTimeout(ctx, timeout) + defer cancel() + } + + checkFn := func(current string) (bool, error) { + return matchURLGlob(glob, current), nil + } + + if err := withTarget(waitCtx, debugURL, targetID, pollUntilURL(waitCtx, checkFn)); err != nil { + return fmt.Errorf("wait url %q: %w", glob, err) + } + return nil +} + +// WaitForLoadTarget waits for a named page-load event. +// "load" and "domcontentloaded" use chromedp's built-in page events. +// "networkidle" polls until no new resource entries appear for ~500 ms. +func WaitForLoadTarget(ctx context.Context, debugURL, targetID string, state LoadState, timeout time.Duration) error { + waitCtx := ctx + if timeout > 0 { + var cancel context.CancelFunc + waitCtx, cancel = context.WithTimeout(ctx, timeout) + defer cancel() + } + + var action chromedp.Action + switch state { + case LoadStateLoad: + action = chromedp.WaitReady("body", chromedp.ByQuery) + case LoadStateDOMContentLoaded: + // document.readyState reaches "interactive" at DOMContentLoaded. + action = pollUntilTrue(waitCtx, `document.readyState === "interactive" || document.readyState === "complete"`) + case LoadStateNetworkIdle: + // networkidle heuristic: poll JS performance entries. We capture the + // count of resource entries, wait 500 ms, and fire if the count hasn't + // grown. This is approximate but works for the common SPA case. 
+ action = chromedp.ActionFunc(func(ctx context.Context) error { + return waitNetworkIdle(ctx) + }) + default: + return fmt.Errorf("wait load: unknown state %q", state) + } + + if err := withTarget(waitCtx, debugURL, targetID, action); err != nil { + return fmt.Errorf("wait load %q: %w", state, err) + } + return nil +} + +// WaitForFnTarget polls until the given JS expression evaluates to a truthy value. +// The expression is run directly via Runtime.evaluate — it is the caller's +// responsibility to ensure it is safe. +func WaitForFnTarget(ctx context.Context, debugURL, targetID, jsExpr string, timeout time.Duration) error { + waitCtx := ctx + if timeout > 0 { + var cancel context.CancelFunc + waitCtx, cancel = context.WithTimeout(ctx, timeout) + defer cancel() + } + + if err := withTarget(waitCtx, debugURL, targetID, pollUntilTrue(waitCtx, jsExpr)); err != nil { + return fmt.Errorf("wait fn: %w", err) + } + return nil +} + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +// pollUntilTrue returns a chromedp action that ticks every 100 ms and runs +// jsExpr until it returns a truthy JS value or the context is done. +func pollUntilTrue(ctx context.Context, jsExpr string) chromedp.Action { + return chromedp.ActionFunc(func(actCtx context.Context) error { + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + for { + var result bool + if err := chromedp.Evaluate(jsExpr, &result).Do(actCtx); err == nil && result { + return nil + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-actCtx.Done(): + return actCtx.Err() + case <-ticker.C: + } + } + }) +} + +type urlCheckFn func(current string) (bool, error) + +// pollUntilURL polls location.href every 100 ms and applies checkFn. 
+func pollUntilURL(ctx context.Context, checkFn urlCheckFn) chromedp.Action { + return chromedp.ActionFunc(func(actCtx context.Context) error { + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + for { + var current string + if err := chromedp.Evaluate(`location.href`, ¤t).Do(actCtx); err == nil { + if ok, err := checkFn(current); err != nil { + return err + } else if ok { + return nil + } + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-actCtx.Done(): + return actCtx.Err() + case <-ticker.C: + } + } + }) +} + +// waitNetworkIdle fires once no new resource entries have appeared for 500 ms. +// It is called inside a withTarget action context. +func waitNetworkIdle(ctx context.Context) error { + const quiescentWindow = 500 * time.Millisecond + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + + var prevCount int + quiescentSince := time.Time{} + + // Seed the initial count. + var initial float64 + if err := chromedp.Evaluate( + `performance.getEntriesByType("resource").length`, &initial, + ).Do(ctx); err == nil { + prevCount = int(initial) + } + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + var countF float64 + if err := chromedp.Evaluate( + `performance.getEntriesByType("resource").length`, &countF, + ).Do(ctx); err != nil { + continue + } + current := int(countF) + if current != prevCount { + prevCount = current + quiescentSince = time.Time{} + continue + } + // Count unchanged — start or extend the quiet window. + if quiescentSince.IsZero() { + quiescentSince = time.Now() + } + if time.Since(quiescentSince) >= quiescentWindow { + return nil + } + } + } +} + +// matchURLGlob matches url against a glob pattern. +// Supports * (matches any char except /) and ** (matches anything including /). +// Pattern semantics documented in cmd/tap/browser_wait.go. +func matchURLGlob(pattern, url string) bool { + // Fast path: no wildcards. 
// matchDoubleStarGlob reports whether url matches pattern, where pattern may
// contain ** wildcards. The whole url must match (the pattern is anchored at
// both ends):
//
//   - **  matches any sequence of characters, including /
//   - *   matches any sequence of characters except /
//   - ?   matches exactly one character other than /
//
// Every other character, including [ and \, matches itself literally.
// Matching is done by simple backtracking; patterns and URLs are short enough
// that this is not a concern in practice.
func matchDoubleStarGlob(pattern, url string) bool {
	for pattern != "" {
		switch {
		case strings.HasPrefix(pattern, "**"):
			// Collapse any run of stars into a single ** and try every
			// possible split point of the remaining URL.
			rest := strings.TrimLeft(pattern, "*")
			if rest == "" {
				return true
			}
			for i := 0; i <= len(url); i++ {
				if matchDoubleStarGlob(rest, url[i:]) {
					return true
				}
			}
			return false
		case pattern[0] == '*':
			// A single star may consume characters up to, but not across,
			// the next path separator.
			rest := pattern[1:]
			for i := 0; ; i++ {
				if matchDoubleStarGlob(rest, url[i:]) {
					return true
				}
				if i == len(url) || url[i] == '/' {
					return false
				}
			}
		case url == "":
			// Pattern still expects input but the URL is exhausted.
			return false
		case pattern[0] == '?':
			if url[0] == '/' {
				return false
			}
			// Consume one whole UTF-8 encoded character: the lead byte plus
			// any continuation bytes (10xxxxxx).
			n := 1
			for n < len(url) && url[n]&0xC0 == 0x80 {
				n++
			}
			pattern, url = pattern[1:], url[n:]
		case pattern[0] != url[0]:
			return false
		default:
			pattern, url = pattern[1:], url[1:]
		}
	}
	// Anchored at the end: nothing of the URL may be left over.
	return url == ""
}
// expandedLen reports how many bytes of s, counted from offset, are covered by
// pat. A pattern without * or ? covers exactly len(pat) bytes. Otherwise the
// result is the length of the longest prefix of s[offset:] accepted by
// path.Match, or 0 when no non-empty prefix matches.
func expandedLen(pat, s string, offset int) int {
	if !strings.ContainsAny(pat, "*?") {
		return len(pat)
	}
	tail := s[offset:]
	// Walk from the longest candidate prefix downwards so the first hit is
	// the longest match.
	for size := len(tail); size >= 1; size-- {
		matched, err := path.Match(pat, tail[:size])
		if err == nil && matched {
			return size
		}
	}
	return 0
}
0000000..a9bdc0b --- /dev/null +++ b/browser/manager_wait.go @@ -0,0 +1,71 @@ +package browser + +import ( + "context" + "fmt" + "time" +) + +// WaitForElement waits until the element matching sel reaches the given DOM +// state in a tracked tab. +func (m *Manager) WaitForElement(ctx context.Context, sessionName, tabName, sel string, state ElementState, timeout time.Duration) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "wait element") + if err != nil { + return err + } + if err := WaitForElementTarget(ctx, rt.DebugURL, rt.TargetID, sel, state, timeout); err != nil { + return fmt.Errorf("wait element: %w", err) + } + return nil +} + +// WaitForText waits until document.body.innerText contains the given substring +// in a tracked tab. +func (m *Manager) WaitForText(ctx context.Context, sessionName, tabName, text string, timeout time.Duration) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "wait text") + if err != nil { + return err + } + if err := WaitForTextTarget(ctx, rt.DebugURL, rt.TargetID, text, timeout); err != nil { + return fmt.Errorf("wait text: %w", err) + } + return nil +} + +// WaitForURL waits until the page's location.href matches the glob pattern in a +// tracked tab. +func (m *Manager) WaitForURL(ctx context.Context, sessionName, tabName, glob string, timeout time.Duration) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "wait url") + if err != nil { + return err + } + if err := WaitForURLTarget(ctx, rt.DebugURL, rt.TargetID, glob, timeout); err != nil { + return fmt.Errorf("wait url: %w", err) + } + return nil +} + +// WaitForLoad waits for a named page-load event in a tracked tab. 
+func (m *Manager) WaitForLoad(ctx context.Context, sessionName, tabName string, state LoadState, timeout time.Duration) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "wait load") + if err != nil { + return err + } + if err := WaitForLoadTarget(ctx, rt.DebugURL, rt.TargetID, state, timeout); err != nil { + return fmt.Errorf("wait load: %w", err) + } + return nil +} + +// WaitForFn waits until the given JS expression evaluates to a truthy value in +// a tracked tab. +func (m *Manager) WaitForFn(ctx context.Context, sessionName, tabName, jsExpr string, timeout time.Duration) error { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "wait fn") + if err != nil { + return err + } + if err := WaitForFnTarget(ctx, rt.DebugURL, rt.TargetID, jsExpr, timeout); err != nil { + return fmt.Errorf("wait fn: %w", err) + } + return nil +} diff --git a/cmd/tap/browser_action.go b/cmd/tap/browser_action.go index 3058cf5..85ee92b 100644 --- a/cmd/tap/browser_action.go +++ b/cmd/tap/browser_action.go @@ -806,40 +806,6 @@ For custom dropdown components (React Select, etc.), use 'click' instead.`, } } -func browserWaitCmd() *cli.Command { - return &cli.Command{ - Name: "wait", - Usage: "Wait for an element to become visible", - ArgsUsage: "", - Flags: append(browserActionFlags(false), - &cli.DurationFlag{ - Name: "timeout", - Usage: "Max time to wait", - Value: 30 * time.Second, - }, - ), - Description: `Wait until the first element matching the CSS selector becomes visible. 
-Uses CDP's built-in visibility polling — more reliable than JS-based polling.`, - Action: func(ctx context.Context, cmd *cli.Command) error { - configureLogging(cmd) - sel := cmd.Args().First() - if sel == "" { - return fmt.Errorf("CSS selector required") - } - mgr, err := newBrowserManager(cmd) - if err != nil { - return err - } - timeout := cmd.Duration("timeout") - if err := mgr.WaitFor(ctx, cmd.String("session"), cmd.String("tab"), sel, timeout); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Element %s is visible\n", sel) - return nil - }, - } -} - func browserBackCmd() *cli.Command { return &cli.Command{ Name: "back", diff --git a/cmd/tap/browser_simple.go b/cmd/tap/browser_simple.go index 74b59fb..c4b73a3 100644 --- a/cmd/tap/browser_simple.go +++ b/cmd/tap/browser_simple.go @@ -134,14 +134,14 @@ func runBrowserOpen(ctx context.Context, cmd *cli.Command) error { } } - // Handle wait-selector if provided (simplified for Phase 1) + // Handle wait-selector: wait until selector is visible. if sel := cmd.String("wait-selector"); sel != "" { - // TODO: Implement proper wait using Evaluate with polling - // For now, just do a fixed delay as a placeholder - select { - case <-ctx.Done(): - return ctx.Err() - case <-time.After(2 * time.Second): + timeout := cmd.Duration("timeout") + if timeout <= 0 { + timeout = 30 * time.Second + } + if err := mgr.WaitForElement(ctx, sessionName, targetTab, sel, browser.ElementVisible, timeout); err != nil { + return fmt.Errorf("wait-selector: %w", err) } } diff --git a/cmd/tap/browser_wait.go b/cmd/tap/browser_wait.go new file mode 100644 index 0000000..8497cfd --- /dev/null +++ b/cmd/tap/browser_wait.go @@ -0,0 +1,248 @@ +package main + +import ( + "context" + "fmt" + "os" + "strconv" + "time" + + "github.com/urfave/cli/v3" + "github.com/vaayne/tap/browser" +) + +// browserWaitCmd returns the "tap browser wait" command. 
+// +// Exactly one wait mode must be specified per invocation: +// +// tap browser wait element visible (default) +// tap browser wait pure time wait +// tap browser wait --state visible|hidden|attached|detached +// tap browser wait --text "Welcome" body text contains substring +// tap browser wait --url "**/dash" URL glob match +// tap browser wait --load load|domcontentloaded|networkidle +// tap browser wait --fn "js expression" poll until truthy +// tap browser wait --timeout 30s default timeout 30s +func browserWaitCmd() *cli.Command { + return &cli.Command{ + Name: "wait", + Usage: "Wait for a page condition to become true", + ArgsUsage: "[]", + Flags: append(browserActionFlags(false), + &cli.DurationFlag{ + Name: "timeout", + Usage: "Maximum time to wait (default 30s)", + Value: 30 * time.Second, + }, + &cli.StringFlag{ + Name: "state", + Usage: "Element state: visible (default), hidden, attached, detached", + Value: "visible", + }, + &cli.StringFlag{ + Name: "text", + Usage: "Wait until document.body.innerText contains this substring", + }, + &cli.StringFlag{ + Name: "url", + Usage: `Wait until location.href matches this glob (supports * and **)`, + }, + &cli.StringFlag{ + Name: "load", + Usage: "Wait for page load event: load, domcontentloaded, networkidle", + }, + &cli.StringFlag{ + Name: "fn", + Usage: "Poll until this JS expression evaluates to a truthy value", + }, + ), + Description: `Wait for a page condition before proceeding. + +Modes (exactly one must be active): + + POSITIONAL ARGUMENT + Wait for CSS selector to become visible (default state). + Pure time wait when the argument is a plain integer (ms). + Pure time wait when the argument is a Go duration (e.g. 2s). + + FLAGS + --state Change element state: visible (default), hidden, attached, detached. + Requires a positional CSS selector argument. + --text Wait until document.body.innerText contains the substring. + --url Wait until location.href matches the glob pattern. 
+ Supports * (any chars except /) and ** (any chars including /). + --load Wait for a named page-load event: + load — fires when the load event completes + domcontentloaded — fires when DOMContentLoaded fires + networkidle — no new network requests for ~500 ms + --fn Poll until the JS expression returns a truthy value. + + --timeout Maximum wait duration (default 30s). Applies to all modes. + +Examples: + tap browser wait "#login-form" + tap browser wait ".spinner" --state hidden + tap browser wait 2000 + tap browser wait 1.5s + tap browser wait --text "Welcome back" + tap browser wait --url "**/dashboard" + tap browser wait --load networkidle + tap browser wait --fn "window.__ready === true"`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + return runBrowserWait(ctx, cmd) + }, + } +} + +func runBrowserWait(ctx context.Context, cmd *cli.Command) error { + timeout := cmd.Duration("timeout") + textFlag := cmd.String("text") + urlFlag := cmd.String("url") + loadFlag := cmd.String("load") + fnFlag := cmd.String("fn") + stateFlag := cmd.String("state") + arg := cmd.Args().First() + + // Count how many modes are active to detect conflicts. + modeCount := 0 + if textFlag != "" { + modeCount++ + } + if urlFlag != "" { + modeCount++ + } + if loadFlag != "" { + modeCount++ + } + if fnFlag != "" { + modeCount++ + } + if arg != "" { + modeCount++ + } + + if modeCount > 1 { + return fmt.Errorf("wait: only one mode may be active per invocation; got multiple arguments/flags") + } + if modeCount == 0 { + return fmt.Errorf("wait: specify a selector, duration, --text, --url, --load, or --fn") + } + + // Pure time wait: integer (ms) or Go duration string. + if arg != "" { + if d, ok := parseDurationArg(arg); ok { + return timeSleep(ctx, d) + } + // Not a duration — treat as CSS selector below. 
+ } + + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + session := cmd.String("session") + tab := cmd.String("tab") + + switch { + case textFlag != "": + if err := mgr.WaitForText(ctx, session, tab, textFlag, timeout); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Text %q found\n", textFlag) + + case urlFlag != "": + if err := mgr.WaitForURL(ctx, session, tab, urlFlag, timeout); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "URL matched %q\n", urlFlag) + + case loadFlag != "": + ls, err := parseLoadState(loadFlag) + if err != nil { + return err + } + if err := mgr.WaitForLoad(ctx, session, tab, ls, timeout); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Load state %q reached\n", loadFlag) + + case fnFlag != "": + if err := mgr.WaitForFn(ctx, session, tab, fnFlag, timeout); err != nil { + return err + } + fmt.Fprintln(os.Stderr, "JS condition is truthy") + + default: + // Positional arg is a CSS selector. + sel := arg + es, err := parseElementState(stateFlag) + if err != nil { + return err + } + if err := mgr.WaitForElement(ctx, session, tab, sel, es, timeout); err != nil { + return err + } + fmt.Fprintf(os.Stderr, "Element %s is %s\n", sel, stateFlag) + } + + return nil +} + +// timeSleep blocks for d or until ctx is cancelled. +func timeSleep(ctx context.Context, d time.Duration) error { + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(d): + } + fmt.Fprintf(os.Stderr, "Waited %s\n", d) + return nil +} + +// parseDurationArg tries to interpret s as a time wait: +// - plain integer → milliseconds +// - Go duration string (e.g. "1.5s", "500ms") → parsed duration +// +// Returns (duration, true) on success or (0, false) if s is not a duration. +func parseDurationArg(s string) (time.Duration, bool) { + // Plain integer → milliseconds. + if ms, err := strconv.ParseInt(s, 10, 64); err == nil { + return time.Duration(ms) * time.Millisecond, true + } + // Go duration string. 
+ if d, err := time.ParseDuration(s); err == nil { + return d, true + } + return 0, false +} + +// parseElementState maps the --state flag value to an ElementState constant. +func parseElementState(s string) (browser.ElementState, error) { + switch s { + case "visible", "": + return browser.ElementVisible, nil + case "hidden": + return browser.ElementHidden, nil + case "attached": + return browser.ElementAttached, nil + case "detached": + return browser.ElementDetached, nil + default: + return browser.ElementVisible, fmt.Errorf("wait: unknown --state %q; choose visible, hidden, attached, or detached", s) + } +} + +// parseLoadState maps the --load flag value to a LoadState constant. +func parseLoadState(s string) (browser.LoadState, error) { + switch s { + case string(browser.LoadStateLoad): + return browser.LoadStateLoad, nil + case string(browser.LoadStateDOMContentLoaded): + return browser.LoadStateDOMContentLoaded, nil + case string(browser.LoadStateNetworkIdle): + return browser.LoadStateNetworkIdle, nil + default: + return browser.LoadStateLoad, fmt.Errorf("wait: unknown --load %q; choose load, domcontentloaded, or networkidle", s) + } +} diff --git a/cmd/tap/browser_wait_test.go b/cmd/tap/browser_wait_test.go new file mode 100644 index 0000000..bfd04b3 --- /dev/null +++ b/cmd/tap/browser_wait_test.go @@ -0,0 +1,89 @@ +package main + +import ( + "testing" + "time" + + "github.com/vaayne/tap/browser" +) + +func TestParseDurationArg(t *testing.T) { + tests := []struct { + input string + want time.Duration + wantOK bool + }{ + {"1000", 1000 * time.Millisecond, true}, + {"500", 500 * time.Millisecond, true}, + {"0", 0, true}, + {"2s", 2 * time.Second, true}, + {"1.5s", 1500 * time.Millisecond, true}, + {"300ms", 300 * time.Millisecond, true}, + {"#selector", 0, false}, + {".class", 0, false}, + {"div > span", 0, false}, + {"body", 0, false}, + } + + for _, tt := range tests { + got, ok := parseDurationArg(tt.input) + if ok != tt.wantOK { + 
t.Errorf("parseDurationArg(%q): ok=%v want %v", tt.input, ok, tt.wantOK) + continue + } + if ok && got != tt.want { + t.Errorf("parseDurationArg(%q): duration=%v want %v", tt.input, got, tt.want) + } + } +} + +func TestParseElementState(t *testing.T) { + tests := []struct { + input string + want browser.ElementState + wantErr bool + }{ + {"visible", browser.ElementVisible, false}, + {"", browser.ElementVisible, false}, + {"hidden", browser.ElementHidden, false}, + {"attached", browser.ElementAttached, false}, + {"detached", browser.ElementDetached, false}, + {"bogus", browser.ElementVisible, true}, + } + + for _, tt := range tests { + got, err := parseElementState(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("parseElementState(%q): err=%v wantErr=%v", tt.input, err, tt.wantErr) + continue + } + if !tt.wantErr && got != tt.want { + t.Errorf("parseElementState(%q): got %v want %v", tt.input, got, tt.want) + } + } +} + +func TestParseLoadState(t *testing.T) { + tests := []struct { + input string + want browser.LoadState + wantErr bool + }{ + {"load", browser.LoadStateLoad, false}, + {"domcontentloaded", browser.LoadStateDOMContentLoaded, false}, + {"networkidle", browser.LoadStateNetworkIdle, false}, + {"ready", browser.LoadStateLoad, true}, + {"", browser.LoadStateLoad, true}, + } + + for _, tt := range tests { + got, err := parseLoadState(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("parseLoadState(%q): err=%v wantErr=%v", tt.input, err, tt.wantErr) + continue + } + if !tt.wantErr && got != tt.want { + t.Errorf("parseLoadState(%q): got %v want %v", tt.input, got, tt.want) + } + } +} From 18b9a3ad19230c4902eb06cc8bc91d4bf19c0053 Mon Sep 17 00:00:00 2001 From: Vaayne Date: Thu, 11 Jun 2026 23:59:26 +0800 Subject: [PATCH 04/12] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20browser=20find?= =?UTF-8?q?=20semantic=20locators?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit find 
role/text/label/placeholder/alt/title/testid/first/last/nth with click/fill/type/hover/focus/check/uncheck/text actions. role resolves via the accessibility tree; others via injected JS to backendNodeID. --- browser/cdp_find.go | 476 +++++++++++++++++++++++++++++++++++ browser/cdp_find_test.go | 98 ++++++++ browser/manager_find.go | 24 ++ cmd/tap/browser_find.go | 528 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 1126 insertions(+) create mode 100644 browser/cdp_find.go create mode 100644 browser/cdp_find_test.go create mode 100644 browser/manager_find.go create mode 100644 cmd/tap/browser_find.go diff --git a/browser/cdp_find.go b/browser/cdp_find.go new file mode 100644 index 0000000..cddeaa9 --- /dev/null +++ b/browser/cdp_find.go @@ -0,0 +1,476 @@ +package browser + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + "github.com/chromedp/cdproto/accessibility" + "github.com/chromedp/cdproto/cdp" + "github.com/chromedp/cdproto/dom" + "github.com/chromedp/cdproto/input" + "github.com/chromedp/cdproto/runtime" + "github.com/chromedp/chromedp" +) + +// LocatorKind identifies which semantic locator strategy to use. +type LocatorKind string + +const ( + LocatorRole LocatorKind = "role" + LocatorText LocatorKind = "text" + LocatorLabel LocatorKind = "label" + LocatorPlaceholder LocatorKind = "placeholder" + LocatorAlt LocatorKind = "alt" + LocatorTitle LocatorKind = "title" + LocatorTestID LocatorKind = "testid" + LocatorFirst LocatorKind = "first" + LocatorLast LocatorKind = "last" + LocatorNth LocatorKind = "nth" +) + +// FindAction is the operation to perform once an element is located. 
+type FindAction string + +const ( + FindActionClick FindAction = "click" + FindActionFill FindAction = "fill" + FindActionType FindAction = "type" + FindActionHover FindAction = "hover" + FindActionFocus FindAction = "focus" + FindActionCheck FindAction = "check" + FindActionUncheck FindAction = "uncheck" + FindActionText FindAction = "text" +) + +// FindLocator describes a semantic element locator. +type FindLocator struct { + Kind LocatorKind + + // Role locator fields. + Role string + Name string // accessible name filter (--name flag) + + // Text locator fields. + Text string + Exact bool // --exact flag for text matching + + // Label, placeholder, alt, title, testid: all use Query. + Query string + + // Nth/First/Last: Index and CSSSelector. + Index int // 0-based index; -1 means last + CSSSelector string // the CSS selector for nth/first/last +} + +// FindTarget locates an element using the given locator, then performs action +// with optional value. Returns textContent when action is FindActionText. +func FindTarget(ctx context.Context, debugURL, targetID string, loc FindLocator, action FindAction, value string) (string, error) { + var result string + err := withTarget(ctx, debugURL, targetID, chromedp.ActionFunc(func(ctx context.Context) error { + backendNodeID, err := resolveLocator(ctx, loc) + if err != nil { + return err + } + out, err := dispatchFindAction(ctx, backendNodeID, action, value) + if err != nil { + return err + } + result = out + return nil + })) + if err != nil { + return "", fmt.Errorf("find %s: %w", loc.Kind, err) + } + return result, nil +} + +// resolveLocator maps a FindLocator to a BackendNodeID using the appropriate +// strategy for each kind. 
+func resolveLocator(ctx context.Context, loc FindLocator) (cdp.BackendNodeID, error) { + switch loc.Kind { + case LocatorRole: + return resolveByRole(ctx, loc.Role, loc.Name) + case LocatorText: + return resolveByJS(ctx, jsLocatorText(loc.Text, loc.Exact), fmt.Sprintf("text=%q", loc.Text)) + case LocatorLabel: + return resolveByJS(ctx, jsLocatorLabel(loc.Query), fmt.Sprintf("label=%q", loc.Query)) + case LocatorPlaceholder: + return resolveByJS(ctx, jsLocatorAttr("placeholder", loc.Query), fmt.Sprintf("placeholder=%q", loc.Query)) + case LocatorAlt: + return resolveByJS(ctx, jsLocatorAttr("alt", loc.Query), fmt.Sprintf("alt=%q", loc.Query)) + case LocatorTitle: + return resolveByJS(ctx, jsLocatorAttr("title", loc.Query), fmt.Sprintf("title=%q", loc.Query)) + case LocatorTestID: + return resolveByJS(ctx, jsLocatorAttr("data-testid", loc.Query), fmt.Sprintf("testid=%q", loc.Query)) + case LocatorFirst: + return resolveByJS(ctx, jsLocatorNth(loc.CSSSelector, 0), fmt.Sprintf("first %q", loc.CSSSelector)) + case LocatorLast: + return resolveByJS(ctx, jsLocatorLast(loc.CSSSelector), fmt.Sprintf("last %q", loc.CSSSelector)) + case LocatorNth: + return resolveByJS(ctx, jsLocatorNth(loc.CSSSelector, loc.Index), fmt.Sprintf("nth(%d) %q", loc.Index, loc.CSSSelector)) + default: + return 0, fmt.Errorf("unknown locator kind %q", loc.Kind) + } +} + +// resolveByRole uses the AX tree to find the first node matching role (and +// optionally accessible name), then returns its BackendNodeID. +// +// Role matching is case-insensitive. Name matching uses substring containment +// (also case-insensitive). If multiple nodes match, the first is returned. 
+func resolveByRole(ctx context.Context, role, name string) (cdp.BackendNodeID, error) { + if err := accessibility.Enable().Do(ctx); err != nil { + return 0, fmt.Errorf("enable accessibility: %w", err) + } + defer func() { _ = accessibility.Disable().Do(ctx) }() + + nodes, err := accessibility.GetFullAXTree().Do(ctx) + if err != nil { + return 0, fmt.Errorf("get ax tree: %w", err) + } + + wantRole := strings.ToLower(strings.TrimSpace(role)) + wantName := strings.ToLower(strings.TrimSpace(name)) + + for _, n := range nodes { + nodeRole := strings.ToLower(normalizeAXValue(n.Role)) + if nodeRole != wantRole { + continue + } + if wantName != "" { + nodeName := strings.ToLower(normalizeAXValue(n.Name)) + if !strings.Contains(nodeName, wantName) { + continue + } + } + if n.BackendDOMNodeID == 0 { + continue + } + return cdp.BackendNodeID(n.BackendDOMNodeID), nil + } + + desc := fmt.Sprintf("role=%q", role) + if name != "" { + desc += fmt.Sprintf(" name=%q", name) + } + return 0, fmt.Errorf("no element found: %s", desc) +} + +// resolveByJS runs a JS locator expression inside the page that returns a DOM +// node reference, then uses dom.DescribeNode to obtain the BackendNodeID. +// The expression must return the element or null/undefined. +func resolveByJS(ctx context.Context, jsExpr string, desc string) (cdp.BackendNodeID, error) { + obj, ex, err := runtime.Evaluate(jsExpr). + WithReturnByValue(false). + WithAwaitPromise(false). 
+ Do(ctx) + if err != nil { + return 0, fmt.Errorf("locator js: %w", err) + } + if ex != nil { + return 0, fmt.Errorf("locator js exception: %s", ex.Text) + } + if obj == nil || obj.ObjectID == "" || obj.Type == "undefined" || + (obj.Type == "object" && obj.Subtype == "null") { + return 0, fmt.Errorf("no element found: %s", desc) + } + + nodeInfo, err := dom.DescribeNode().WithObjectID(obj.ObjectID).Do(ctx) + if err != nil { + return 0, fmt.Errorf("describe node: %w", err) + } + if nodeInfo.BackendNodeID == 0 { + return 0, fmt.Errorf("element has no backend node ID: %s", desc) + } + return nodeInfo.BackendNodeID, nil +} + +// --------------------------------------------------------------------------- +// JS locator expressions — each returns the first matching DOM element or null. +// --------------------------------------------------------------------------- + +// jsLocatorText returns a JS expression that finds the first element whose +// visible text contains (or exactly matches) the given text. 
func jsLocatorText(text string, exact bool) string {
	if !exact {
		// Substring mode is case-insensitive: the needle is folded here and
		// the page text is folded inside the script.
		text = strings.ToLower(text)
	}
	// Embed the needle as a JSON string literal, which is valid JavaScript.
	// Go's %q is not a safe substitute: it can emit Go-only escapes such as
	// \a or \U0001xxxx that JavaScript interprets as different characters.
	// Marshalling a plain string cannot fail, so the error is ignored.
	needle, _ := json.Marshal(text)

	if exact {
		// Leaf elements are compared by trimmed textContent; inputs and
		// textareas by their trimmed value.
		return fmt.Sprintf(`(function(){
	var text = %s;
	var all = document.querySelectorAll("*");
	for (var i = 0; i < all.length; i++) {
		var el = all[i];
		if (el.children.length === 0 && el.textContent.trim() === text) return el;
		if ((el.tagName === "INPUT" || el.tagName === "TEXTAREA") && (el.value||"").trim() === text) return el;
	}
	return null;
})()`, needle)
	}
	return fmt.Sprintf(`(function(){
	var text = %s;
	var all = document.querySelectorAll("*");
	for (var i = 0; i < all.length; i++) {
		var el = all[i];
		if (el.children.length === 0 && el.textContent.toLowerCase().includes(text)) return el;
		if ((el.tagName === "INPUT" || el.tagName === "TEXTAREA") && (el.value||"").toLowerCase().includes(text)) return el;
	}
	return null;
})()`, needle)
}

// jsLocatorLabel returns a JS expression that finds the first form element
// associated with a label whose text contains the given string
// (case-insensitive). For each matching <label> it tries the element named by
// the label's htmlFor, then the first input/textarea/select/button nested in
// the label. If no label yields an element, it falls back to the first element
// whose aria-label attribute contains the string.
func jsLocatorLabel(labelText string) string {
	// JSON string literals are valid JavaScript; see jsLocatorText for why %q
	// is not used. Marshalling a plain string cannot fail.
	want, _ := json.Marshal(strings.ToLower(labelText))
	return fmt.Sprintf(`(function(){
	var want = %s;
	var labels = document.querySelectorAll("label");
	for (var i = 0; i < labels.length; i++) {
		var lbl = labels[i];
		if (!lbl.textContent.toLowerCase().includes(want)) continue;
		if (lbl.htmlFor) {
			var el = document.getElementById(lbl.htmlFor);
			if (el) return el;
		}
		var inp = lbl.querySelector("input,textarea,select,button");
		if (inp) return inp;
	}
	var all = document.querySelectorAll("[aria-label]");
	for (var j = 0; j < all.length; j++) {
		if ((all[j].getAttribute("aria-label")||"").toLowerCase().includes(want)) return all[j];
	}
	return null;
})()`, want)
}

// jsLocatorAttr returns a JS expression that finds the first element with
// the given attribute value containing the query string (case-insensitive).
func jsLocatorAttr(attr, query string) string {
	// String values are embedded as JSON literals, which are valid JavaScript.
	// Go's %q can emit Go-only escapes (\a, \U0001xxxx) that JavaScript reads
	// as different characters. Marshalling a plain string cannot fail.
	want, _ := json.Marshal(strings.ToLower(query))
	name, _ := json.Marshal(attr)
	// attr is spliced unquoted into the [attr] selector, so it must be a plain
	// attribute name.
	return fmt.Sprintf(`(function(){
	var want = %s;
	var all = document.querySelectorAll("[%s]");
	for (var i = 0; i < all.length; i++) {
		if ((all[i].getAttribute(%s)||"").toLowerCase().includes(want)) return all[i];
	}
	return null;
})()`, want, attr, name)
}

// jsLocatorNth returns a JS expression that returns the n-th (0-based) element
// matching the CSS selector, or null when there is no such element.
//
// A negative n counts from the end of the match list: -1 is the last element,
// -2 the one before it, and so on.
func jsLocatorNth(css string, n int) string {
	// JSON string literals are valid JavaScript; %q is not guaranteed to be.
	sel, _ := json.Marshal(css)

	index := fmt.Sprintf("%d", n)
	if n < 0 {
		// els[-1] is always undefined in JavaScript, so translate negative
		// indexes into an offset from the list length.
		index = fmt.Sprintf("els.length - %d", -n)
	}
	return fmt.Sprintf(`(function(){
	var els = document.querySelectorAll(%s);
	return els[%s] || null;
})()`, sel, index)
}

// jsLocatorLast returns a JS expression that returns the last element matching
// the CSS selector, or null when nothing matches.
func jsLocatorLast(css string) string {
	// JSON string literals are valid JavaScript; %q is not guaranteed to be.
	sel, _ := json.Marshal(css)
	return fmt.Sprintf(`(function(){
	var els = document.querySelectorAll(%s);
	return els.length > 0 ? els[els.length - 1] : null;
})()`, sel)
}

// ---------------------------------------------------------------------------
// Action dispatch — all functions operate on an already-attached CDP context.
// ---------------------------------------------------------------------------

// dispatchFindAction performs action on the element identified by backendNodeID.
// Returns non-empty string only when action == FindActionText.
+func dispatchFindAction(ctx context.Context, backendNodeID cdp.BackendNodeID, action FindAction, value string) (string, error) { + switch action { + case FindActionClick: + return "", findClick(ctx, backendNodeID) + case FindActionFill: + return "", findFill(ctx, backendNodeID, value) + case FindActionType: + return "", findType(ctx, backendNodeID, value) + case FindActionHover: + return "", findHover(ctx, backendNodeID) + case FindActionFocus: + return "", findFocusJS(ctx, backendNodeID) + case FindActionCheck: + return "", findSetChecked(ctx, backendNodeID, true) + case FindActionUncheck: + return "", findSetChecked(ctx, backendNodeID, false) + case FindActionText: + return findGetText(ctx, backendNodeID) + default: + return "", fmt.Errorf("unknown action %q", action) + } +} + +// findClick scrolls the element into view and dispatches a real mouse click. +func findClick(ctx context.Context, backendNodeID cdp.BackendNodeID) error { + if err := dom.ScrollIntoViewIfNeeded().WithBackendNodeID(backendNodeID).Do(ctx); err != nil { + return fmt.Errorf("scroll into view: %w", err) + } + box, err := dom.GetBoxModel().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("get box model: %w", err) + } + q := box.Content + x := (q[0] + q[2] + q[4] + q[6]) / 4 + y := (q[1] + q[3] + q[5] + q[7]) / 4 + if err := input.DispatchMouseEvent(input.MouseMoved, x, y).Do(ctx); err != nil { + return err + } + if err := input.DispatchMouseEvent(input.MousePressed, x, y).WithButton(input.Left).WithClickCount(1).Do(ctx); err != nil { + return err + } + return input.DispatchMouseEvent(input.MouseReleased, x, y).WithButton(input.Left).WithClickCount(1).Do(ctx) +} + +// findFill sets the value of a form field via React-compatible native setter. +// This is the inline equivalent of FillTargetByBackendNodeID but runs inside an +// already-attached CDP context (no withTarget wrapper needed). 
+func findFill(ctx context.Context, backendNodeID cdp.BackendNodeID, value string) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + fn := `function(v){ + this.focus(); + var tag = this.tagName ? this.tagName.toLowerCase() : ""; + if (tag === "select") { + this.value = v; + this.dispatchEvent(new Event("change", { bubbles: true })); + return true; + } + var setter = (tag === "textarea") + ? Object.getOwnPropertyDescriptor(window.HTMLTextAreaElement.prototype, "value") && Object.getOwnPropertyDescriptor(window.HTMLTextAreaElement.prototype, "value").set + : Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, "value") && Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, "value").set; + if (setter) setter.call(this, v); else this.value = v; + this.dispatchEvent(new Event("input", { bubbles: true })); + this.dispatchEvent(new Event("change", { bubbles: true })); + return true; +}` + _, ex, err := runtime.CallFunctionOn(fmt.Sprintf(`function(){ return (%s).call(this, %q) }`, fn, value)). + WithObjectID(obj.ObjectID). + WithAwaitPromise(true). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return fmt.Errorf("fill: %w", err) + } + if ex != nil { + return fmt.Errorf("fill: javascript exception") + } + return nil +} + +// findType focuses the element via JS then inserts text using input.InsertText. +func findType(ctx context.Context, backendNodeID cdp.BackendNodeID, text string) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + _, ex, err := runtime.CallFunctionOn(`function(){ this.focus(); return true; }`). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). 
+ Do(ctx) + if err != nil { + return fmt.Errorf("focus for type: %w", err) + } + if ex != nil { + return fmt.Errorf("focus for type: javascript exception") + } + return input.InsertText(text).Do(ctx) +} + +// findHover moves the mouse to the element centre using box model coordinates. +func findHover(ctx context.Context, backendNodeID cdp.BackendNodeID) error { + if err := dom.ScrollIntoViewIfNeeded().WithBackendNodeID(backendNodeID).Do(ctx); err != nil { + return fmt.Errorf("scroll into view: %w", err) + } + box, err := dom.GetBoxModel().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("get box model: %w", err) + } + q := box.Content + x := (q[0] + q[2] + q[4] + q[6]) / 4 + y := (q[1] + q[3] + q[5] + q[7]) / 4 + return input.DispatchMouseEvent(input.MouseMoved, x, y).Do(ctx) +} + +// findFocusJS focuses the element via JS CallFunctionOn. +func findFocusJS(ctx context.Context, backendNodeID cdp.BackendNodeID) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + _, ex, err := runtime.CallFunctionOn(`function(){ this.focus(); return true; }`). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return fmt.Errorf("focus: %w", err) + } + if ex != nil { + return fmt.Errorf("focus: javascript exception") + } + return nil +} + +// findSetChecked checks or unchecks a checkbox/radio input via JS. 
+func findSetChecked(ctx context.Context, backendNodeID cdp.BackendNodeID, checked bool) error { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return fmt.Errorf("resolve node: %w", err) + } + wantStr := "true" + if !checked { + wantStr = "false" + } + fn := fmt.Sprintf(`function(){ + var want = %s; + if (this.checked !== want) { + this.click(); + this.dispatchEvent(new Event("change", { bubbles: true })); + } + return true; +}`, wantStr) + _, ex, err := runtime.CallFunctionOn(fn). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return fmt.Errorf("set checked: %w", err) + } + if ex != nil { + return fmt.Errorf("set checked: javascript exception") + } + return nil +} + +// findGetText returns the trimmed textContent of the element. +func findGetText(ctx context.Context, backendNodeID cdp.BackendNodeID) (string, error) { + obj, err := dom.ResolveNode().WithBackendNodeID(backendNodeID).Do(ctx) + if err != nil { + return "", fmt.Errorf("resolve node: %w", err) + } + res, ex, err := runtime.CallFunctionOn(`function(){ return (this.textContent||"").trim(); }`). + WithObjectID(obj.ObjectID). + WithReturnByValue(true). + Do(ctx) + if err != nil { + return "", fmt.Errorf("get text: %w", err) + } + if ex != nil { + return "", fmt.Errorf("get text: javascript exception") + } + if res == nil || len(res.Value) == 0 { + return "", nil + } + var text string + if err := json.Unmarshal(res.Value, &text); err != nil { + return strings.Trim(string(res.Value), `"`), nil + } + return text, nil +} diff --git a/browser/cdp_find_test.go b/browser/cdp_find_test.go new file mode 100644 index 0000000..4e5df00 --- /dev/null +++ b/browser/cdp_find_test.go @@ -0,0 +1,98 @@ +package browser + +import ( + "strings" + "testing" +) + +// TestJSLocatorText verifies the generated JS contains the expected search text. 
+func TestJSLocatorText(t *testing.T) { + t.Run("substring", func(t *testing.T) { + js := jsLocatorText("Sign in", false) + // Case-folded text should appear in the generated JS. + if !strings.Contains(js, "sign in") { + t.Errorf("expected lowercase text in JS, got: %s", js) + } + if !strings.Contains(js, "includes") { + t.Errorf("expected substring match (includes), got: %s", js) + } + }) + t.Run("exact", func(t *testing.T) { + js := jsLocatorText("Submit", true) + if !strings.Contains(js, "Submit") { + t.Errorf("expected original-case text in exact JS, got: %s", js) + } + if !strings.Contains(js, "=== text") { + t.Errorf("expected strict equality in exact JS, got: %s", js) + } + }) +} + +// TestJSLocatorLabel verifies the label locator references both htmlFor and aria-label. +func TestJSLocatorLabel(t *testing.T) { + js := jsLocatorLabel("Email") + if !strings.Contains(js, "htmlFor") { + t.Errorf("expected htmlFor in label JS, got: %s", js) + } + if !strings.Contains(js, "aria-label") { + t.Errorf("expected aria-label fallback in label JS, got: %s", js) + } +} + +// TestJSLocatorAttr verifies attribute-based locator JS. +func TestJSLocatorAttr(t *testing.T) { + js := jsLocatorAttr("placeholder", "Search") + if !strings.Contains(js, `[placeholder]`) { + t.Errorf("expected attribute selector, got: %s", js) + } + if !strings.Contains(js, "search") { + t.Errorf("expected lowercased query, got: %s", js) + } +} + +// TestJSLocatorNth verifies nth and last locators. 
+func TestJSLocatorNth(t *testing.T) { + js := jsLocatorNth("li.item", 2) + if !strings.Contains(js, `"li.item"`) { + t.Errorf("expected CSS selector in nth JS, got: %s", js) + } + if !strings.Contains(js, "els[2]") { + t.Errorf("expected index 2 in nth JS, got: %s", js) + } +} + +func TestJSLocatorLast(t *testing.T) { + js := jsLocatorLast("tr") + if !strings.Contains(js, "els.length - 1") { + t.Errorf("expected last-element expression in JS, got: %s", js) + } +} + +// TestResolveLocatorKind verifies all LocatorKind constants are handled +// without panicking (no real browser required — just routing logic). +func TestResolveLocatorKindRouting(t *testing.T) { + kinds := []LocatorKind{ + LocatorRole, LocatorText, LocatorLabel, LocatorPlaceholder, + LocatorAlt, LocatorTitle, LocatorTestID, + LocatorFirst, LocatorLast, LocatorNth, + } + for _, k := range kinds { + if k == "" { + t.Errorf("LocatorKind constant is empty") + } + } +} + +// TestFindActionConstants verifies all action constants are non-empty. +func TestFindActionConstants(t *testing.T) { + actions := []FindAction{ + FindActionClick, FindActionFill, FindActionType, + FindActionHover, FindActionFocus, + FindActionCheck, FindActionUncheck, FindActionText, + } + for _, a := range actions { + if a == "" { + t.Errorf("FindAction constant is empty") + } + } +} diff --git a/browser/manager_find.go b/browser/manager_find.go new file mode 100644 index 0000000..b3b2c4e --- /dev/null +++ b/browser/manager_find.go @@ -0,0 +1,24 @@ +package browser + +import ( + "context" + "fmt" +) + +// Find locates an element using the given semantic locator and dispatches action. +// When action is FindActionText the trimmed textContent is returned; otherwise +// the return string is always empty. +// +// Multiple matches: the first matching element is used. +// No match: returns an error describing the locator that was searched. 
+func (m *Manager) Find(ctx context.Context, sessionName, tabName string, loc FindLocator, action FindAction, value string) (string, error) { + rt, err := m.resolveTarget(ctx, sessionName, tabName, "find") + if err != nil { + return "", err + } + result, err := FindTarget(ctx, rt.DebugURL, rt.TargetID, loc, action, value) + if err != nil { + return "", fmt.Errorf("find: %w", err) + } + return result, nil +} diff --git a/cmd/tap/browser_find.go b/cmd/tap/browser_find.go new file mode 100644 index 0000000..c27c147 --- /dev/null +++ b/cmd/tap/browser_find.go @@ -0,0 +1,528 @@ +package main + +import ( + "context" + "fmt" + "os" + "strconv" + + "github.com/urfave/cli/v3" + "github.com/vaayne/tap/browser" +) + +// browserFindCmd returns the 'tap browser find' parent command with one +// subcommand per locator kind. +func browserFindCmd() *cli.Command { + return &cli.Command{ + Name: "find", + Usage: "Locate elements by semantic attribute and perform an action", + Description: `Find elements using semantic locators (role, text, label, …) and +interact with them without needing a CSS selector. + +Supported actions: + click — real mouse click + fill — set value via React-compatible native setter (requires <value>) + type — simulate key-by-key typing (requires <value>) + hover — move mouse to element centre + focus — keyboard-focus the element + check — check a checkbox or radio + uncheck — uncheck a checkbox or radio + text — print the element's trimmed textContent + +Multiple matches: the first matching element is used. 
+ +Examples: + tap browser find role button click --name "Submit" + tap browser find text "Sign in" click + tap browser find label "Email" fill "me@example.com" + tap browser find placeholder "Search…" type "golang" + tap browser find testid "login-btn" click + tap browser find first "li.item" text + tap browser find nth 2 "li.item" click + tap browser find last "tr" text`, + Commands: []*cli.Command{ + findRoleCmd(), + findTextCmd(), + findLabelCmd(), + findPlaceholderCmd(), + findAltCmd(), + findTitleCmd(), + findTestIDCmd(), + findFirstCmd(), + findLastCmd(), + findNthCmd(), + }, + } +} + +// --------------------------------------------------------------------------- +// Shared helpers +// --------------------------------------------------------------------------- + +// runFind builds the browser manager, runs Manager.Find against the --session/--tab target, and prints any non-empty result to stdout. +func runFind(ctx context.Context, cmd *cli.Command, loc browser.FindLocator, action browser.FindAction, value string) error { + mgr, err := newBrowserManager(cmd) + if err != nil { + return err + } + result, err := mgr.Find(ctx, cmd.String("session"), cmd.String("tab"), loc, action, value) + if err != nil { + return err + } + if result != "" { + fmt.Println(result) + } + return nil +} + +// parseAction converts s to a FindAction, or returns an error listing the valid actions when s is not one of them. +func parseAction(s string) (browser.FindAction, error) { + switch browser.FindAction(s) { + case browser.FindActionClick, browser.FindActionFill, browser.FindActionType, + browser.FindActionHover, browser.FindActionFocus, + browser.FindActionCheck, browser.FindActionUncheck, browser.FindActionText: + return browser.FindAction(s), nil + default: + return "", fmt.Errorf("unknown action %q (use: click fill type hover focus check uncheck text)", s) + } +} + +// requireValue returns an error when action is fill or type and value is empty. 
+func requireValue(action browser.FindAction, value string) error { + if (action == browser.FindActionFill || action == browser.FindActionType) && value == "" { + return fmt.Errorf("action %q requires a <value> argument", action) + } + return nil +} + +// --------------------------------------------------------------------------- +// Subcommands +// --------------------------------------------------------------------------- + +func findRoleCmd() *cli.Command { + return &cli.Command{ + Name: "role", + Usage: "Find by ARIA role", + ArgsUsage: "<role> <action> [value]", + Flags: append(browserActionFlags(false), + &cli.StringFlag{ + Name: "name", + Aliases: []string{"n"}, + Usage: "Filter by accessible name (substring, case-insensitive)", + }, + ), + Description: `Find the first element with the given ARIA role and perform action. + +Use --name to narrow the match to a specific accessible name. + +Examples: + tap browser find role button click --name "Submit" + tap browser find role textbox fill "hello@example.com" --name "Email" + tap browser find role link click --name "Sign in"`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + args := cmd.Args().Slice() + if len(args) < 2 { + return fmt.Errorf("usage: tap browser find role <role> <action> [value]") + } + role, actionStr := args[0], args[1] + value := "" + if len(args) > 2 { + value = args[2] + } + action, err := parseAction(actionStr) + if err != nil { + return err + } + if err := requireValue(action, value); err != nil { + return err + } + loc := browser.FindLocator{Kind: browser.LocatorRole, Role: role, Name: cmd.String("name")} + if err := runFind(ctx, cmd, loc, action, value); err != nil { + return err + } + if action != browser.FindActionText { + fmt.Fprintf(os.Stderr, "find role=%q action=%s ok\n", role, action) + } + return nil + }, + } +} + +func findTextCmd() *cli.Command { + return &cli.Command{ + Name: "text", + Usage: "Find by visible text content", + ArgsUsage: "<text> <action> [value]", + Flags: 
append(browserActionFlags(false), + &cli.BoolFlag{ + Name: "exact", + Usage: "Require an exact text match instead of substring", + }, + ), + Description: `Find the first element whose textContent contains (or exactly matches) +the given text, then perform action. + +Examples: + tap browser find text "Sign in" click + tap browser find text "Submit" click --exact + tap browser find text "Welcome" text`, + Action: func(ctx context.Context, cmd *cli.Command) error { + configureLogging(cmd) + args := cmd.Args().Slice() + if len(args) < 2 { + return fmt.Errorf("usage: tap browser find text <text> <action> [value]") + } + text, actionStr := args[0], args[1] + value := "" + if len(args) > 2 { + value = args[2] + } + action, err := parseAction(actionStr) + if err != nil { + return err + } + if err := requireValue(action, value); err != nil { + return err + } + loc := browser.FindLocator{Kind: browser.LocatorText, Text: text, Exact: cmd.Bool("exact")} + if err := runFind(ctx, cmd, loc, action, value); err != nil { + return err + } + if action != browser.FindActionText { + fmt.Fprintf(os.Stderr, "find text=%q action=%s ok\n", text, action) + } + return nil + }, + } +} + +func findLabelCmd() *cli.Command { + return &cli.Command{ + Name: "label", + Usage: "Find a form element by its label text", + ArgsUsage: "