Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,10 @@ The *durable* shape of the site — the map, not the diary. Focus on what the ne
- Clicking: capture_screenshot() → read the pixel off the image → click_at_xy(x, y) → capture_screenshot() to verify. Suppress the Playwright-habit reflex of "locate first, then click" — no getBoundingClientRect, no selector hunt. Drop to DOM only when the target has no visible geometry (hidden input, 0×0 node). Hit-testing happens in Chrome's browser process, so clicks go through iframes / shadow DOM / cross-origin without extra work.
- Bulk HTTP: http_get(url) + ThreadPoolExecutor. No browser for static pages (249 Netflix pages in 2.8s).
- After goto: wait_for_load().
- React/SPA readiness: after wait_for_load(), use wait_for_selector(...) or wait_for_js(...) to wait for the hydrated UI state you need, rather than adding a fixed sleep.
- Wrong/stale tab: ensure_real_tab(). Use it when the current tab is stale or internal; the daemon also auto-recovers from stale sessions on the next call.
- Verification: print(page_info()) is the simplest "is this alive?" check, but screenshots are the default way to verify whether a visible action actually worked.
- Agent observation: page_outline() returns a compact list of visible interactive elements with text, roles, labels, hrefs, and rects.
- DOM reads: use js(...) for inspection and extraction when the screenshot shows that coordinates are the wrong tool.
- Iframe sites (Azure blades, Salesforce): click_at_xy(x, y) passes through; only drop to iframe DOM work when coordinate clicks are the wrong tool.
- Auth wall: redirected to login → stop and ask the user. Don't type credentials from screenshots.
Expand Down
68 changes: 68 additions & 0 deletions helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,40 @@ def wait_for_load(timeout=15.0):
time.sleep(0.3)
return False

def wait_for_js(expression, timeout=10.0, interval=0.2):
    """Poll a JS expression until it returns a truthy value, then return that value.

    Useful for React/SPA pages where document.readyState is complete before the
    component the agent needs has hydrated or rendered.

    The expression is evaluated with js() at least once, even when `timeout`
    is zero or negative, and is then re-evaluated about every `interval`
    seconds until `timeout` seconds have elapsed. Returns the first truthy
    value, or False if the deadline passes first. Exceptions raised by js()
    are not caught, so a broken expression aborts the wait immediately.
    """
    # Monotonic clock: a system clock adjustment (e.g. an NTP step) must not
    # stretch or cut short the wait.
    deadline = time.monotonic() + timeout
    while True:
        value = js(expression)
        if value:
            return value
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline; the loop then polls one final time.
        time.sleep(min(interval, remaining))

def wait_for_selector(selector, timeout=10.0, visible=False, interval=0.2):
    """Wait until `document.querySelector(selector)` matches an element.

    With visible=False a mounted DOM node is enough. With visible=True the
    first match must also have a non-zero bounding rect, must not be
    `visibility: hidden` or `display: none`, and must overlap the viewport.

    Returns True once the condition holds and False if wait_for_js() gives up
    first. `timeout` and `interval` are forwarded to wait_for_js() unchanged.
    """
    # json.dumps yields valid JS literals: a quoted, escaped string for the
    # selector and true/false for the flag.
    selector_literal = json.dumps(selector)
    visible_literal = json.dumps(bool(visible))
    probe = f"""
const el = document.querySelector({selector_literal});
if (!el) return false;
if (!{visible_literal}) return true;
const r = el.getBoundingClientRect();
const s = getComputedStyle(el);
return !!(r.width && r.height && s.visibility !== 'hidden' && s.display !== 'none' &&
r.bottom >= 0 && r.right >= 0 && r.top <= innerHeight && r.left <= innerWidth);
"""
    outcome = wait_for_js(probe, timeout=timeout, interval=interval)
    return bool(outcome)

def _js_exception_text(r):
    """Build a readable message from an evaluation result's `exceptionDetails`.

    Collects, in order, the details' `text` and the nested exception's
    `description` and `value`, skips the falsy ones, and joins the rest with
    newlines. Returns an empty string when none of them is present.
    """
    details = r.get("exceptionDetails") or {}
    exception = details.get("exception") or {}
    candidates = (
        details.get("text"),
        exception.get("description"),
        exception.get("value"),
    )
    pieces = []
    for candidate in candidates:
        if candidate:
            pieces.append(str(candidate))
    return "\n".join(pieces)

def js(expression, target_id=None):
"""Run JS in the attached tab (default) or inside an iframe target (via iframe_target()).

Expand All @@ -207,8 +241,42 @@ def js(expression, target_id=None):
if "return " in expression and not expression.strip().startswith("("):
expression = f"(function(){{{expression}}})()"
r = cdp("Runtime.evaluate", session_id=sid, expression=expression, returnByValue=True, awaitPromise=True)
if "exceptionDetails" in r:
raise RuntimeError(f"JavaScript evaluation failed: {_js_exception_text(r) or 'unknown error'}")
return r.get("result", {}).get("value")

def page_outline(limit=80):
    """Compact summary of visible interactive elements for agent observation.

    Scans links, buttons, form controls, and anything carrying `role`,
    `aria-label`, or `contenteditable="true"`. Elements with an empty rect,
    `visibility: hidden`, `display: none`, or a rect entirely outside the
    viewport are skipped.

    `limit` caps the number of entries; it is coerced with int() and negative
    values are clamped to 0.

    Returns a list of dicts with keys `i` (index in the list), `tag`, `text`
    (whitespace-collapsed, at most 140 chars), `role`, `aria`, `type`, `href`,
    `disabled`, and `rect` as rounded `[x, y, width, height]`. Returns `[]`
    when js() yields a falsy result. Errors raised by js() propagate.
    """
    limit = max(0, int(limit))
    # Two guards in the label fallback chain:
    #   * `el.value` is numeric on elements such as <progress>, <meter> and
    #     <li>, so the chosen label is coerced with String() before
    #     .replace(); otherwise one such element makes the whole scan throw.
    #   * the value of a password input is never used as the label, so typed
    #     secrets do not end up in the outline.
    expression = f"""
const limit = {limit};
const q = 'a,button,input,textarea,select,[role],[aria-label],[contenteditable="true"]';
const out = [];
for (const el of document.querySelectorAll(q)) {{
if (out.length >= limit) break;
const r = el.getBoundingClientRect();
const s = getComputedStyle(el);
if (!r.width || !r.height || s.visibility === 'hidden' || s.display === 'none') continue;
if (r.bottom < 0 || r.right < 0 || r.top > innerHeight || r.left > innerWidth) continue;
const secret = (el.getAttribute('type') || '').toLowerCase() === 'password';
const text = String(el.innerText || (secret ? '' : el.value) || el.getAttribute('aria-label') ||
el.getAttribute('placeholder') || el.getAttribute('title') || '')
.replace(/\\s+/g, ' ').trim().slice(0, 140);
out.push({{
i: out.length,
tag: el.tagName.toLowerCase(),
text,
role: el.getAttribute('role'),
aria: el.getAttribute('aria-label'),
type: el.getAttribute('type'),
href: el.href || null,
disabled: !!el.disabled || el.getAttribute('aria-disabled') === 'true',
rect: [Math.round(r.x), Math.round(r.y), Math.round(r.width), Math.round(r.height)]
}});
}}
return out;
"""
    return js(expression) or []


# Key name -> numeric key code.
# NOTE(review): the values look like legacy DOM `keyCode` numbers; confirm
# against the code that consumes this table.
_KC = {
    "Enter": 13,
    "Tab": 9,
    "Escape": 27,
    "Backspace": 8,
    " ": 32,
    "ArrowLeft": 37,
    "ArrowUp": 38,
    "ArrowRight": 39,
    "ArrowDown": 40,
}

Expand Down
61 changes: 61 additions & 0 deletions test_js.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pytest
from unittest.mock import patch
import helpers

Expand Down Expand Up @@ -33,3 +34,63 @@ def test_iife_with_internal_return_is_not_double_wrapped():
with patch("helpers.cdp", side_effect=fake_cdp):
helpers.js("(function(){ return document.title; })()")
assert _evaluated_expression(captured) == "(function(){ return document.title; })()"


def test_js_raises_on_runtime_exception():
    """js() turns an `exceptionDetails` payload into a RuntimeError."""

    def failing_cdp(method, **kwargs):
        # Fresh payload on every call, shaped like a failed evaluation.
        exception = {"description": "ReferenceError: missing is not defined"}
        details = {"text": "Uncaught ReferenceError", "exception": exception}
        return {"exceptionDetails": details}

    with patch("helpers.cdp", side_effect=failing_cdp), \
            pytest.raises(RuntimeError, match="missing is not defined"):
        helpers.js("missing.value")


def test_wait_for_js_returns_first_truthy_value():
    """Two falsy polls, then a truthy one: that value is returned after two sleeps."""
    poll_results = [False, None, {"ready": True}]
    with patch("helpers.js", side_effect=poll_results):
        with patch("helpers.time.sleep") as fake_sleep:
            outcome = helpers.wait_for_js("window.__ready", timeout=1, interval=0.01)
            assert outcome == {"ready": True}

    assert fake_sleep.call_count == 2


def test_wait_for_js_propagates_js_errors():
    """An error from js() escapes wait_for_js() on the first poll, before any sleep."""
    failure = RuntimeError("JavaScript evaluation failed")
    with patch("helpers.js", side_effect=failure):
        with patch("helpers.time.sleep") as fake_sleep:
            with pytest.raises(RuntimeError, match="JavaScript evaluation failed"):
                helpers.wait_for_js("missing.value", timeout=1, interval=0.01)

    fake_sleep.assert_not_called()


def test_wait_for_selector_uses_visible_predicate():
    """visible=True embeds the geometry/style checks and forwards the timing kwargs."""
    with patch("helpers.wait_for_js", return_value=True) as waiter:
        assert helpers.wait_for_selector("button[aria-label='Save']", timeout=3, visible=True)

    positional, keywords = waiter.call_args
    script = positional[0]
    expected_fragments = (
        "button[aria-label='Save']",
        "getBoundingClientRect",
        "visibility !== 'hidden'",
    )
    for fragment in expected_fragments:
        assert fragment in script
    assert keywords == {"timeout": 3, "interval": 0.2}


def test_page_outline_returns_agent_summary():
    """page_outline() returns js()'s list and bakes the limit and query into the script."""
    fake_elements = [{"tag": "button", "text": "Save", "rect": [10, 20, 80, 30]}]
    with patch("helpers.js", return_value=fake_elements) as fake_js:
        assert helpers.page_outline(limit=3) == fake_elements

    script = fake_js.call_args[0][0]
    for fragment in ("const limit = 3", "a,button,input,textarea,select", "aria-label"):
        assert fragment in script


def test_page_outline_clamps_negative_limit():
    """A negative limit reaches the generated script as 0."""
    with patch("helpers.js", return_value=[]) as fake_js:
        assert helpers.page_outline(limit=-1) == []

    script = fake_js.call_args[0][0]
    assert "const limit = 0" in script