From 60d078447e7a46bcd6eb8b29560c22a216ca2855 Mon Sep 17 00:00:00 2001 From: Lhy099 Date: Sat, 25 Apr 2026 17:21:49 +0800 Subject: [PATCH 1/2] feat: add agent observation and SPA wait helpers --- SKILL.md | 2 ++ helpers.py | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ test_js.py | 37 +++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+) diff --git a/SKILL.md b/SKILL.md index 8e05e3bc..fff44733 100644 --- a/SKILL.md +++ b/SKILL.md @@ -126,8 +126,10 @@ The *durable* shape of the site — the map, not the diary. Focus on what the ne - Clicking: capture_screenshot() → read the pixel off the image → click_at_xy(x, y) → capture_screenshot() to verify. Suppress the Playwright-habit reflex of "locate first, then click" — no getBoundingClientRect, no selector hunt. Drop to DOM only when the target has no visible geometry (hidden input, 0×0 node). Hit-testing happens in Chrome's browser process, so clicks go through iframes / shadow DOM / cross-origin without extra work. - Bulk HTTP: http_get(url) + ThreadPoolExecutor. No browser for static pages (249 Netflix pages in 2.8s). - After goto: wait_for_load(). +- React/SPA readiness: use wait_for_selector(...) or wait_for_js(...) for the hydrated UI state you need instead of fixed sleeps after wait_for_load(). - Wrong/stale tab: ensure_real_tab(). Use it when the current tab is stale or internal; the daemon also auto-recovers from stale sessions on the next call. - Verification: print(page_info()) is the simplest "is this alive?" check, but screenshots are the default way to verify whether a visible action actually worked. +- Agent observation: page_outline() returns a compact list of visible interactive elements with text, roles, labels, hrefs, and rects. - DOM reads: use js(...) for inspection and extraction when the screenshot shows that coordinates are the wrong tool. - Iframe sites (Azure blades, Salesforce): click_at_xy(x, y) passes through; only drop to iframe DOM work when coordinate clicks are the wrong tool. - Auth wall: redirected to login → stop and ask the user. Don't type credentials from screenshots. diff --git a/helpers.py b/helpers.py index fd81b806..ae594430 100644 --- a/helpers.py +++ b/helpers.py @@ -197,6 +197,35 @@ def wait_for_load(timeout=15.0): time.sleep(0.3) return False +def wait_for_js(expression, timeout=10.0, interval=0.2): + """Poll a JS expression until it returns a truthy value, then return that value. + + Useful for React/SPA pages where document.readyState is complete before the + component the agent needs has hydrated or rendered.""" + deadline = time.time() + timeout + while time.time() < deadline: + value = js(expression) + if value: + return value + time.sleep(interval) + return False + +def wait_for_selector(selector, timeout=10.0, visible=False, interval=0.2): + """Wait for `document.querySelector(selector)`. + + Pass visible=True when the agent needs a painted target, not just a mounted + DOM node.""" + expression = f""" +const el = document.querySelector({json.dumps(selector)}); +if (!el) return false; +if (!{json.dumps(bool(visible))}) return true; +const r = el.getBoundingClientRect(); +const s = getComputedStyle(el); +return !!(r.width && r.height && s.visibility !== 'hidden' && s.display !== 'none' && + r.bottom >= 0 && r.right >= 0 && r.top <= innerHeight && r.left <= innerWidth); +""" + return bool(wait_for_js(expression, timeout=timeout, interval=interval)) + def js(expression, target_id=None): """Run JS in the attached tab (default) or inside an iframe target (via iframe_target()). @@ -209,6 +238,38 @@ def js(expression, target_id=None): r = cdp("Runtime.evaluate", session_id=sid, expression=expression, returnByValue=True, awaitPromise=True) return r.get("result", {}).get("value") +def page_outline(limit=80): + """Compact summary of visible interactive elements for agent observation.""" + limit = max(0, int(limit)) + expression = f""" +const limit = {limit}; +const q = 'a,button,input,textarea,select,[role],[aria-label],[contenteditable="true"]'; +const out = []; +for (const el of document.querySelectorAll(q)) {{ + if (out.length >= limit) break; + const r = el.getBoundingClientRect(); + const s = getComputedStyle(el); + if (!r.width || !r.height || s.visibility === 'hidden' || s.display === 'none') continue; + if (r.bottom < 0 || r.right < 0 || r.top > innerHeight || r.left > innerWidth) continue; + const text = (el.innerText || el.value || el.getAttribute('aria-label') || + el.getAttribute('placeholder') || el.getAttribute('title') || '') + .replace(/\\s+/g, ' ').trim().slice(0, 140); + out.push({{ + i: out.length, + tag: el.tagName.toLowerCase(), + text, + role: el.getAttribute('role'), + aria: el.getAttribute('aria-label'), + type: el.getAttribute('type'), + href: el.href || null, + disabled: !!el.disabled || el.getAttribute('aria-disabled') === 'true', + rect: [Math.round(r.x), Math.round(r.y), Math.round(r.width), Math.round(r.height)] + }}); +}} +return out; +""" + return js(expression) or [] + _KC = {"Enter": 13, "Tab": 9, "Escape": 27, "Backspace": 8, " ": 32, "ArrowLeft": 37, "ArrowUp": 38, "ArrowRight": 39, "ArrowDown": 40} diff --git a/test_js.py b/test_js.py index 02d68ae9..3ed9c49e 100644 --- a/test_js.py +++ b/test_js.py @@ -33,3 +33,40 @@ def test_iife_with_internal_return_is_not_double_wrapped(): with patch("helpers.cdp", side_effect=fake_cdp): helpers.js("(function(){ return document.title; })()") assert _evaluated_expression(captured) == "(function(){ return document.title; })()" + + +def test_wait_for_js_returns_first_truthy_value(): + with patch("helpers.js", side_effect=[False, None, {"ready": True}]), \ + patch("helpers.time.sleep") as sleep: + assert helpers.wait_for_js("window.__ready", timeout=1, interval=0.01) == {"ready": True} + + assert sleep.call_count == 2 + + +def test_wait_for_selector_uses_visible_predicate(): + with patch("helpers.wait_for_js", return_value=True) as wait: + assert helpers.wait_for_selector("button[aria-label='Save']", timeout=3, visible=True) + + expression = wait.call_args[0][0] + assert "button[aria-label='Save']" in expression + assert "getBoundingClientRect" in expression + assert "visibility !== 'hidden'" in expression + assert wait.call_args.kwargs == {"timeout": 3, "interval": 0.2} + + +def test_page_outline_returns_agent_summary(): + outline = [{"tag": "button", "text": "Save", "rect": [10, 20, 80, 30]}] + with patch("helpers.js", return_value=outline) as run_js: + assert helpers.page_outline(limit=3) == outline + + expression = run_js.call_args[0][0] + assert "const limit = 3" in expression + assert "a,button,input,textarea,select" in expression + assert "aria-label" in expression + + +def test_page_outline_clamps_negative_limit(): + with patch("helpers.js", return_value=[]) as run_js: + assert helpers.page_outline(limit=-1) == [] + + assert "const limit = 0" in run_js.call_args[0][0] From b19408362e1f31c91d03f0f33ac4c887f23fe3fa Mon Sep 17 00:00:00 2001 From: Lhy099 Date: Sat, 25 Apr 2026 17:39:19 +0800 Subject: [PATCH 2/2] fix: propagate JavaScript evaluation errors --- helpers.py | 7 +++++++ test_js.py | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/helpers.py b/helpers.py index ae594430..ddd17073 100644 --- a/helpers.py +++ b/helpers.py @@ -226,6 +226,11 @@ def wait_for_selector(selector, timeout=10.0, visible=False, interval=0.2): """ return bool(wait_for_js(expression, timeout=timeout, interval=interval)) +def _js_exception_text(r): + d = r.get("exceptionDetails") or {} + e = d.get("exception") or {} + return "\n".join(str(x) for x in (d.get("text"), e.get("description"), e.get("value")) if x) + def js(expression, target_id=None): """Run JS in the attached tab (default) or inside an iframe target (via iframe_target()). @@ -236,6 +241,8 @@ def js(expression, target_id=None): if "return " in expression and not expression.strip().startswith("("): expression = f"(function(){{{expression}}})()" r = cdp("Runtime.evaluate", session_id=sid, expression=expression, returnByValue=True, awaitPromise=True) + if "exceptionDetails" in r: + raise RuntimeError(f"JavaScript evaluation failed: {_js_exception_text(r) or 'unknown error'}") return r.get("result", {}).get("value") def page_outline(limit=80): diff --git a/test_js.py b/test_js.py index 3ed9c49e..e9e9db6f 100644 --- a/test_js.py +++ b/test_js.py @@ -1,3 +1,4 @@ +import pytest from unittest.mock import patch import helpers @@ -35,6 +36,20 @@ def test_iife_with_internal_return_is_not_double_wrapped(): assert _evaluated_expression(captured) == "(function(){ return document.title; })()" +def test_js_raises_on_runtime_exception(): + def fake_cdp(method, **kwargs): + return { + "exceptionDetails": { + "text": "Uncaught ReferenceError", + "exception": {"description": "ReferenceError: missing is not defined"}, + } + } + + with patch("helpers.cdp", side_effect=fake_cdp): + with pytest.raises(RuntimeError, match="missing is not defined"): + helpers.js("missing.value") + + def test_wait_for_js_returns_first_truthy_value(): with patch("helpers.js", side_effect=[False, None, {"ready": True}]), \ patch("helpers.time.sleep") as sleep: @@ -43,6 +58,15 @@ def test_wait_for_js_returns_first_truthy_value(): assert sleep.call_count == 2 +def test_wait_for_js_propagates_js_errors(): + with patch("helpers.js", side_effect=RuntimeError("JavaScript evaluation failed")), \ + patch("helpers.time.sleep") as sleep: + with pytest.raises(RuntimeError, match="JavaScript evaluation failed"): + helpers.wait_for_js("missing.value", timeout=1, interval=0.01) + + sleep.assert_not_called() + + def test_wait_for_selector_uses_visible_predicate(): with patch("helpers.wait_for_js", return_value=True) as wait: assert helpers.wait_for_selector("button[aria-label='Save']", timeout=3, visible=True)