From a9299f716b3bdb372ed63d3cff97b41d29b7c675 Mon Sep 17 00:00:00 2001
From: Hiten Shah
Date: Wed, 22 Apr 2026 22:36:15 -0700
Subject: [PATCH] fix(helpers): resolve domain skills for subdomains and hyphenated hosts

---
 helpers.py | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/helpers.py b/helpers.py
index ffe5ad90..e861c89f 100644
--- a/helpers.py
+++ b/helpers.py
@@ -47,10 +47,34 @@ def drain_events():
     return _send({"meta": "drain_events"})["events"]
 
 # --- navigation / page ---
+def _skill_dir(url):
+    """Resolve a URL to its domain-skills/ subdirectory, or None.
+
+    Candidates are tried in order; the first existing directory wins:
+    each hostname label left to right (``old.reddit.com`` → ``old``,
+    ``reddit``, ``com``), then the whole hostname with dots hyphenated
+    (``booking.com`` → ``booking-com``, ``itch.io`` → ``itch-io``).
+    A leading ``www.`` is ignored. URLs without a hostname (e.g.
+    ``about:blank``) resolve to None.
+    """
+    host = (urlparse(url).hostname or "").removeprefix("www.")
+    # Drop empty labels: ``base / ""`` is ``base`` itself, so a missing
+    # hostname or a trailing dot would otherwise match the skills root.
+    labels = [label for label in host.split(".") if label]
+    if not labels:
+        return None
+    base = Path(__file__).parent / "domain-skills"
+    for name in (*labels, "-".join(labels)):
+        d = base / name
+        if d.is_dir():
+            return d
+    return None
+
+
 def goto(url):
     r = cdp("Page.navigate", url=url)
-    d = (Path(__file__).parent / "domain-skills" / (urlparse(url).hostname or "").removeprefix("www.").split(".")[0])
-    return {**r, "domain_skills": sorted(p.name for p in d.rglob("*.md"))[:10]} if d.is_dir() else r
+    d = _skill_dir(url)
+    return {**r, "domain_skills": sorted(p.name for p in d.rglob("*.md"))[:10]} if d else r
 
 def page_info():
     """{url, title, w, h, sx, sy, pw, ph} — viewport + scroll + page size.