diff --git a/.gitignore b/.gitignore index 6cdb04c..5b96c1f 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,7 @@ workspace/pdfs/ workspace/tmp/ .DS_Store + +# Node test harness deps (Acceptance A) +tests/js/node_modules/ +tests/js/package-lock.json diff --git a/tests/js/README.md b/tests/js/README.md new file mode 100644 index 0000000..c51de48 --- /dev/null +++ b/tests/js/README.md @@ -0,0 +1,54 @@ +# Dashboard responsiveness test harness (Acceptance A) + +Real-browser gates for the dashboard's load responsiveness, used to prove the +fix for the "loaded then frozen" bug (PR: dashboard responsiveness + metric +tooltips). + +## Setup + +```bash +cd tests/js +npm install # playwright reuses the already-cached Chromium +``` + +Chromium is driven headless with `--no-sandbox --disable-dev-shm-usage`. + +## Gates + +### `npm run e2e` — end-to-end responsiveness (primary) + +`responsiveness_e2e.mjs` boots `seed_and_serve.py` (a Flask server on a throwaway +SQLite DB seeded to a production shape: a ~3300-node taxonomy **tree** and all 9 +overview metrics non-zero), drives the real dashboard in Chromium, and asserts: + +- **First paint ≤ 2s**: all 9 overview stat cards show real (non-zero) numbers. +- **After load**: idle `IDLE_MS` (default 60000), click every tab, switch language + once — with **no main-thread long task > 200ms**, the page staying interactive + (every tab activates, language toggle takes effect), and **no console/page error**. + +Run a faster iteration with a shorter idle: + +```bash +IDLE_MS=6000 node responsiveness_e2e.mjs +``` + +This test **fails on the pre-fix code** (the O(n²) taxonomy-dropdown build froze +the main thread for seconds during the idle prefetch) and **passes on the fix**. 
+ +### `npm run perf` — deterministic perf microbenchmark + +`taxonomy_dropdown_perf.mjs` builds the taxonomy option list inside real Chromium +two ways and asserts the production path (DocumentFragment, single attach) is fast +while the old `innerHTML +=` path is catastrophically slower: + +- N=3300 (real tree size): fast build **< 50ms** +- N=5000 (stress): fast build **< 200ms** (the long-task threshold) +- the O(n²) `innerHTML +=` reference is many times slower (seconds), proving the + harness catches a quadratic regression. + +## Notes + +- `node_modules/` and `package-lock.json` are gitignored; run `npm install` first. +- The Python server uses the repo's `.venv` (Flask). It sets a dummy + `MINIMAX_API_KEY` and seeds an active research agenda so `/api/providers` and + `/api/research_agenda/*` return 200 the way a healthy prod deployment does. diff --git a/tests/js/package.json b/tests/js/package.json new file mode 100644 index 0000000..b0b6cdb --- /dev/null +++ b/tests/js/package.json @@ -0,0 +1,14 @@ +{ + "name": "deepgraph-dashboard-e2e", + "private": true, + "version": "0.0.0", + "type": "module", + "description": "Node test harness for the dashboard responsiveness acceptance gates (Acceptance A).", + "scripts": { + "perf": "node taxonomy_dropdown_perf.mjs", + "e2e": "node responsiveness_e2e.mjs" + }, + "dependencies": { + "playwright": "^1.49.0" + } +} diff --git a/tests/js/responsiveness_e2e.mjs b/tests/js/responsiveness_e2e.mjs new file mode 100644 index 0000000..eaac699 --- /dev/null +++ b/tests/js/responsiveness_e2e.mjs @@ -0,0 +1,217 @@ +// Acceptance A (primary) — real-browser end-to-end responsiveness test. +// +// Drives the actual dashboard in headless Chromium against a Flask server +// seeded with a production-shaped dataset (~3300 taxonomy nodes; all 9 metrics +// non-zero). Verifies: +// 1. First paint: all 9 overview stat cards show real (non-zero) numbers +// within FIRST_PAINT_BUDGET_MS. +// 2. 
After load: idle for IDLE_MS, then click every tab and switch language +// once — with NO main-thread long task > LONGTASK_BUDGET_MS, the page +// staying interactive, and no console error / page error throughout. +// +// The old O(n^2) taxonomy-dropdown build froze the main thread for seconds +// during the idle prefetch; this test fails on that code and passes on the fix. +// +// Env: IDLE_MS (default 60000), HEADLESS (default 1). +import { chromium } from "playwright"; +import { spawn } from "node:child_process"; +import { once } from "node:events"; +import { createInterface } from "node:readline"; +import { appendFileSync } from "node:fs"; + +// Optional: mirror report lines to a file (set TRACE_FILE) so results survive +// even if a CI wrapper truncates stdout for a long-running browser+server run. +const TRACE = process.env.TRACE_FILE; +function trace(m) { + if (TRACE) { try { appendFileSync(TRACE, `${m}\n`); } catch { /* best effort */ } } +} + +const REPO_ROOT = new URL("../../", import.meta.url).pathname; +const PY = `${REPO_ROOT}.venv/bin/python`; +const SERVER = `${REPO_ROOT}tests/js/seed_and_serve.py`; +const PORT = 5099; + +const IDLE_MS = parseInt(process.env.IDLE_MS || "60000", 10); +const FIRST_PAINT_BUDGET_MS = 2000; +const LONGTASK_BUDGET_MS = 200; + +const STAT_IDS = [ + "statPapers", "statResults", "statTaxonomy", "statContradictions", + "statInsights", "statTokens", "statExperiments", "statDeepDiscoveries", + "statCompletePapers", +]; +// Every tab in the dashboard (main nav + collapsed advanced nav). 
+const TABS = [ + "overview", "explore", "evidence", "generated-papers", "insights", "papers", + "paper-progress", "discoveries", "experiments", "feed", "providers", "agenda", +]; + +const fails = []; +function check(cond, msg) { + if (!cond) { fails.push(msg); console.error(`FAIL: ${msg}`); trace("FAIL " + msg); } +} +// Mirror every report line to the trace file too: the sustained chromium+server +// run can have its shell killed before stdout is captured, but appendFileSync +// survives, so results are never lost. +function report(m) { console.log(m); trace("REPORT " + m); } + +// ── start seeded server ──────────────────────────────────────────────── +const server = spawn(PY, [SERVER, String(PORT)], { cwd: REPO_ROOT }); +let serverReady = false; +const rl = createInterface({ input: server.stdout }); +rl.on("line", (l) => { if (l.startsWith("READY")) serverReady = true; console.log(`[server] ${l}`); }); +server.stderr.on("data", (d) => process.stderr.write(`[server:err] ${d}`)); + +async function waitFor(pred, timeoutMs, label) { + const t0 = Date.now(); + while (Date.now() - t0 < timeoutMs) { + if (pred()) return; + await new Promise((r) => setTimeout(r, 100)); + } + throw new Error(`timeout waiting for ${label}`); +} + +let browser; +try { + await waitFor(() => serverReady, 60000, "server READY"); + + browser = await chromium.launch({ + headless: process.env.HEADLESS !== "0", + args: ["--no-sandbox", "--disable-dev-shm-usage"], + }); + const page = await browser.newPage(); + + // Install a long-task observer BEFORE any page script runs. 
+ await page.addInitScript(() => { + window.__longtasks = []; + window.__phase = "load"; + try { + const obs = new PerformanceObserver((list) => { + for (const e of list.getEntries()) { + window.__longtasks.push({ duration: e.duration, startTime: e.startTime, name: e.name, phase: window.__phase }); + } + }); + obs.observe({ entryTypes: ["longtask"], buffered: true }); + } catch (_) { /* longtask unsupported */ } + }); + const setPhase = (p) => page.evaluate((x) => { window.__phase = x; }, p); + + const consoleErrors = []; + const pageErrors = []; + page.on("console", (m) => { if (m.type() === "error") consoleErrors.push(m.text()); }); + page.on("pageerror", (e) => pageErrors.push(String(e))); + + // ── 1) first paint ─────────────────────────────────────────────────── + const t0 = Date.now(); + await page.goto(`http://127.0.0.1:${PORT}/`, { waitUntil: "commit" }); + await page.waitForFunction( + (ids) => ids.every((id) => { + const el = document.getElementById(id); + const txt = el && el.textContent.trim(); + return txt && txt !== "0"; + }), + STAT_IDS, + { timeout: 10000 } + ); + const firstPaintMs = Date.now() - t0; + const statValues = await page.evaluate( + (ids) => Object.fromEntries(ids.map((id) => [id, document.getElementById(id).textContent.trim()])), + STAT_IDS + ); + report(`first paint (9 cards populated): ${firstPaintMs}ms ${JSON.stringify(statValues)}`); + check(firstPaintMs <= FIRST_PAINT_BUDGET_MS, `first paint ${firstPaintMs}ms > ${FIRST_PAINT_BUDGET_MS}ms`); + for (const id of STAT_IDS) { + check(statValues[id] && statValues[id] !== "0", `${id} not a real number: ${JSON.stringify(statValues[id])}`); + } + + // ── 1b) tooltips (Acceptance B): every stat card has a real, visible + // hover explanation rendered into its title attribute. 
───────────── + const tooltips = await page.$$eval(".stat-card[data-i18n-title]", (cards) => + cards.map((c) => ({ key: c.getAttribute("data-i18n-title"), title: (c.getAttribute("title") || "").trim() })) + ); + check(tooltips.length === 9, `expected 9 stat-card tooltips, found ${tooltips.length}`); + for (const t of tooltips) { + check(t.title.length > 0, `stat card ${t.key} has no visible title tooltip`); + } + report(`stat-card tooltips rendered: ${tooltips.length}/9 (e.g. ${JSON.stringify(tooltips[0])})`); + + // ── 2a) idle (lets the idle prefetch build the 3300-node dropdown) ───── + await setPhase("idle-prefetch"); + console.log(`idling ${IDLE_MS}ms (prefetch builds the taxonomy dropdown here)...`); + await page.waitForTimeout(IDLE_MS); + + // Make sure the prefetch actually ran (dropdown filled) — otherwise we + // would not be exercising the path that used to freeze. + const optionCount = await page.$eval("#evidenceNodeOptions", (s) => s.options.length).catch(() => 0); + report(`taxonomy dropdown options after idle: ${optionCount}`); + check(optionCount >= 3000, `taxonomy dropdown not prefetched (only ${optionCount} options)`); + + // ── 2b) click every tab; assert each activates quickly (interactive) ─── + await page.evaluate(() => { + const d = document.querySelector("details.advanced-nav"); + if (d) d.open = true; // reveal advanced tabs so they are clickable + }); + for (const tab of TABS) { + const sel = `[data-tab="${tab}"]`; + const btn = await page.$(sel); + check(!!btn, `tab button missing: ${tab}`); + if (!btn) continue; + await setPhase(`tab:${tab}`); + const ti = Date.now(); + await btn.click(); + try { + await page.waitForFunction( + (t) => document.getElementById("tab-" + t)?.classList.contains("active"), + tab, + { timeout: 3000 } + ); + } catch { + check(false, `tab '${tab}' did not become active within 3s (unresponsive)`); + } + const dt = Date.now() - ti; + trace(`tab '${tab}' switch took ${dt}ms`); + check(dt <= 1500, `tab '${tab}' 
switch took ${dt}ms (>1500ms — janky)`); + } + + // ── 2c) switch language once; assert it takes effect ─────────────────── + await setPhase("lang-switch"); + const navOverviewEn = await page.$eval('[data-i18n="nav.overview"]', (e) => e.textContent.trim()); + const tl = Date.now(); + await page.click('.lang-btn[data-lang="zh"]'); + try { + await page.waitForFunction( + (en) => document.querySelector('[data-i18n="nav.overview"]')?.textContent.trim() !== en, + navOverviewEn, + { timeout: 3000 } + ); + } catch { + check(false, "language switch did not update labels within 3s (unresponsive)"); + } + console.log(`language switch applied in ${Date.now() - tl}ms`); + + // ── 3) verdicts: long tasks + console errors ─────────────────────────── + const longtasks = await page.evaluate(() => window.__longtasks || []); + const worst = longtasks.reduce((m, t) => Math.max(m, t.duration), 0); + const over = longtasks.filter((t) => t.duration > LONGTASK_BUDGET_MS); + report(`long tasks: ${longtasks.length} total, worst ${worst.toFixed(1)}ms, ${over.length} over ${LONGTASK_BUDGET_MS}ms`); + for (const t of over) report(` >budget: ${t.duration.toFixed(0)}ms phase=${t.phase} start=${t.startTime.toFixed(0)}`); + check(over.length === 0, `${over.length} main-thread long task(s) > ${LONGTASK_BUDGET_MS}ms (worst ${worst.toFixed(1)}ms)`); + + if (consoleErrors.length) report(`console errors:\n ${consoleErrors.join("\n ")}`); + if (pageErrors.length) report(`page errors:\n ${pageErrors.join("\n ")}`); + check(consoleErrors.length === 0, `${consoleErrors.length} console error(s)`); + check(pageErrors.length === 0, `${pageErrors.length} page error(s)`); +} catch (e) { + check(false, `exception: ${e && e.stack ? 
e.stack : e}`); +} finally { + if (browser) await browser.close(); + // Wait for "exit" only while the server is still alive: after an early crash the event never fires again and the await would never settle. + if (server.exitCode === null && server.signalCode === null && server.kill("SIGINT")) await once(server, "exit").catch(() => { /* ignore */ }); +} + +if (fails.length) { + report(`VERDICT: ${fails.length} failure(s).`); + process.exitCode = 1; +} else { + report("VERDICT: PASS — first paint fast, page stayed interactive, no long task > 200ms, no console errors."); +} diff --git a/tests/js/seed_and_serve.py b/tests/js/seed_and_serve.py new file mode 100644 index 0000000..520d70c --- /dev/null +++ b/tests/js/seed_and_serve.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +"""Seed a temp SQLite DB with a production-shaped dataset and serve the +dashboard, for the Playwright responsiveness E2E (Acceptance A). + +Seeds every one of the 9 overview metrics to a distinct non-zero value, a +~3300-node taxonomy (the size that froze the page), and papers in mixed +statuses so ``papers_processed`` (the "文献" card) is deliberately *less* than +the total ingested — exercising known trap ①. + +Usage: python seed_and_serve.py [port] +Prints "READY <port>" to stdout once the server is accepting connections. +""" +import json +import os +import sys +import tempfile +import threading +from pathlib import Path + +# Repo root (two levels up from tests/js/) must be importable. +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + +# A throwaway DB so we never touch the real one. +_TMPDIR = tempfile.mkdtemp(prefix="dg_e2e_") +os.environ["DEEPGRAPH_DB_PATH"] = str(Path(_TMPDIR) / "e2e.db") +os.environ.pop("DEEPGRAPH_DATABASE_URL", None) +# Match a healthy prod deployment: an LLM provider is configured (so +# /api/providers reports stats instead of 500-ing). config.py reads this at +# import, so it must be set before any deepgraph module is imported. No real +# API call is made — /api/providers only reports configured-provider stats. 
+os.environ.setdefault("MINIMAX_API_KEY", "e2e-dummy-key") + +from db import database # noqa: E402 + +database.DATABASE_URL = "" +database.DB_PATH = Path(_TMPDIR) / "e2e.db" +database.init_db() + +TAXONOMY_NODES = 3300 +PAPERS_PROCESSED = 1500 # status in extracted/abstracted/reasoned +PAPERS_UNPROCESSED = 700 # status ingested -> total 2200, processed 1500 +RESULTS = 800 +CONTRADICTIONS = 120 +INSIGHTS = 240 +# Large enough to exercise chunked rendering of the discoveries / experiments +# tabs (each renders one card per row) — these lists are unbounded in prod. +EXPERIMENT_RUNS = 250 +DEEP_INSIGHTS = 250 +SUBMISSION_BUNDLES = 12 +TOKENS_PER_PAPER = 1234 + + +def seed(): + db = database + # Seeding only needs the COUNT(*) metrics to be right; relax FK enforcement + # so we can populate tables without standing up every referenced parent row + # (e.g. submission_bundles -> manuscript_runs). + db.execute("PRAGMA foreign_keys=OFF") + # Build a realistic taxonomy TREE (fan-out ~12), NOT 3299 direct children of + # the root. Prod taxonomies are trees; the radial graph renders a node's + # direct children, so a flat root would be an artificial stress that exists + # only in a naive fixture. The Evidence datalist still lists all ~3300 nodes + # (it flattens the whole tree), which is the real picker-size we care about. 
+ from collections import deque + db.execute("INSERT INTO taxonomy_nodes (id, name, parent_id, depth, sort_order) VALUES (?,?,?,?,?)", + ("ml", "Machine Learning", None, 0, 0)) + created, counter, fanout = 1, 0, 12 + q = deque([("ml", 0)]) + while created < TAXONOMY_NODES and q: + parent, depth = q.popleft() + for k in range(fanout): + if created >= TAXONOMY_NODES: + break + nid = f"{parent}.n{k}" + db.execute("INSERT INTO taxonomy_nodes (id, name, parent_id, depth, sort_order) VALUES (?,?,?,?,?)", + (nid, f"Research Area {counter}", parent, depth + 1, k)) + q.append((nid, depth + 1)) + created += 1 + counter += 1 + + # Papers: processed (counted by the 文献 card) + unprocessed (NOT counted). + for i in range(PAPERS_PROCESSED): + st = ("extracted", "abstracted", "reasoned")[i % 3] + db.execute("INSERT INTO papers (id, title, status, token_cost) VALUES (?,?,?,?)", + (f"proc-{i}", f"Processed paper {i}", st, TOKENS_PER_PAPER)) + for i in range(PAPERS_UNPROCESSED): + db.execute("INSERT INTO papers (id, title, status, token_cost) VALUES (?,?,?,?)", + (f"raw-{i}", f"Ingested-only paper {i}", "ingested", 0)) + + # Results (基准结果). + for i in range(RESULTS): + db.execute( + "INSERT INTO results (paper_id, node_id, method_name, dataset_name, metric_name, metric_value) " + "VALUES (?,?,?,?,?,?)", + (f"proc-{i % PAPERS_PROCESSED}", "ml", f"method{i}", f"dataset{i % 50}", "accuracy", 0.9)) + + # Claims + contradictions (矛盾). + for i in range(CONTRADICTIONS * 2): + db.execute("INSERT INTO claims (paper_id, claim_text, claim_type) VALUES (?,?,?)", + (f"proc-{i % PAPERS_PROCESSED}", f"claim {i}", "result")) + for i in range(CONTRADICTIONS): + db.execute("INSERT INTO contradictions (claim_a_id, claim_b_id, description) VALUES (?,?,?)", + (2 * i + 1, 2 * i + 2, f"conflict {i}")) + + # Insights (研究洞见 -> insights table). 
+ for i in range(INSIGHTS): + db.execute( + "INSERT INTO insights (node_id, insight_type, title, hypothesis) VALUES (?,?,?,?)", + ("ml", "cross_domain_bridge", f"Insight {i}", f"hypothesis {i}")) + + # Deep insights (深度发现 -> deep_insights table). + for i in range(DEEP_INSIGHTS): + db.execute("INSERT INTO deep_insights (tier, status, title) VALUES (?,?,?)", + (1 + (i % 2), "discovered", f"Discovery {i}")) + + # Experiment runs (实验运行). + for i in range(EXPERIMENT_RUNS): + db.execute("INSERT INTO experiment_runs (deep_insight_id, status) VALUES (?,?)", + (1 + (i % DEEP_INSIGHTS), "completed")) + + # Submission bundles (投稿包). + for i in range(SUBMISSION_BUNDLES): + db.execute( + "INSERT INTO submission_bundles (manuscript_run_id, bundle_format, status, bundle_path) " + "VALUES (?,?,?,?)", + (i + 1, "arxiv", "ready", f"/tmp/bundle_{i}.zip")) + + # An active research agenda so /api/research_agenda/current returns 200 + # (as a healthy prod deployment with an uploaded agenda does) rather than + # the "no agenda configured" 404. + db.execute( + "INSERT INTO research_agendas (name, description, focus_json, is_active) VALUES (?,?,?,?)", + ("Default Research Agenda", "Seeded for the responsiveness E2E.", + json.dumps(["machine learning"]), 1)) + # A latest selection so /api/research_agenda/selection/latest returns 200 + # (the agenda tab fetches it after loading the current agenda). + db.execute( + "INSERT INTO agenda_selections (agenda_id, selected_insight_id, score, rationale, status) " + "VALUES (?,?,?,?,?)", + (1, 1, 0.91, "Seeded selection for the responsiveness E2E.", "completed")) + + db.commit() + + +def main(): + port = int(sys.argv[1]) if len(sys.argv) > 1 else 5055 + seed() + + from web import app as web_app + web_app.prewarm_stats_cache() # warm the /api/stats cache against seeded data + + # Sanity: every overview metric must be non-zero before we serve. 
+ s = web_app._stats_cache.get() + expect = { + "papers_processed": PAPERS_PROCESSED, + "results_total": RESULTS, + "taxonomy_nodes_total": TAXONOMY_NODES, + "contradictions_total": CONTRADICTIONS, + "insights_total": INSIGHTS, + "tokens_consumed": PAPERS_PROCESSED * TOKENS_PER_PAPER, + "experiment_runs_total": EXPERIMENT_RUNS, + "deep_insights_total": DEEP_INSIGHTS, + "submission_bundles_total": SUBMISSION_BUNDLES, + } + bad = {k: (s.get(k), v) for k, v in expect.items() if s.get(k) != v} + if bad: + print("SEED_MISMATCH " + repr(bad), file=sys.stderr) + sys.exit(2) + + from werkzeug.serving import make_server + srv = make_server("127.0.0.1", port, web_app.app, threaded=True) + t = threading.Thread(target=srv.serve_forever, daemon=True) + t.start() + print(f"READY {port}", flush=True) + try: + while True: + threading.Event().wait(3600) + except KeyboardInterrupt: + srv.shutdown() + + +if __name__ == "__main__": + main() diff --git a/tests/js/taxonomy_dropdown_perf.mjs b/tests/js/taxonomy_dropdown_perf.mjs new file mode 100644 index 0000000..8de4470 --- /dev/null +++ b/tests/js/taxonomy_dropdown_perf.mjs @@ -0,0 +1,133 @@ +// Acceptance A — perf microbenchmark for the taxonomy "); + + const r = await page.evaluate( + ({ STRESS_N, SLOW_N, PROD_N }) => { + const nodes = Array.from({ length: Math.max(STRESS_N, SLOW_N, PROD_N) }, (_, i) => ({ + id: `root.dl.cv.sub${i}.leaf${i}`, + name: `Research Area Node ${i} & "quoted"`, + })); + const sel = document.getElementById("s"); + + // fast == production path: DocumentFragment + new Option(), attach once. 
+ function buildFast(count) { + const frag = document.createDocumentFragment(); + frag.appendChild(new Option("— select —", "")); + for (let i = 0; i < count; i++) { + const n = nodes[i]; + frag.appendChild(new Option(`${n.id} — ${n.name}`, n.id)); + } + sel.replaceChildren(frag); + } + function esc(s) { + return String(s) + .replace(/&/g, "&amp;") + .replace(/</g, "&lt;") + .replace(/>/g, "&gt;") + .replace(/"/g, "&quot;"); + } + // slow == the old O(n^2) bug: innerHTML += per node. + function buildSlow(count) { + sel.innerHTML = `<option value="">— select —</option>`; + for (let i = 0; i < count; i++) { + const n = nodes[i]; + sel.innerHTML += `<option value="${esc(n.id)}">${esc(n.id)} — ${esc(n.name)}</option>`; + } + } + function timeOnce(fn, count) { + sel.innerHTML = ""; + const t0 = performance.now(); + fn(count); + return { dt: performance.now() - t0, count: sel.options.length }; + } + // Median of several runs filters transient GC / scheduler spikes; it + // reflects the build's real per-call cost, which is what governs + // whether a user ever sees a frame drop. + function medianTime(fn, count, runs = 7) { + let last; + const ts = []; + for (let i = 0; i < runs; i++) { + last = timeOnce(fn, count); + ts.push(last.dt); + } + ts.sort((a, b) => a - b); + return { dt: ts[Math.floor(ts.length / 2)], count: last.count }; + } + + buildFast(STRESS_N); // warm up JIT / layout engine + const prod = medianTime(buildFast, PROD_N); + const stress = medianTime(buildFast, STRESS_N); + const slow = timeOnce(buildSlow, SLOW_N); // O(n^2); one run is plenty + return { prod, stress, slow }; + }, + { STRESS_N, SLOW_N, PROD_N } + ); + + const { prod, stress, slow } = r; + console.log( + `fast prod N=${PROD_N}: ${prod.dt.toFixed(2)}ms (${prod.count} options, budget < ${PROD_BUDGET_MS}ms)` + ); + console.log( + `fast stress N=${STRESS_N}: ${stress.dt.toFixed(2)}ms (${stress.count} options, budget < ${STRESS_BUDGET_MS}ms)` + ); + console.log( + `slow O(n^2) N=${SLOW_N}: ${slow.dt.toFixed(2)}ms (${slow.count} options) — reference, must be >> fast` + ); + + function check(cond, msg) { + if (!cond) { + 
console.error(`FAIL: ${msg}`); + failed = true; + } + } + check(prod.count === PROD_N + 1, `prod build produced ${prod.count} options, expected ${PROD_N + 1}`); + check(stress.count === STRESS_N + 1, `stress build produced ${stress.count} options, expected ${STRESS_N + 1}`); + check(prod.dt < PROD_BUDGET_MS, `prod ${prod.dt.toFixed(2)}ms >= ${PROD_BUDGET_MS}ms`); + check(stress.dt < STRESS_BUDGET_MS, `stress ${stress.dt.toFixed(2)}ms >= ${STRESS_BUDGET_MS}ms`); + check( + slow.dt > stress.dt * 5, + `harness not sensitive — slow ${slow.dt.toFixed(2)}ms not >> fast ${stress.dt.toFixed(2)}ms` + ); + + if (!failed) console.log("PASS"); +} finally { + await browser.close(); +} +if (failed) process.exitCode = 1; diff --git a/tests/test_web_app.py b/tests/test_web_app.py index fdd2850..ef8c8e8 100644 --- a/tests/test_web_app.py +++ b/tests/test_web_app.py @@ -306,6 +306,42 @@ def test_dashboard_dead_code_is_removed(self): self.assertNotIn('label": "主实验"', app_py) self.assertNotIn("function renderExperimentGroups(groups)", app_js) + def test_taxonomy_dropdown_build_is_not_quadratic(self): + """Regression guard for the main-thread freeze (Acceptance A). + + The taxonomy dropdown was built with `sel.innerHTML += ...` on every iteration — O(n²) — which + froze the main thread ~4s after load (when prefetchInactiveTabs ran + loadTaxonomyDropdown). The fix must batch the option strings and + assign innerHTML (or append a DocumentFragment) exactly once. + + We assert against the *source* so the O(n) property is pinned to the + real file; the wall-clock proof lives in the node perf microbench + (tests/js/taxonomy_dropdown_perf.mjs) and the Playwright E2E. + """ + app_js = _read("web/static/js/app.js") + body = re.search( + r"async function loadTaxonomyDropdown\(\)\s*\{(?P<body>.*?)\n\}", + app_js, + re.S, + ) + self.assertIsNotNone(body, "loadTaxonomyDropdown not found") + fn = body.group("body") + # No `someEl.innerHTML += ...` accumulation anywhere in the function: + # that is the quadratic pattern we are forbidding. 
+ self.assertIsNone( + re.search(r"\.innerHTML\s*\+=", fn), + "loadTaxonomyDropdown must not use `innerHTML +=` (O(n^2))", + ) + # And it must build the options in a batch (join an array) before a + # single assignment / fragment append. + self.assertRegex( + fn, + r"\.join\(|createDocumentFragment|insertAdjacentHTML", + "loadTaxonomyDropdown must build options in one batch", + ) + class ExperimentGroupApiTests(unittest.TestCase): def setUp(self): diff --git a/web/static/css/style.css b/web/static/css/style.css index c9ca413..5f31218 100644 --- a/web/static/css/style.css +++ b/web/static/css/style.css @@ -416,11 +416,19 @@ header#topBar { opacity: 0; visibility: hidden; transition: opacity 0.25s ease, visibility 0s 0.25s; + /* Inactive panels are visibility:hidden but, being position:absolute, would + otherwise still be laid out — so every tab's hundreds of cards stayed in + the layout at once, making any reflow (e.g. a language switch) scan all of + them. content-visibility:hidden drops the inactive subtree from layout and + rendering entirely (it stays in the DOM, unlike display:none, so switching + back is cheap). */ + content-visibility: hidden; } .tab-panel.active { opacity: 1; visibility: visible; transition: opacity 0.25s ease, visibility 0s 0s; + content-visibility: visible; } .tab-scroll { @@ -1888,3 +1896,20 @@ header#topBar { margin-bottom: 8px; line-height: 1.5; } + +/* ── Responsiveness: skip layout/restyle for off-screen list cards ────────── + The overview tabs render hundreds of these cards. Without containment, the + browser lays them all out the moment a tab is shown (display:none → block) + and restyles every one when the document is restyled (e.g. on language + switch), producing multi-hundred-ms main-thread tasks that froze the page. + content-visibility:auto lets the browser skip rendering work for cards that + are off-screen, turning those whole-list passes into viewport-sized ones. 
+ contain-intrinsic-size gives an off-screen placeholder height so the + scrollbar stays stable. */ +.insight-card, +.paper-row, +.opp-card, +.paper-flow-item { + content-visibility: auto; + contain-intrinsic-size: auto 160px; +} diff --git a/web/static/js/app.js b/web/static/js/app.js index ff9b369..016eb64 100644 --- a/web/static/js/app.js +++ b/web/static/js/app.js @@ -885,11 +885,22 @@ async function loadTaxonomyDropdown() { taxonomyLoaded = true; try { taxonomyFlat = await api('/api/taxonomy'); - const sel = el('evidenceNodeSelect'); - sel.innerHTML = ``; + // Populate the backing the evidence node typeahead. Build + // every `; + const opt = document.createElement('option'); + opt.value = n.id; + opt.label = `${n.id} \u2014 ${n.name}`; + frag.appendChild(opt); } + dl.replaceChildren(frag); } catch (e) { taxonomyLoaded = false; console.error('Taxonomy dropdown error:', e); @@ -1061,7 +1072,7 @@ function renderPapers() { return; } - list.innerHTML = filtered.map(p => { + setListHtmlChunked(list, filtered.map(p => { const sc = p.status ? 's-' + p.status : ''; return `
@@ -1074,7 +1085,7 @@ function renderPapers() {
${esc(tr('common.loadingDetails'))}
`; - }).join(''); + })); } // ── Paper Progress Tabs ───────────────────────────────────────────── @@ -1127,7 +1138,7 @@ function renderPaperPipelineRows(rows) { list.innerHTML = `

${esc(tr('empty.paperProgress'))}

`; return; } - list.innerHTML = papers.map(item => ` + setListHtmlChunked(list, papers.map(item => `
${esc(trunc(item.title || item.id || tr('common.untitledPaper'), 120))}
@@ -1140,7 +1151,7 @@ function renderPaperPipelineRows(rows) {
${item.stage_last_error ? `
${esc(trunc(item.stage_last_error, 240))}
` : ''}
- `).join(''); + `)); } function renderPaperGenerationRows(jobs, manuscripts) { @@ -1254,7 +1265,7 @@ function renderGeneratedPapers(manuscripts) { list.innerHTML = `

${esc(tr('empty.generated'))}

`; return; } - list.innerHTML = rows.map(row => { + setListHtmlChunked(list, rows.map(row => { const preview = row.deep_insight_id ? paperPreviewHref(row.deep_insight_id, 'index') : ''; return `
@@ -1279,7 +1290,7 @@ function renderGeneratedPapers(manuscripts) {
`; - }).join(''); + })); } async function loadGeneratedPapersTab() { @@ -1385,7 +1396,7 @@ async function loadInsightsTab() { return; } - list.innerHTML = insights.map(ins => { + setListHtmlChunked(list, insights.map(ins => { const color = typeColors[ins.insight_type] || '#888'; let papers = []; try { papers = JSON.parse(ins.supporting_papers || '[]'); } catch(e) {} @@ -1415,7 +1426,7 @@ async function loadInsightsTab() { `; - }).join(''); + })); } catch (e) { insightsLoaded = false; console.error('Insights tab error:', e); @@ -1558,7 +1569,7 @@ function renderDiscoveries(discoveries) { return; } - list.innerHTML = visible.map(d => { + setListHtmlChunked(list, visible.map(d => { const isTier1 = d.tier === 1; const tierColor = isTier1 ? '#c4453a' : '#2e86ab'; const tierLabel = isTier1 ? tr('discoveries.tier1') : tr('discoveries.tier2'); @@ -1657,7 +1668,7 @@ function renderDiscoveries(discoveries) { ${d.evidence_summary ? `
${esc(tr('label.evidence'))} ${esc(trunc(d.evidence_summary, 250))}
` : ''}
${esc(tr('label.mode'))} ${esc(tr('label.fixedAutomaticPipeline'))}
`; - }).join(''); + })); } // ── Experiments Tab ─────────────────────────────────────────────────── @@ -1941,7 +1952,7 @@ function renderExperimentGroupsV2(groups) { return; } - list.innerHTML = groups.map(group => { + setListHtmlChunked(list, groups.map(group => { const insight = group.insight || {}; const auto = group.auto_job || {}; const currentRun = group.canonical_run || group.latest_run || null; @@ -2002,7 +2013,7 @@ function renderExperimentGroupsV2(groups) { ${previewUrl ? `` : ''} `; - }).join(''); + })); } function jsonPreview(obj, emptyText = 'None') { @@ -2168,19 +2179,46 @@ function runWhenIdle(fn, timeout = 700) { } } +// Render a large list of pre-built HTML strings without janking the main +// thread. A single `container.innerHTML = parts.join('')` of a few hundred +// complex cards parses/builds the whole subtree in one ~hundreds-of-ms task; +// during the idle prefetch several such renders ran back-to-back and made the +// page feel frozen. Here we drop the first chunk in synchronously (so the tab +// is not empty) and append the rest across idle callbacks. insertAdjacentHTML +// only parses the appended slice, so total work stays O(n) — never the O(n^2) +// of `innerHTML +=`. Items use inline onclick handlers, so no post-render +// event binding is needed. +function setListHtmlChunked(container, parts, chunk = 25) { + container.innerHTML = ''; + if (!parts || !parts.length) return; + // Cancel any still-pending chunked render of an earlier call (e.g. when a + // filter re-renders the same list before the previous run finished). 
+ const token = (container._chunkToken || 0) + 1; + container._chunkToken = token; + container.insertAdjacentHTML('beforeend', parts.slice(0, chunk).join('')); + let i = chunk; + const step = () => { + if (container._chunkToken !== token) return; // superseded + container.insertAdjacentHTML('beforeend', parts.slice(i, i + chunk).join('')); + i += chunk; + if (i < parts.length) runWhenIdle(step, 50); + }; + if (i < parts.length) runWhenIdle(step, 50); +} + async function prefetchInactiveTabs() { if (inactiveTabsPrefetched) return; inactiveTabsPrefetched = true; + // Only prewarm cheap things during idle. Eagerly rendering every tab here + // built ~25k DOM nodes with zero user interaction, which is exactly the + // "loaded then frozen" symptom: each heavy list render is a long main-thread + // task, and the resulting giant DOM makes later layout/restyle slow too. + // The heavy tabs already lazy-load on first activation (onTabActivated), and + // their renders are chunked, so deferring them keeps idle responsive and the + // DOM small until a tab is actually viewed. const tasks = [ - () => loadTaxonomyDropdown(), - () => loadGeneratedPapersTab(), - () => loadInsightsTab(), - () => loadPapers(), - () => loadPaperProgressTab(), - () => loadDiscoveriesTab(), - () => loadExperimentsTab(), - () => loadProviders(), + () => loadTaxonomyDropdown(), // builds a (~30ms); keeps Evidence instant ]; for (const task of tasks) { diff --git a/web/static/js/i18n.js b/web/static/js/i18n.js index 7f44e96..b390877 100644 --- a/web/static/js/i18n.js +++ b/web/static/js/i18n.js @@ -838,6 +838,12 @@ } let currentLanguage = preferredLanguage(); + // Declare the document language once, up front (good for a11y / browser + // translation). 
We deliberately do NOT rewrite documentElement.lang on every + // toggle: changing this inherited, style-affecting attribute forces a + // full-document style recalc — a ~1.5s main-thread freeze on this dashboard's + // large DOM — and no CSS here keys off :lang()/[lang], so it buys nothing. + try { document.documentElement.lang = currentLanguage === "zh" ? "zh-CN" : "en"; } catch (_) {} function t(key, vars) { const table = I18N[currentLanguage] || I18N.en; @@ -861,7 +867,8 @@ scope.querySelectorAll("[data-i18n-title]").forEach((node) => { node.setAttribute("title", t(node.dataset.i18nTitle)); }); - document.documentElement.lang = currentLanguage === "zh" ? "zh-CN" : "en"; + // NOTE: documentElement.lang is set once at init (see preferredLanguage + // above), NOT here — rewriting it per toggle forces a full-document restyle. document.querySelectorAll("[data-lang]").forEach((node) => { node.classList.toggle("active", node.dataset.lang === currentLanguage); }); diff --git a/web/templates/index.html b/web/templates/index.html index 03cab21..b99a1e5 100644 --- a/web/templates/index.html +++ b/web/templates/index.html @@ -226,9 +226,14 @@

Benchmark Matrix

- + + + Select a leaf research area to view the benchmark matrix.