From 632cd2d8bbeaa5fbaa021cc034005b8182cd6d52 Mon Sep 17 00:00:00 2001 From: Andy Aragon Date: Mon, 8 Jun 2026 02:48:18 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20compile()=20=E2=80=94=20the=20ergonomic?= =?UTF-8?q?=20.each=20path,=20codegen'd=20to=20bindColumns=20speed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `query.compile(body)` reads an `.each`-style callback's own source, rewrites each `e.<comp>.<field>` to direct typed-array column indexing, and codegens a specialized per-archetype loop — so the readable accessor syntax lands near eachChunk/bindColumns (~1.6 ns/entity, ~6x faster than the proxy .each) while you keep writing `e.position.x += e.velocity.dx * ctx.dt`. Unlike bindColumns it PRESERVES reactivity: a written component is recorded in the write log exactly as the accessor setter would (component-granular, gated on tracking.active, so free when no .changed/observer consumer exists). The tracking branch is hoisted OUT of the loop, so the common no-consumer path is a clean kernel V8 compiles like bindColumns. Pure speedup that can never change results: the analyzer is conservative and falls back to the unchanged proxy .each for anything it can't prove safe — non-straight-line bodies, strings/comments/regex literals, destructuring-assign, non-numeric-scalar fields (vec/bool/eid/bigint/rich), non-required components, any `e` use beyond `e.comp.field`, per-row ctx writes, row-filtered queries, `__`-prefixed locals (no generated-name collision), and runtimes that block `new Function`. Every generated identifier is `__`-prefixed and the body is rejected if it contains `__`, so a user local can never silently shadow one. A scratch pre-flight runs the runner once on throwaway arrays, so a body that closes over an outer variable falls back instead of crashing the first frame. Correctness is property-tested byte-identical to proxy .each under random spawn/despawn/write/growth churn, with and without a .changed consumer. 
- new: packages/core/src/query/compile-each.ts (the source transform) - LiveQuery.compile() + public Query/LooseQuery.compile type - bench bucket (ecsia compile) + CI regression-ratio lane (≤1.0x bitECS) - docs: performance.md "Compile the ergonomic path" section - bundle budget ratcheted (+~2.3KB gz: compile lives in the query kernel) --- bench/iterate.ts | 35 ++ bench/regression-baseline.json | 3 +- bench/test/regression.bench.test.ts | 9 +- bundle-budget.json | 12 +- packages/core/src/internal.ts | 5 +- packages/core/src/query/compile-each.ts | 296 +++++++++++++++++ packages/core/src/query/index.ts | 2 + packages/core/src/query/live-query.ts | 233 +++++++++++++ .../core/test/compile-each.property.test.ts | 187 +++++++++++ packages/core/test/compile-each.test.ts | 313 ++++++++++++++++++ packages/schema/src/index.ts | 13 + scripts/bench-report.mjs | 30 +- website/guide/performance.md | 55 ++- 13 files changed, 1175 insertions(+), 18 deletions(-) create mode 100644 packages/core/src/query/compile-each.ts create mode 100644 packages/core/test/compile-each.property.test.ts create mode 100644 packages/core/test/compile-each.test.ts diff --git a/bench/iterate.ts b/bench/iterate.ts index 2ceae4d..6edd216 100644 --- a/bench/iterate.ts +++ b/bench/iterate.ts @@ -138,6 +138,41 @@ export function makeEcsiaPinnedIter(n: number): IterCase { } } +// The compiled-ergonomic variant: the SAME readable `.each` body the `ecsia` bucket runs, but handed to +// q.compile, which rewrites `e..` to direct column indexing and codegens the bindColumns- +// shape loop. It should land near ecsia-pinned while keeping the proxy-path syntax — the point of the +// bucket is to show the ergonomic path no longer pays the per-row proxy tax. 
+export function makeEcsiaCompiledIter(n: number): IterCase { + const Position = defineComponent({ x: 'f32', y: 'f32' }, { name: 'position' }) + const Velocity = defineComponent({ dx: 'f32', dy: 'f32' }, { name: 'velocity' }) + const world = createWorld({ components: [Position, Velocity], maxEntities: nextPow2(n) }) + let first = 0 as unknown as ReturnType + for (let i = 0; i < n; i++) { + const h = world.spawnWith(Position, Velocity) + if (i === 0) first = h + const v = world.entity(h).write(Velocity) as { dx: number; dy: number } + v.dx = 1 + v.dy = 0.5 + } + const q = world.query(write(Position), write(Velocity)) as unknown as { + compile(b: (e: { position: { x: number; y: number }; velocity: { dx: number; dy: number } }, ctx: Ctx) => void): (ctx: Ctx) => void + } + const run = q.compile<{ dt: number }>((e, ctx) => { + e.position.x += e.velocity.dx * ctx.dt + e.position.y += e.velocity.dy * ctx.dt + }) + const ctx = { dt: DT } + return { + name: 'ecsia-compiled', + step() { + run(ctx) + }, + sampleX() { + return (world.entity(first).read(Position) as { x: number }).x + }, + } +} + interface MiniEntity { position: { x: number; y: number } velocity: { dx: number; dy: number } diff --git a/bench/regression-baseline.json b/bench/regression-baseline.json index 22bce4e..dd8a7d3 100644 --- a/bench/regression-baseline.json +++ b/bench/regression-baseline.json @@ -1,7 +1,8 @@ { - "_comment": "CI bench regression ceilings — MAX allowed ns/entity RATIO of each ecsia path vs a SAME-RUN bitECS control (so machine drift cancels). A real regression (e.g. codegen breaking → bindColumns deopts from ~0.72x to ~1.5x) trips the ceiling; ~10% run-to-run noise does not. RATCHET: when a path durably improves, lower its ceiling here. Measured 2026-06-08: bindColumns ~0.72x, eachChunk ~1.08x, each ~7.4x.", + "_comment": "CI bench regression ceilings — MAX allowed ns/entity RATIO of each ecsia path vs a SAME-RUN bitECS control (so machine drift cancels). A real regression (e.g. 
codegen breaking → bindColumns deopts from ~0.72x to ~1.5x) trips the ceiling; ~10% run-to-run noise does not. RATCHET: when a path durably improves, lower its ceiling here. Measured 2026-06-08: bindColumns ~0.72x, compile ~0.75x (the ergonomic body, codegen'd — should track bindColumns), eachChunk ~1.08x, each ~7.4x.", "ratiosVsBitecs": { "bindColumns": 0.9, + "compile": 1.0, "eachChunk": 1.3, "each": 9.0 } diff --git a/bench/test/regression.bench.test.ts b/bench/test/regression.bench.test.ts index 3410fb8..eebfa4b 100644 --- a/bench/test/regression.bench.test.ts +++ b/bench/test/regression.bench.test.ts @@ -9,7 +9,13 @@ import { describe, expect, test } from 'vitest' import { readFileSync } from 'node:fs' import { fileURLToPath } from 'node:url' -import { makeEcsiaIter, makeEcsiaCursorIter, makeEcsiaPinnedIter, makeBitEcsIter } from '../iterate.js' +import { + makeEcsiaIter, + makeEcsiaCursorIter, + makeEcsiaPinnedIter, + makeEcsiaCompiledIter, + makeBitEcsIter, +} from '../iterate.js' import type { IterCase } from '../iterate.js' const ENABLED = process.env['BENCH_REGRESSION'] === '1' @@ -51,6 +57,7 @@ describe.skipIf(!ENABLED)('bench regression — ecsia/bitECS ns/entity ratios un test.each([ ['bindColumns', makeEcsiaPinnedIter as (n: number) => CtxIter], + ['compile', makeEcsiaCompiledIter as (n: number) => CtxIter], ['eachChunk', makeEcsiaCursorIter as (n: number) => CtxIter], ['each', makeEcsiaIter as (n: number) => CtxIter], ])('%s ratio vs bitECS stays under its ceiling', (name, make) => { diff --git a/bundle-budget.json b/bundle-budget.json index 4bdc86d..987ac48 100644 --- a/bundle-budget.json +++ b/bundle-budget.json @@ -2,16 +2,16 @@ "_comment": "Bundle-size budgets: max min+gzip BYTES per tree-shaken entry (scripts/size-check.mjs). The honest \"lean install\" number. 
Ratchet DOWN with `node scripts/size-check.mjs --update` when a build shrinks; CI fails if a build grows past budget*1.03.", "budgets": { "kernel": { - "gzip": 39987, - "min": 126142 + "gzip": 42285, + "min": 131819 }, "core-min": { - "gzip": 30154, - "min": 97265 + "gzip": 32443, + "min": 102905 }, "full-umbrella": { - "gzip": 54587, - "min": 171999 + "gzip": 56884, + "min": 177652 } } } diff --git a/packages/core/src/internal.ts b/packages/core/src/internal.ts index 2af36d1..f78f161 100644 --- a/packages/core/src/internal.ts +++ b/packages/core/src/internal.ts @@ -104,7 +104,7 @@ export type { ObserverDeps, } from './reactivity/index.js' -export { QueryEngine, LiveQuery, SparseSetU32, compileQuery } from './query/index.js' +export { QueryEngine, LiveQuery, SparseSetU32, compileQuery, analyzeEachBody } from './query/index.js' export type { QueryEngineDeps, LiveQueryDeps, @@ -117,6 +117,9 @@ export type { RowFilterTerm, ValueRole, Word, + EachPlan, + EachViewSpec, + EachAnalyzeDeps, } from './query/index.js' // Low-level schema inference helpers re-exported through core (not on the curated public surface; the diff --git a/packages/core/src/query/compile-each.ts b/packages/core/src/query/compile-each.ts new file mode 100644 index 0000000..c8a2e91 --- /dev/null +++ b/packages/core/src/query/compile-each.ts @@ -0,0 +1,296 @@ +// Ergonomic-path compiler: turn an `.each`-style body `(e, ctx) => { e.position.x += e.velocity.dx * ctx.dt }` +// into the same codegen'd raw-column loop `bindColumns` runs — so the readable accessor syntax pays the +// proxy-per-row tax (~10 ns/entity) only when it must, and lands near `eachChunk` (~1.5 ns/entity) when +// it can. The transform reads the callback's own `.toString()` and rewrites `e..` to direct +// column indexing, exactly the rewrite becsy performs. +// +// CORRECTNESS over speed, always. 
This analyzer is deliberately CONSERVATIVE: it compiles only +// straight-line numeric-scalar bodies and bails — returning null, so the caller runs the unchanged proxy +// `.each` — on ANYTHING it does not fully understand. An imperfect transform can therefore never corrupt; +// the worst case is a missed optimization. The bail set is wide on purpose: +// - any control flow / short-circuit / nested function (`if for while switch case ? && || return +// continue break => function`): a conditional write would make the after-body `trackWrite` over-report +// `.changed()`, and `return`/`continue` mean per-row skip (proxy semantics) not loop-exit. Straight-line +// only ⇒ every write always runs ⇒ the gated `trackWrite` block provably matches what was written. +// - strings / template literals / comments: they could hide a fake `e.position.x` that string-replacement +// would corrupt. +// - any `e` use that is not exactly `e.<comp>.<field>`: `e.handle`, `e[expr]`, +// `e.comp` bare, passing `e` along, a non-scalar (vec/bool/eid/bigint/rich) field, or a component the +// query does not REQUIRE (may be absent from a future matching archetype). +// - any non-read `ctx` use: per-row ctx mutation is rare and left to the proxy. +// +// Reactivity is PRESERVED: every component the body writes gets one gated `trackWrite(handleIndex(row), id)` +// emitted after the row body — the same component-granular record the scalar accessor setter makes — so +// `.changed()` filters and observers fire identically. The gate (`if (tracking.active)`) makes it free when +// no consumer exists, matching the accessor's own write-path fast-out. +// +// SECURITY: the generated source is built from the body's own `.toString()` plus component/field NAMES and +// integer ids drawn from the registered schema — never interpolated external strings. Same surface as the +// code the caller already wrote. 
+ +import type { ComponentDef, FieldDescriptor, Schema } from '@ecsia/schema' + +/** One bound column the compiled loop reads/writes: which field of which component, and its column index. */ +export interface EachViewSpec { + readonly def: ComponentDef + readonly field: string + /** Column index within the component's ColumnSet (ctor-backed fields only — the bindColumns rule). */ + readonly colIndex: number +} + +export interface EachPlan { + /** Views in binding order; `views[k]` in the generated source is this spec's column. */ + readonly specs: readonly EachViewSpec[] + /** Component ids the body writes (each gets a gated per-row trackWrite). */ + readonly writtenIds: readonly number[] + /** + * The generated factory source: `(args) => { ...; return (ctx) => { ...loop... } }`. `args` carries + * `{ views, rows, trackWrite, tracking, handleIndex, meta }`. Recompiled per archetype by the caller + * (via the shared codegen path) so each runner is a specialized V8 singleton. + */ + readonly factorySource: string +} + +/** Lookups the analyzer needs from the query/world, kept injectable so the module imports no core internals. */ +export interface EachAnalyzeDeps { + /** Component the query exposes under `e.` (its value term), or undefined if not a value term. */ + defByName(name: string): ComponentDef | undefined + /** Registered component id, or undefined if unregistered. */ + idOf(def: ComponentDef): number | undefined + /** True iff the component is REQUIRED by the query (present in every matching archetype). */ + isRequired(def: ComponentDef): boolean +} + +// Statements we refuse to compile (see header). Word-boundaried for keywords; literal for operators. A +// match anywhere in the BODY BLOCK (not the outer signature — its own `=>` is fine) forces the proxy +// fallback. Local `const`/`let` are allowed: they keep the body straight-line. A nested `=>`/`function` +// (closures), control flow, short-circuit, and spread all bail. 
// The ban list also covers constructs whose MEANING changes once the body text is re-hosted inside
// `new Function`, plus the remaining ways a write can become conditional:
//   - `this` / `arguments` / `super`: an arrow body resolves these lexically, but the generated code runs
//     under a sloppy-mode `new Function`, where `this` is the global object. `this.speed` would then read
//     `undefined` WITHOUT throwing, so a throwing pre-flight could not catch it and the compiled loop
//     would write NaN where the proxy path read the caller's `this`.
//   - `eval` / `with` / `class`: they open scopes the textual rewrite does not model.
//   - `try` / `catch` / `finally`: a write inside a handler runs only on some rows, so the unconditional
//     after-body `trackWrite` would over-report `.changed()`.
// Every addition only widens the proxy fallback; it can never change a result.
const BANNED =
  /\b(?:if|for|while|switch|case|do|return|continue|break|function|yield|await|new|delete|void|throw|in|instanceof|typeof|this|arguments|super|eval|with|class|try|catch|finally)\b|=>|\?|&&|\|\||\.\.\.|`/

// EVERY identifier the generated loop introduces is `__`-prefixed (`__v0`, `__trackWrite`, `__ctx`, …).
// So a single guard — reject ANY double-underscore in the body — makes a user local collision impossible:
// it can never shadow a generated name (which would silently corrupt under sloppy-mode `new Function`).
const RESERVED = /__/

// A destructuring-assignment target writes a column WITHOUT a trailing assign op, so write-detection would
// miss it (silent `.changed()` divergence). `]=`/`}=` is the LHS-bracket signature; bail on it.
const DESTRUCTURE_ASSIGN = /[\]}]\s*=(?!=)/

// A regex literal could spell a real `e.comp.field` that string-rewriting would corrupt. A `/` right after
// an operator/open-bracket starts a regex (division's `/` follows a value: identifier/`)`/`]`/number), so
// this flags regex literals without bailing on division.
const REGEX_LITERAL = /[=(,:[!&|?{;+\-*%<>~^]\s*\//

// Compound assignment operators, matched with `startsWith` against the text that follows an access.
// Plain `=` is not listed here; callers test it separately so `==`/`===` are not mistaken for a write.
const ASSIGN_OPS = ['>>>=', '<<=', '>>=', '**=', '&&=', '||=', '??=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=']

/**
 * Escape every regex metacharacter in `s` so it can be embedded in a `RegExp` source and match literally
 * (parameter names may legally contain `$`).
 *
 * @param s - Raw text to match verbatim.
 * @returns `s` with each metacharacter backslash-escaped.
 */
function escapeRe(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}

// A field is codegen-eligible iff it is a single-slot column whose codec is the identity on numbers —
// i.e. a plain float/int column (f32, i32, u8, ...). bool/eid/bigint/staticString/rich all fail the
// identity probe (or carry no column) and route to the proxy, where their non-identity codec runs.
+function scalarColIndex(def: ComponentDef, field: string): number | null { + let colIndex = 0 + for (const f of def.fields as readonly FieldDescriptor[]) { + if (f.name === field) { + if (f.ctor === null || f.stride !== 1 || f.rich !== undefined) return null + if (f.decode(0) !== 0 || f.decode(1) !== 1 || f.encode(0) !== 0 || f.encode(1) !== 1) return null + return colIndex + } + if (f.ctor !== null) colIndex += 1 + } + return null +} + +interface Params { + readonly eParam: string + readonly ctxParam: string | null + readonly block: string +} + +/** Pull the (e, ctx) parameter names and the body block out of a function's source. Returns null for any + * shape we do not handle (destructured/defaulted/rest params, more than two params). */ +function parseFn(src: string): Params | null { + const s = src.trim() + let paramsRaw: string + let block: string + const arrowAt = s.indexOf('=>') + const braceAt = s.indexOf('{') + if (arrowAt !== -1 && (braceAt === -1 || arrowAt < braceAt)) { + let head = s.slice(0, arrowAt).trim() + if (head.startsWith('async')) head = head.slice(5).trim() + paramsRaw = head.replace(/^\(/, '').replace(/\)$/, '') + let body = s.slice(arrowAt + 2).trim() + if (body.startsWith('{')) { + const end = body.lastIndexOf('}') + if (end === -1) return null + block = body.slice(1, end) + } else { + // concise arrow body: a single expression. Wrap as a statement so writes (the common case) run. + block = body.replace(/;\s*$/, '') + ';' + } + } else { + const lp = s.indexOf('(') + if (lp === -1) return null + const rp = s.indexOf(')', lp) + if (rp === -1) return null + paramsRaw = s.slice(lp + 1, rp) + const bo = s.indexOf('{', rp) + if (bo === -1) return null + const be = s.lastIndexOf('}') + if (be <= bo) return null + block = s.slice(bo + 1, be) + } + const params = paramsRaw.trim() === '' ? 
[] : paramsRaw.split(',').map((p) => p.trim()) + if (params.length === 0 || params.length > 2) return null + for (const p of params) if (!/^[A-Za-z_$][\w$]*$/.test(p)) return null // no destructure/default/rest + return { eParam: params[0] as string, ctxParam: params[1] ?? null, block } +} + +/** + * Analyze an `.each` body. Returns a compile plan, or null to signal "run the proxy `.each` unchanged". + * Pure and side-effect-free (it reads `.toString()` and the schema only), so it is trivially testable. + */ +export function analyzeEachBody(body: (...args: never[]) => unknown, deps: EachAnalyzeDeps): EachPlan | null { + const src = String(body) + const parsed = parseFn(src) + if (parsed === null) return null + const { eParam, ctxParam, block } = parsed + // Hazard checks run on the BLOCK only — the outer arrow's `=>` and the params are not part of it. + if ( + BANNED.test(block) || + RESERVED.test(block) || + DESTRUCTURE_ASSIGN.test(block) || + REGEX_LITERAL.test(block) || + block.includes('//') || + block.includes('/*') || + /['"]/.test(block) + ) { + return null + } + + // --- map every e.. 
to a view; bail on any other use of e --------------------------- + const specs: EachViewSpec[] = [] + const specIndex = new Map() // "comp.field" -> views[] index (dedups repeat accesses) + const writtenIds = new Set() + const idByName = new Map() + + const accessRe = new RegExp(escapeRe(eParam) + '\\s*\\.\\s*([A-Za-z_$][\\w$]*)\\s*\\.\\s*([A-Za-z_$][\\w$]*)', 'g') + let transformed = '' + let last = 0 + let m: RegExpExecArray | null + while ((m = accessRe.exec(block)) !== null) { + const compName = m[1] as string + const field = m[2] as string + const def = deps.defByName(compName) + if (def === undefined || !deps.isRequired(def)) return null + const id = deps.idOf(def) + if (id === undefined) return null + const colIndex = scalarColIndex(def, field) + if (colIndex === null) return null + + // The char right after the field must not extend the access (`.`, `[`, `(`) — that would be a deeper + // member/index/call we do not model (`e.pos.x.foo`, `e.pos.x()`), so the whole body bails. + const after = block[accessRe.lastIndex] + if (after === '.' || after === '[' || after === '(') return null + + const key = compName + '.' + field + let k = specIndex.get(key) + if (k === undefined) { + k = specs.length + specs.push({ def, field, colIndex }) + specIndex.set(key, k) + idByName.set(compName, id) + } + + // Write detection: an assignment operator (or pre/post ++/--) on this access marks the component + // written. Straight-line bodies guarantee the write always runs, so the after-body trackWrite is exact. 
+ const rest = block.slice(accessRe.lastIndex).replace(/^\s+/, '') + const postInc = rest.startsWith('++') || rest.startsWith('--') + const preInc = /(?:\+\+|--)\s*$/.test(block.slice(0, m.index)) // prefix ++/-- (any whitespace) + let isWrite = postInc || preInc + if (!isWrite) { + for (const op of ASSIGN_OPS) { + if (rest.startsWith(op)) { + isWrite = true + break + } + } + // plain `=` but not `==`/`===`/`=>` (`=>` already banned globally) + if (!isWrite && rest.startsWith('=') && rest[1] !== '=') isWrite = true + } + if (isWrite) writtenIds.add(id) + + transformed += block.slice(last, m.index) + '__v' + k + '[__i]' + last = accessRe.lastIndex + } + transformed += block.slice(last) + if (specs.length === 0) return null + + // Any remaining bare mention of e is something we did not model → bail. + if (new RegExp('\\b' + escapeRe(eParam) + '\\b').test(transformed)) return null + + // --- hoist ctx. reads out of the loop ----------------------------------------------------- + let preamble = '' + if (ctxParam !== null) { + const ctxRe = new RegExp(escapeRe(ctxParam) + '\\s*\\.\\s*([A-Za-z_$][\\w$]*)', 'g') + const hoisted = new Map() + let out = '' + let lc = 0 + let cm: RegExpExecArray | null + while ((cm = ctxRe.exec(transformed)) !== null) { + const prop = cm[1] as string + const after = transformed.slice(ctxRe.lastIndex).replace(/^\s+/, '') + // a ctx write (`ctx.x =`, `ctx.x +=`, `ctx.x++`) is not a hoistable read → bail to proxy. + if (after.startsWith('++') || after.startsWith('--')) return null + if (after.startsWith('=') && after[1] !== '=') return null + for (const op of ASSIGN_OPS) if (after.startsWith(op)) return null + const local = '__c_' + prop + if (!hoisted.has(prop)) hoisted.set(prop, local) + out += transformed.slice(lc, cm.index) + local + lc = ctxRe.lastIndex + } + out += transformed.slice(lc) + // a bare ctx mention (computed access, passed along) left over → bail. 
+ if (new RegExp('\\b' + escapeRe(ctxParam) + '\\b').test(out)) return null + transformed = out + for (const [prop, local] of hoisted) preamble += 'const ' + local + ' = __ctx.' + prop + ';' + } + + // --- assemble the factory source ----------------------------------------------------------------- + // EVERY introduced identifier is `__`-prefixed (columns `__v0`, seam `__trackWrite`/`__handleIndex`/ + // `__tracking`, runner param `__ctx`, …). The RESERVED guard rejected any `__` in the body, so none of + // these can collide with — and silently shadow — a user local. + const viewDecls = specs.map((_, k) => 'const __v' + k + ' = __views[' + k + '];').join('') + const writeIds = [...writtenIds] + const cleanLoop = 'for(let __i=0;__i<__count;__i++){' + transformed + '}' + + // When the body writes a component, reactivity must observe it — but ONLY when a `.changed`/observer + // consumer is registered (`__tracking.active`). The branch is hoisted OUT of the loop: the common + // no-consumer path runs `cleanLoop` (zero trackWrite references, so V8 compiles it like bindColumns), + // and the tracked path runs a second loop that records one component-granular write per row — matching + // the accessor setter exactly. `arch.rows` is re-read per frame (it is reassigned on growth). 
+ let loopSource: string + if (writeIds.length > 0) { + const trackedLoop = + 'const __rows=__arch.rows;' + + 'for(let __i=0;__i<__count;__i++){' + + transformed + + 'const __ix=__handleIndex(__rows[__i]);' + + writeIds.map((id) => '__trackWrite(__ix,' + id + ');').join('') + + '}' + loopSource = 'if(__tracking.active){' + trackedLoop + '}else{' + cleanLoop + '}' + } else { + loopSource = cleanLoop + } + + const factorySource = + '(__args)=>{' + + 'const __views=__args.views,__arch=__args.arch,__trackWrite=__args.trackWrite,__tracking=__args.tracking,__handleIndex=__args.handleIndex,__meta=__args.meta;' + + viewDecls + + 'return (__ctx)=>{' + + preamble + + 'const __count=__meta.count;' + + loopSource + + '};}' + + return { specs, writtenIds: writeIds, factorySource } +} diff --git a/packages/core/src/query/index.ts b/packages/core/src/query/index.ts index faca204..28fa409 100644 --- a/packages/core/src/query/index.ts +++ b/packages/core/src/query/index.ts @@ -9,6 +9,8 @@ export type { QueryEngineDeps } from './engine.js' export { LiveQuery } from './live-query.js' export type { LiveQueryDeps, PooledElement, ReactivityQueryHooks } from './live-query.js' +export { analyzeEachBody } from './compile-each.js' +export type { EachPlan, EachViewSpec, EachAnalyzeDeps } from './compile-each.js' export { SparseSetU32 } from './sparse-set.js' diff --git a/packages/core/src/query/live-query.ts b/packages/core/src/query/live-query.ts index 88288df..67051fd 100644 --- a/packages/core/src/query/live-query.ts +++ b/packages/core/src/query/live-query.ts @@ -26,9 +26,47 @@ import type { CompiledQuery, CompiledValueTerm, RowFilterTerm } from './compile. 
import type { SparseSetU32 } from './sparse-set.js' import { buildPinnedRunner } from './codegen.js' import type { PinnedFactory } from './codegen.js' +import { analyzeEachBody } from './compile-each.js' const NO_HANDLE = 0xffffffff as EntityHandle +/** The reactivity write-path seam a compiled loop needs, read off an accessor singleton's binding. */ +interface WorldSeam { + trackWrite(index: number, componentId: number, fieldIndex?: number): void + handleIndex(handle: EntityHandle): number + readonly tracking: { readonly active: boolean } +} + +/** Stand-in seam for a read-only compiled body (it is never called; keeps the runner total). */ +const NOOP_SEAM: WorldSeam = { + trackWrite: () => {}, + handleIndex: (h) => h as unknown as number, + tracking: { active: false }, +} + +/** The bundle a generated `compile` factory destructures: bound columns + the reactivity seam + count. */ +interface CompileArgs { + readonly views: readonly TypedArray[] + readonly arch: Archetype + readonly trackWrite: (index: number, componentId: number, fieldIndex?: number) => void + readonly tracking: { readonly active: boolean } + readonly handleIndex: (handle: EntityHandle) => number + readonly meta: { readonly count: number } +} + +/** Recover the ComponentDef a query term carries (`write(C)`/`read(C)` wrap it as `.c`; a bare def is C). */ +function termComponentDef(t: unknown): ComponentDef | null { + if (t === null || typeof t !== 'object') return null + const wrapped = (t as { c?: unknown }).c + if (wrapped !== undefined && wrapped !== null && (wrapped as { fields?: unknown }).fields !== undefined) { + return wrapped as ComponentDef + } + if ((t as { fields?: unknown }).fields !== undefined && typeof (t as { name?: unknown }).name === 'string') { + return t as ComponentDef + } + return null +} + /** The pooled element handed to each callback: value props (per value term) + the entity handle. 
*/ type PooledElement = Record & { handle: EntityHandle } @@ -587,6 +625,201 @@ export class LiveQuery { } } + /** + * Compile an ergonomic `.each` body into the fast column loop `bindColumns` runs — without you naming + * columns or restating the math. `compile` reads the callback's own source, rewrites `e..` + * to direct typed-array indexing, and codegens a specialized per-archetype loop. The result keeps the + * readable accessor syntax but lands near `eachChunk` (~1.5 ns/entity) instead of paying the per-row + * proxy tax (~10 ns/entity). + * + * Unlike `bindColumns`, this path PRESERVES reactivity: a component the body writes is recorded in the + * write log exactly as the accessor setter would, so `.changed()` filters and observers see it — for + * free when no consumer is registered (the same gate the accessor uses), at write-log cost when one is. + * + * It is a pure SPEEDUP: the analyzer is conservative and falls back to the unchanged proxy `.each` (so + * results are always identical) whenever it cannot prove the rewrite safe — a non-straight-line body + * (`if`/`?`/`&&`/`return`/loops/nested fns), a string/comment/template, a non-numeric-scalar field + * (vec/bool/eid/bigint/rich), a component the query does not REQUIRE, any `e` use other than + * `e..`, a per-row `ctx` write, a row-filtered query, or a runtime that blocks `new + * Function` (strict CSP). Call it ONCE and reuse the returned runner per frame. + * + * ```ts + * const run = q.compile<{ dt: number }>((e, ctx) => { + * e.position.x += e.velocity.dx * ctx.dt + * e.position.y += e.velocity.dy * ctx.dt + * }) + * run({ dt: 1 / 60 }) // per frame — same result as q.each(e => ...), faster + * ``` + */ + compile(body: (e: PooledElement, ctx: Ctx) => void): (ctx: Ctx) => void { + const proxyRun = (ctx: Ctx): void => this.each((e) => body(e, ctx)) + // A flat compiled loop cannot skip rows; row-filtered queries stay on the proxy (which can). 
+ if (this.compiled.rowFilters.length !== 0) return proxyRun + + const valueKeys = new Set(this.compiled.valueTerms.map((vt) => vt.key)) + const defByName = new Map>() + for (const t of this.terms) { + const def = termComponentDef(t) + if (def !== null && valueKeys.has(def.name)) defByName.set(def.name, def) + } + const requiredIds = this.#requiredComponentIds() + const plan = analyzeEachBody(body as unknown as (...a: never[]) => unknown, { + defByName: (n) => defByName.get(n), + idOf: (d) => ((d.id as number) >= 0 ? (d.id as number) : undefined), + isRequired: (d) => requiredIds.has(d.id as number), + }) + if (plan === null) return proxyRun + + // Probe up front, then SCRATCH pre-flight, both gating a proxy fallback: + // 1. compile the generated source — a transform bug that produced malformed code (or a runtime that + // blocks `new Function`) throws here, deterministically. + // 2. run the runner ONCE on 1-row throwaway typed arrays with the tracked path forced on. A body that + // is not self-contained — it closes over an outer variable (`const G = 9.8; … += G`) — throws a + // ReferenceError here, where it can be caught and demoted to the proxy, instead of crashing the + // first real frame (or, worse, half-integrating a row before throwing). The scratch arrays mean a + // mutating body can never touch real data during the probe. + let makeFactory: () => (args: CompileArgs) => (ctx: Ctx) => void + try { + makeFactory = () => + new Function('return (' + plan.factorySource + ')')() as (args: CompileArgs) => (ctx: Ctx) => void + const scratchViews = plan.specs.map((s) => { + const fd = (s.def.fields as readonly FieldDescriptor[]).find((f) => f.name === s.field) + const Ctor = fd?.ctor ?? 
Float64Array + return new Ctor(1) as TypedArray + }) + const probeArch = { rows: new Uint32Array(1), count: 1 } as unknown as Archetype + const probeArgs: CompileArgs = { + views: scratchViews, + arch: probeArch, + trackWrite: () => {}, + tracking: { active: true }, + handleIndex: () => 0, + meta: { count: 1 }, + } + makeFactory()(probeArgs)(new Proxy({}, { get: () => 1 }) as Ctx) + } catch { + return proxyRun + } + + interface CompiledBinding { + readonly arch: Archetype + readonly cols: readonly Column[] + views: readonly TypedArray[] + runner: (ctx: Ctx) => void + readonly meta: { readonly count: number } + } + + let seam: WorldSeam | null = null + const seamOf = (arch: Archetype): WorldSeam => { + if (seam !== null) return seam + for (const spec of plan.specs) { + const cs = arch.columnSets.get(spec.def.id as ComponentId) + const w = cs && (cs.accessor as { __binding?: { world?: WorldSeam } }).__binding?.world + if (w) return (seam = w) + } + // Read-only bodies never call the seam; a no-op keeps the runner total even if one is unreachable. 
+ return (seam = NOOP_SEAM) + } + + const argsFor = (arch: Archetype, views: readonly TypedArray[], meta: { readonly count: number }): CompileArgs => { + const s = seamOf(arch) + return { views, arch, trackWrite: s.trackWrite.bind(s), tracking: s.tracking, handleIndex: s.handleIndex.bind(s), meta } + } + + const byArch = new Map() + let bindings: CompiledBinding[] = [] + let coldMatched: Archetype[] = [] + let boundMatchCount = -1 + + const colsOf = (arch: Archetype): Column[] => + plan.specs.map((spec) => { + const cs = arch.columnSets.get(spec.def.id as ComponentId) + const col = cs && cs.columns[spec.colIndex] + if (!col) throw new Error(`compile: missing column for '${spec.def.name}.${spec.field}'`) + return col + }) + + const makeBinding = (arch: Archetype): CompiledBinding => { + const cols = colsOf(arch) + const views = cols.map((c) => c.view) + const meta = { + get count(): number { + return arch.count + }, + } + return { arch, cols, views, runner: makeFactory()(argsFor(arch, views, meta)), meta } + } + + const reinvoke = (b: CompiledBinding): void => { + b.views = b.cols.map((c) => c.view) + b.runner = makeFactory()(argsFor(b.arch, b.views, b.meta)) + } + + const rebuild = (): void => { + const archs = this.matchingArchetypes + const nextBindings: CompiledBinding[] = [] + const nextCold: Archetype[] = [] + for (let ai = 0; ai < archs.length; ai++) { + const arch = archs[ai] as Archetype + if (arch.cold) { + nextCold.push(arch) + continue + } + let b = byArch.get(arch.id as number) + if (b === undefined) { + b = makeBinding(arch) + byArch.set(arch.id as number, b) + } + nextBindings.push(b) + } + bindings = nextBindings + coldMatched = nextCold + boundMatchCount = archs.length + } + + rebuild() + + // Cold archetypes have no contiguous columns: the compiled loop cannot visit them, so a query that + // fragments into cold storage runs those rows through the proxy (correctness over the fast path). 
+    return (ctx: Ctx): void => {
+      if (this.matchingArchetypes.length !== boundMatchCount) {
+        rebuild()
+      } else {
+        for (let i = 0; i < coldMatched.length; i++) {
+          if (!(coldMatched[i] as Archetype).cold) {
+            rebuild()
+            break
+          }
+        }
+      }
+      const bs = bindings
+      for (let bi = 0; bi < bs.length; bi++) {
+        const b = bs[bi] as CompiledBinding
+        const cols = b.cols
+        const views = b.views
+        for (let i = 0; i < cols.length; i++) {
+          if ((cols[i] as Column).view !== views[i]) {
+            reinvoke(b)
+            break
+          }
+        }
+        if (b.arch.count !== 0) b.runner(ctx)
+      }
+      for (let i = 0; i < coldMatched.length; i++) {
+        const arch = coldMatched[i] as Archetype
+        if (arch.cold) this.#eachCold(arch, this.#hotBinding(), (e) => body(e, ctx))
+      }
+    }
+  }
+
+  /** Required component ids as numbers: one per with-word (wordIndex * 32 + index of the mask's highest set bit; masks are assumed single-bit) plus every non-negated residual term (the bindColumns rule). */
+  #requiredComponentIds(): Set<number> {
+    const ids = new Set<number>()
+    for (const w of this.compiled.withWords) ids.add(w.wordIndex * 32 + (31 - Math.clz32(w.mask)))
+    for (const r of this.compiled.residualWith) if (!r.negate) ids.add(r.componentId as number)
+    return ids
+  }
+
   *[Symbol.iterator](): Iterator {
     // A simple eager collection of (archetype,row) snapshots would allocate; instead drive `each`
     // through a buffered generator that yields the SAME pooled element per archetype.
Single active diff --git a/packages/core/test/compile-each.property.test.ts b/packages/core/test/compile-each.property.test.ts new file mode 100644 index 0000000..c3f61a2 --- /dev/null +++ b/packages/core/test/compile-each.property.test.ts @@ -0,0 +1,187 @@ +// compile() PROPERTY suite: the compiled `.each` body and the SAME body run through the proxy `.each` +// produce byte-identical final column state under random interleavings of spawn (into BOTH matched +// archetypes — exercising the archetype-set-change rebuild), despawn, value writes, integrate steps, and +// forced column growth (a burst crosses the 1024-row reservation, forcing the re-back that replaces +// col.view under a live compiled binding). Two worlds get the same op sequence; only the integration path +// differs, so any divergence is a compile() bug. A second property drives the SAME comparison with a +// `.changed(Position)` consumer attached to BOTH worlds and asserts the drained changed-set sizes match — +// evidence that the compiled tracked-write path records reactivity the same way the accessor does. 
+
+import { describe, expect, test } from 'vitest'
+import fc from 'fast-check'
+import { createWorld, defineComponent, read, write } from '@ecsia/core'
+import type { ComponentDef, EntityHandle, Schema } from '@ecsia/core'
+import type { PooledElement } from '../src/internal.js'
+
+const DT = 1 / 60
+
+type Op =
+  | { kind: 'spawn'; arch: 0 | 1; n: number; dx: number; dy: number }
+  | { kind: 'despawn'; pick: number }
+  | { kind: 'write'; pick: number; dx: number; dy: number }
+  | { kind: 'step' }
+  | { kind: 'burst'; n: number }
+
+const opArb: fc.Arbitrary<Op> = fc.oneof(
+  {
+    arbitrary: fc.record({
+      kind: fc.constant('spawn' as const),
+      arch: fc.constantFrom(0 as const, 1 as const),
+      n: fc.integer({ min: 1, max: 20 }),
+      dx: fc.integer({ min: -8, max: 8 }),
+      dy: fc.integer({ min: -8, max: 8 }),
+    }),
+    weight: 3,
+  },
+  { arbitrary: fc.record({ kind: fc.constant('despawn' as const), pick: fc.nat() }), weight: 1 },
+  {
+    arbitrary: fc.record({
+      kind: fc.constant('write' as const),
+      pick: fc.nat(),
+      dx: fc.integer({ min: -8, max: 8 }),
+      dy: fc.integer({ min: -8, max: 8 }),
+    }),
+    weight: 1,
+  },
+  { arbitrary: fc.record({ kind: fc.constant('step' as const) }), weight: 2 },
+  { arbitrary: fc.record({ kind: fc.constant('burst' as const), n: fc.integer({ min: 1100, max: 1300 }) }), weight: 1 },
+)
+
+interface Rig {
+  world: ReturnType<typeof createWorld>
+  handles: EntityHandle[]
+  step: () => void
+  Position: ComponentDef<Schema>
+  Velocity: ComponentDef<Schema>
+  Extra: ComponentDef<Schema>
+}
+
+type IntegEl = { position: { x: number; y: number }; velocity: { dx: number; dy: number } }
+
+const bodyEach = (e: PooledElement, ctx: { dt: number }): void => {
+  const el = e as unknown as IntegEl
+  el.position.x += el.velocity.dx * ctx.dt
+  el.position.y += el.velocity.dy * ctx.dt
+}
+
+function makeRig(compiled: boolean): Rig {
+  const Position = defineComponent({ x: 'f32', y: 'f32' }, { name: 'position' })
+  const Velocity = defineComponent({ dx: 'f32', dy: 'f32' }, { name: 'velocity' })
+  const Extra = defineComponent({ v: 'i32' }, { name: 'extra' })
+  const world = createWorld({
+    components: [Position, Velocity, Extra] as readonly ComponentDef<Schema>[],
+    maxEntities: 1 << 16,
+  })
+  const q = world.query(write(Position), read(Velocity)) as unknown as {
+    compile<Ctx>(b: (e: IntegEl, ctx: Ctx) => void): (ctx: Ctx) => void
+    each(fn: (e: PooledElement) => void): void
+  }
+  // Seed the Position+Velocity archetype (the one bursts spawn into) BEFORE compile(), so a later re-back hits a LIVE compiled binding.
+  const seed = world.spawnWith(Position, Velocity)
+  const compiledRun = q.compile<{ dt: number }>((e, ctx) => {
+    e.position.x += e.velocity.dx * ctx.dt
+    e.position.y += e.velocity.dy * ctx.dt
+  })
+  const step = compiled ? () => compiledRun({ dt: DT }) : () => q.each((e) => bodyEach(e, { dt: DT }))
+  return { world, handles: [seed], step, Position, Velocity, Extra }
+}
+
+function apply(rig: Rig, op: Op): void {
+  const { world, handles } = rig
+  switch (op.kind) {
+    case 'spawn':
+    case 'burst': {
+      for (let i = 0; i < op.n; i++) {
+        const h =
+          op.kind === 'spawn' && op.arch === 1
+            ? world.spawnWith(rig.Position, rig.Velocity, rig.Extra)
+            : world.spawnWith(rig.Position, rig.Velocity)
+        if (op.kind === 'spawn') {
+          const v = world.entity(h).write(rig.Velocity) as { dx: number; dy: number }
+          v.dx = op.dx
+          v.dy = op.dy
+        }
+        handles.push(h)
+      }
+      break
+    }
+    case 'despawn': {
+      if (handles.length === 0) return
+      const i = op.pick % handles.length
+      world.despawn(handles[i] as EntityHandle)
+      handles.splice(i, 1)
+      break
+    }
+    case 'write': {
+      if (handles.length === 0) return
+      const h = handles[op.pick % handles.length] as EntityHandle
+      const v = world.entity(h).write(rig.Velocity) as { dx: number; dy: number }
+      v.dx = op.dx
+      v.dy = op.dy
+      break
+    }
+    case 'step':
+      rig.step()
+      break
+  }
+}
+
+describe('PROP compile() integrator == .each integrator', { timeout: 60_000 }, () => {
+  test('random spawn/despawn/write/step/growth interleavings end byte-identical', () => {
+    fc.assert(
+      fc.property(fc.array(opArb, { minLength: 1, maxLength: 25 }), (ops) => {
+        const cmp = makeRig(true)
+        const oracle = makeRig(false)
+        for (const op of ops) {
+          apply(cmp, op)
+          apply(oracle, op)
+        }
+        cmp.step()
+        oracle.step()
+
+        expect(cmp.handles.length).toBe(oracle.handles.length)
+        for (let i = 0; i < cmp.handles.length; i++) {
+          const a = cmp.world.entity(cmp.handles[i] as EntityHandle).read(cmp.Position) as { x: number; y: number }
+          const b = oracle.world.entity(oracle.handles[i] as EntityHandle).read(oracle.Position) as {
+            x: number
+            y: number
+          }
+          expect(a.x).toBe(b.x)
+          expect(a.y).toBe(b.y)
+        }
+      }),
+      { numRuns: 40 },
+    )
+  })
+
+  test('with a .changed(Position) consumer, the drained changed-sets match the proxy path', () => {
+    fc.assert(
+      fc.property(fc.array(opArb, { minLength: 1, maxLength: 20 }), (ops) => {
+        const cmp = makeRig(true)
+        const oracle = makeRig(false)
+        const changedOf = (rig: Rig) =>
+          rig.world.query(read(rig.Position)).changed(rig.Position) as unknown as {
+            eachChanged(fn: (e: PooledElement) => void): void
+          }
+        const cChanged = changedOf(cmp)
+        const oChanged = changedOf(oracle)
+        for (const op of ops) {
+          apply(cmp, op)
+          apply(oracle, op)
+        }
+        cmp.world.frameReset()
+        oracle.world.frameReset()
+        cmp.step()
+        oracle.step()
+
+        const drain = (c: { eachChanged(fn: (e: PooledElement) => void): void }): number => {
+          let n = 0
+          c.eachChanged(() => n++)
+          return n
+        }
+        expect(drain(cChanged)).toBe(drain(oChanged))
+      }),
+      { numRuns: 30 },
+    )
+  })
+})
diff --git a/packages/core/test/compile-each.test.ts b/packages/core/test/compile-each.test.ts
new file mode 100644
index 0000000..2855bc6
--- /dev/null
+++ b/packages/core/test/compile-each.test.ts
@@ -0,0 +1,313 @@
+import { describe, expect, test } from 'vitest'
+import { createWorld, defineComponent, read, write, vec2 } from '@ecsia/core'
+import type { ComponentDef, Schema } from '@ecsia/core'
+import { analyzeEachBody } from '../src/internal.js'
+
+const DT = 1 / 60
+
+// Test element: the pooled element is dynamically shaped; these tests only ever touch position/velocity.
+type El = { position: { x: number; y: number }; velocity: { dx: number; dy: number }; handle: unknown }
+interface Compilable {
+  compile<Ctx = void>(body: (e: El, ctx: Ctx) => void): (ctx: Ctx) => void
+  each(fn: (e: El) => void): void
+}
+
+// Fresh component defs per world — a ComponentDef registers to exactly one world.
+function makeWorld() {
+  const Position = defineComponent({ x: 'f32', y: 'f32' }, { name: 'position' })
+  const Velocity = defineComponent({ dx: 'f32', dy: 'f32' }, { name: 'velocity' })
+  const world = createWorld({ components: [Position, Velocity], maxEntities: 1 << 16 })
+  const spawn = (x: number, y: number, dx: number, dy: number) =>
+    world.spawnWith([Position, { x, y }], [Velocity, { dx, dy }])
+  return { world, Position, Velocity, spawn }
+}
+
+describe('compile() — correctness vs .each', () => {
+  test('integrate body matches .each byte-for-byte', () => {
+    const a = makeWorld()
+    const b = makeWorld()
+    for (let i = 0; i < 2000; i++) {
+      const dx = (i % 7) - 3
+      const dy = (i % 5) - 2
+      a.spawn(i, -i, dx, dy)
+      b.spawn(i, -i, dx, dy)
+    }
+    const qa = a.world.query(write(a.Position), read(a.Velocity)) as unknown as Compilable
+    const qb = b.world.query(write(b.Position), read(b.Velocity)) as unknown as Compilable
+
+    const run = qa.compile<{ dt: number }>((e, ctx) => {
+      e.position.x += e.velocity.dx * ctx.dt
+      e.position.y += e.velocity.dy * ctx.dt
+    })
+    for (let f = 0; f < 10; f++) {
+      run({ dt: DT })
+      qb.each((e) => {
+        e.position.x += e.velocity.dx * DT
+        e.position.y += e.velocity.dy * DT
+      })
+    }
+
+    const dump = (q: Compilable): number[] => {
+      const out: number[] = []
+      q.each((e) => out.push(e.position.x as number, e.position.y as number))
+      return out
+    }
+    const da = dump(qa)
+    expect(da.length).toBe(4000)
+    expect(da).toEqual(dump(qb))
+  })
+
+  test('stays correct across column growth (crosses the 1024-row reservation)', () => {
+    const { world, Position, Velocity, spawn } = makeWorld()
+    const q = world.query(write(Position), read(Velocity)) as unknown as Compilable
+    const run = q.compile<{ dt: number }>((e, ctx) => {
+      e.position.x += e.velocity.dx * ctx.dt
+    })
+    for (let i = 0; i < 1500; i++) spawn(0, 0, 2, 0)
+    void Velocity
+    for (let f = 0; f < 5; f++) run({ dt: 1 })
+    let bad = 0
+    let seen = 0
+    q.each((e) => {
+      seen++
+      if ((e.position.x as number) !== 10) bad++
+    })
+    expect(seen).toBe(1500)
+    expect(bad).toBe(0)
+  })
+
+  test('compound and assign forms both compile correctly', () => {
+    const { world, Position, Velocity, spawn } = makeWorld()
+    const q = world.query(write(Position), read(Velocity)) as unknown as Compilable
+    spawn(1, 1, 3, 4)
+    void Velocity
+    const run = q.compile((e) => {
+      e.position.x = e.velocity.dx
+      e.position.y *= 2
+    })
+    run()
+    q.each((e) => {
+      expect(e.position.x).toBe(3)
+      expect(e.position.y).toBe(2)
+    })
+  })
+
+  test('local const inside a straight-line body still compiles', () => {
+    const { world, Position, Velocity, spawn } = makeWorld()
+    const q = world.query(write(Position), read(Velocity)) as unknown as Compilable
+    spawn(0, 0, 4, 5)
+    void Velocity
+    const run = q.compile<{ dt: number }>((e, ctx) => {
+      const sx = e.velocity.dx * ctx.dt
+      e.position.x += sx
+    })
+    run({ dt: 2 })
+    q.each((e) => expect(e.position.x).toBe(8))
+  })
+
+  test('multiple archetypes (some entities lack a 3rd component) all integrate', () => {
+    const Position = defineComponent({ x: 'f32', y: 'f32' }, { name: 'position' })
+    const Velocity = defineComponent({ dx: 'f32', dy: 'f32' }, { name: 'velocity' })
+    const Tag = defineComponent({ t: 'u8' }, { name: 'tag' })
+    const world = createWorld({ components: [Position, Velocity, Tag], maxEntities: 1 << 14 })
+    for (let i = 0; i < 300; i++) world.spawnWith([Position, { x: 0, y: 0 }], [Velocity, { dx: 1, dy: 0 }])
+    for (let i = 0; i < 300; i++)
+      world.spawnWith([Position, { x: 0, y: 0 }], [Velocity, { dx: 1, dy: 0 }], [Tag, { t: 1 }])
+    const q = world.query(write(Position), read(Velocity)) as unknown as Compilable
+    const run = q.compile((e) => {
+      e.position.x += e.velocity.dx
+    })
+    run()
+    run()
+    let seen = 0
+    q.each((e) => {
+      seen++
+      expect(e.position.x).toBe(2)
+    })
+    expect(seen).toBe(600)
+  })
+})
+
+describe('compile() — reactivity preserved', () => {
+  test('.changed() sees compiled writes (and matches the proxy path)', () => {
+    const count = (useCompiled: boolean): number => {
+      const { world, Position, Velocity, spawn } = makeWorld()
+      spawn(0, 0, 1, 1)
+      spawn(0, 0, 2, 2)
+      const writer = world.query(write(Position), read(Velocity))
+      const changed = writer.changed(Position) as unknown as {
+        eachChanged(fn: (e: El) => void): void
+      }
+      const w = writer as unknown as Compilable
+      world.frameReset() // separate the spawn-time writes from the run's writes
+
+      if (useCompiled) {
+        w.compile<{ dt: number }>((e, ctx) => {
+          e.position.x += e.velocity.dx * ctx.dt
+        })({ dt: 1 })
+      } else {
+        w.each((e) => {
+          e.position.x += e.velocity.dx * 1
+        })
+      }
+      let n = 0
+      changed.eachChanged(() => n++)
+      return n
+    }
+    expect(count(false)).toBe(2) // proxy reference
+    expect(count(true)).toBe(2) // compiled path
+  })
+
+  test('no .changed consumer ⇒ writes still land (gate is transparent)', () => {
+    const { world, Position, Velocity, spawn } = makeWorld()
+    spawn(5, 0, 10, 0)
+    void Velocity
+    const q = world.query(write(Position), read(Velocity)) as unknown as Compilable
+    const run = q.compile((e) => {
+      e.position.x += e.velocity.dx
+    })
+    run()
+    q.each((e) => expect(e.position.x).toBe(15))
+  })
+})
+
+describe('compile() — fallback to proxy stays correct', () => {
+  test('control-flow body falls back yet matches an explicit-if proxy', () => {
+    const a = makeWorld()
+    const b = makeWorld()
+    for (let i = 0; i < 500; i++) {
+      const dx = (i % 9) - 4
+      a.spawn(i, 0, dx, 0)
+      b.spawn(i, 0, dx, 0)
+    }
+    const qa = a.world.query(write(a.Position), read(a.Velocity)) as unknown as Compilable
+    const qb = b.world.query(write(b.Position), read(b.Velocity)) as unknown as Compilable
+    const run = qa.compile<{ dt: number }>((e, ctx) => {
+      if ((e.velocity.dx as number) > 0) e.position.x += e.velocity.dx * ctx.dt
+    })
+    for (let f = 0; f < 4; f++) {
+      run({ dt: DT })
+      qb.each((e) => {
+        if ((e.velocity.dx as number) > 0) e.position.x += e.velocity.dx * DT
+      })
+    }
+    const dumpA: number[] = []
+    const dumpB: number[] = []
+    qa.each((e) => dumpA.push(e.position.x as number))
+    qb.each((e) => dumpB.push(e.position.x as number))
+    expect(dumpA.length).toBe(500)
+    expect(dumpA).toEqual(dumpB)
+  })
+
+  test('body closing over an outer variable falls back (not self-contained) without crashing', () => {
+    const { world, Position, Velocity, spawn } = makeWorld()
+    spawn(0, 0, 3, 0)
+    void Velocity
+    const GRAVITY = 7 // captured from outer scope — the codegen copy cannot see it
+    const q = world.query(write(Position), read(Velocity)) as unknown as Compilable
+    // Must NOT throw at compile or at run — the scratch pre-flight demotes it to the proxy, which closes
+    // over GRAVITY correctly.
+    const run = q.compile((e) => {
+      e.position.x += e.velocity.dx + GRAVITY
+    })
+    run()
+    q.each((e) => expect(e.position.x).toBe(10))
+  })
+
+  test('vec field → proxy (non-scalar), correct result', () => {
+    const Pos = defineComponent({ p: vec2() }, { name: 'vpos' })
+    const Vel = defineComponent({ v: vec2() }, { name: 'vvel' })
+    const world = createWorld({ components: [Pos, Vel], maxEntities: 1 << 12 })
+    world.spawnWith([Pos, { p: [1, 2] as never }], [Vel, { v: [3, 4] as never }])
+    const q = world.query(write(Pos), read(Vel)) as unknown as {
+      compile(body: (e: { vpos: { p: number[] }; vvel: { v: number[] } }) => void): () => void
+      each(fn: (e: { vpos: { p: number[] } }) => void): void
+    }
+    const run = q.compile((e) => {
+      const p = e.vpos.p
+      const v = e.vvel.v
+      p[0] = p[0]! + v[0]!
+      p[1] = p[1]! + v[1]!
+    })
+    run()
+    q.each((e) => {
+      const p = e.vpos.p
+      expect([p[0], p[1]]).toEqual([4, 6])
+    })
+  })
+})
+
+describe('analyzeEachBody — transform unit', () => {
+  // Unregistered defs: the analyzer is handed only these defs and the stub lookups below, never a world.
+  const Position = defineComponent({ x: 'f32', y: 'f32' }, { name: 'position' })
+  const Velocity = defineComponent({ dx: 'f32', dy: 'f32' }, { name: 'velocity' })
+  const deps = {
+    defByName: (n: string): ComponentDef<Schema> | undefined =>
+      n === 'position' ? Position : n === 'velocity' ? Velocity : undefined,
+    idOf: (d: ComponentDef<Schema>) => (d === Position ? 0 : d === Velocity ? 1 : undefined),
+    isRequired: () => true,
+  }
+
+  test('rewrites e.comp.field to column indexing and finds the write set', () => {
+    const plan = analyzeEachBody(
+      ((e: El, ctx: { dt: number }) => {
+        e.position.x += e.velocity.dx * ctx.dt
+      }) as never,
+      deps as never,
+    )
+    expect(plan).not.toBeNull()
+    expect(plan!.specs.map((s) => `${s.def.name}.${s.field}`)).toEqual(['position.x', 'velocity.dx'])
+    expect(plan!.writtenIds).toEqual([0])
+    expect(plan!.factorySource).toContain('__v0[__i]')
+    expect(plan!.factorySource).toContain('const __c_dt = __ctx.dt')
+  })
+
+  test('bails on control flow, strings, nested fn, bare e', () => {
+    const bail = (f: unknown) => expect(analyzeEachBody(f as never, deps as never)).toBeNull()
+    bail((e: El) => {
+      if (true) e.position.x = 1
+    })
+    bail((e: El) => {
+      const s = 'e.position.x'
+      e.position.x = s.length
+    })
+    bail((e: El) => [1].forEach(() => (e.position.x = 1)))
+    bail((e: El) => {
+      e.position.x = e.handle as unknown as number
+    })
+  })
+
+  test('bails when a body local could collide with a generated name (silent-shadow guard)', () => {
+    const bail = (f: unknown) => expect(analyzeEachBody(f as never, deps as never)).toBeNull()
+    // A local named like a generated column/seam ident must NOT silently shadow it — any `__` bails.
+    bail((e: El, ctx: { scale: number }) => {
+      const __v0 = ctx.scale
+      e.position.x += e.velocity.dx * __v0
+    })
+    bail((e: El) => {
+      const __trackWrite = e.velocity.dx
+      e.position.x += __trackWrite
+    })
+  })
+
+  test('bails on destructuring-assignment and regex literals (write-miss / rewrite hazards)', () => {
+    const bail = (f: unknown) => expect(analyzeEachBody(f as never, deps as never)).toBeNull()
+    bail((e: El) => {
+      ;[e.position.x] = [5] as [number]
+    })
+    bail((e: El) => {
+      e.position.x = /e.position.x/.lastIndex
+    })
+  })
+
+  test('division survives (not mistaken for a regex literal)', () => {
+    const plan = analyzeEachBody(
+      ((e: El, ctx: { mass: number }) => {
+        e.position.x += e.velocity.dx / ctx.mass
+      }) as never,
+      deps as never,
+    )
+    expect(plan).not.toBeNull()
+    expect(plan!.factorySource).toContain('/')
+  })
+})
diff --git a/packages/schema/src/index.ts b/packages/schema/src/index.ts
index 554e69f..8d21bae 100644
--- a/packages/schema/src/index.ts
+++ b/packages/schema/src/index.ts
@@ -582,6 +582,17 @@ export interface Query {
       factory: (views: ColumnViews, meta: BoundColumnsMeta) => (ctx: Ctx) => void,
     ]
   ): (ctx: Ctx) => void
+  /**
+   * Compile an ergonomic `.each` body into the codegen'd column loop `bindColumns` runs — without naming
+   * columns or restating the math. It reads the callback's source, rewrites `e.<comp>.<field>` to direct
+   * typed-array indexing, and lands near `eachChunk` (~1.5 ns/entity) instead of the per-row proxy
+   * (~10 ns/entity). Unlike `bindColumns`, it PRESERVES reactivity: a written component feeds `.changed()`
+   * and observers exactly as the accessor would (free when no consumer is registered). Pure speedup — the
+   * analyzer is conservative and falls back to the unchanged proxy `.each` (identical result) for any body
+   * it cannot prove safe (non-straight-line, non-numeric-scalar field, row-filtered query, blocked
+   * `new Function`, etc.). Call ONCE and reuse the returned runner per frame.
+ */ + compile(body: (e: QueryElement & { handle: EntityHandle }, ctx: Ctx) => void): (ctx: Ctx) => void /** * Derive a narrower query: the cached query for [...this query's terms, ...terms] — pure sugar * over `world.query` with the merged term list, riding the same canonical-hash dedup (deriving @@ -656,6 +667,8 @@ export interface LooseQuery { factory: (views: ColumnViews, meta: BoundColumnsMeta) => (ctx: Ctx) => void, ] ): (ctx: Ctx) => void + /** Compile an `.each` body into the fast column loop: see {@link Query.compile}. */ + compile(body: (e: EL & { handle: EntityHandle }, ctx: Ctx) => void): (ctx: Ctx) => void /** See {@link Query.derive}. Arity is already past the cap, so the result stays loose. */ derive(...terms: QueryTerm[]): LooseQuery /** Flavor declarations (chainable). */ diff --git a/scripts/bench-report.mjs b/scripts/bench-report.mjs index 02793fb..5b15fd8 100644 --- a/scripts/bench-report.mjs +++ b/scripts/bench-report.mjs @@ -66,31 +66,44 @@ function buildBenchBuilders() { } // --- step 1: iterate comparison --------------------------------------------- -async function runIterate(makeEcsiaIter, makeEcsiaCursorIter, makeEcsiaPinnedIter, makeMiniplexIter, makeBitEcsIter) { +async function runIterate( + makeEcsiaIter, + makeEcsiaCursorIter, + makeEcsiaPinnedIter, + makeEcsiaCompiledIter, + makeMiniplexIter, + makeBitEcsIter, +) { const n = CONFIG.iterEntities const ecsia = makeEcsiaIter(n) const cursor = makeEcsiaCursorIter(n) const pinned = makeEcsiaPinnedIter(n) + const compiled = makeEcsiaCompiledIter(n) const mini = makeMiniplexIter(n) const bit = makeBitEcsIter(n) - // Honesty: the cursor and pinned rows must integrate the SAME data as the accessor row at this N - // (crosses the 1024 column-growth boundary). Cross-validate one step before timing — a + // Honesty: the cursor, pinned, and compiled rows must integrate the SAME data as the accessor row at + // this N (crosses the 1024 column-growth boundary). 
Cross-validate one step before timing — a // fast-but-wrong loop fails here instead of silently reporting a misleading number. ecsia.step() cursor.step() pinned.step() + compiled.step() if (Math.abs(ecsia.sampleX() - cursor.sampleX()) > 1e-9) { throw new Error(`bench honesty gate: ecsia-cursor disagrees with ecsia accessor at n=${n}`) } if (Math.abs(ecsia.sampleX() - pinned.sampleX()) > 1e-9) { throw new Error(`bench honesty gate: ecsia-pinned disagrees with ecsia accessor at n=${n}`) } + if (Math.abs(ecsia.sampleX() - compiled.sampleX()) > 1e-9) { + throw new Error(`bench honesty gate: ecsia-compiled disagrees with ecsia accessor at n=${n}`) + } const results = [] for (let rep = 0; rep < CONFIG.iterReps; rep++) { const bench = new Bench({ time: CONFIG.iterTimeMs }) bench.add('ecsia .each', () => ecsia.step()) + bench.add('ecsia compile', () => compiled.step()) bench.add('ecsia eachChunk', () => cursor.step()) bench.add('ecsia bindColumns', () => pinned.step()) bench.add('miniplex', () => mini.step()) @@ -109,7 +122,7 @@ async function runIterate(makeEcsiaIter, makeEcsiaCursorIter, makeEcsiaPinnedIte const prev = best.get(r.name) if (!prev || r.hz > prev.hz) best.set(r.name, r) } - const order = ['ecsia .each', 'ecsia eachChunk', 'ecsia bindColumns', 'miniplex', 'bitECS'] + const order = ['ecsia .each', 'ecsia compile', 'ecsia eachChunk', 'ecsia bindColumns', 'miniplex', 'bitECS'] return order.map((name) => { const r = best.get(name) return { name, hz: r.hz, meanMs: r.meanMs, nsPerEntity: r.meanMs > 0 ? 
(r.meanMs * 1e6) / n : 0 } @@ -273,7 +286,14 @@ async function main() { process.env['ECSIA_KERNEL_MODULE'] = resolve(ROOT, 'packages/scheduler/test/fixtures/heavy-bench-kernels.mjs') const pool = await import(resolve(ROOT, 'bench/.report-dist/worker-pool/heavy-pool.js')) - const iterate = await runIterate(iter.makeEcsiaIter, iter.makeEcsiaCursorIter, iter.makeEcsiaPinnedIter, iter.makeMiniplexIter, iter.makeBitEcsIter) + const iterate = await runIterate( + iter.makeEcsiaIter, + iter.makeEcsiaCursorIter, + iter.makeEcsiaPinnedIter, + iter.makeEcsiaCompiledIter, + iter.makeMiniplexIter, + iter.makeBitEcsIter, + ) const trackedWrite = await runTrackedWrite() const poolReport = await runPool(pool.main) diff --git a/website/guide/performance.md b/website/guide/performance.md index 3fef9d8..8419de7 100644 --- a/website/guide/performance.md +++ b/website/guide/performance.md @@ -21,7 +21,9 @@ adds each entity's `Velocity` to its `Position`, one frame per timed op. The inn allocation-free — it creates no objects while running — so the measurement is storage and iteration cost, not garbage collection. -- **ecsia `.each`** — the ergonomic accessor path: per-row proxy objects, write-log aware. +- **ecsia `.each`** — the ergonomic accessor path: per-row proxy objects, write-log aware. Its readable + `e.position.x += …` body can be compiled to the fast column loop with + [`compile`](#compile-the-ergonomic-path-compile) — same syntax, near-`eachChunk` speed. - **ecsia `eachChunk`** — the opt-in column cursor. ecsia stores each component field in its own contiguous array (a column), a layout called Structure-of-Arrays (SoA). 
`eachChunk` hands you those raw typed-array columns plus a row span, and the loop indexes `Float32Array` directly — bypassing @@ -159,6 +161,51 @@ The trade-offs are the same as `eachChunk`: writes through the bound arrays bypa `.changed()` filters and observers will not see them, and structural changes during `run()` follow the same collect-first, mutate-after rule as every other loop. +## Compile the ergonomic path: `compile` + +`bindColumns` is fast but makes you name every column and rewrite your loop against raw arrays. +`compile` gets the same speed from the **readable `.each` body you would write anyway**: hand it a +callback, and it reads that callback's source, rewrites each `e.<comp>.<field>` to direct column +indexing, and codegens the same specialized per-archetype loop. The result lands near `eachChunk` — +roughly **6× faster than the proxy `.each`** — while you keep writing `e.position.x += …`. + +```ts +import { createWorld, defineComponent, write, read } from 'ecsia' + +const Position = defineComponent({ x: 'f32', y: 'f32' }, { name: 'position' }) +const Velocity = defineComponent({ dx: 'f32', dy: 'f32' }, { name: 'velocity' }) +const world = createWorld({ components: [Position, Velocity], maxEntities: 1 << 16 }) + +const q = world.query(write(Position), read(Velocity)) + +const run = q.compile<{ dt: number }>((e, ctx) => { + e.position.x += e.velocity.dx * ctx.dt + e.position.y += e.velocity.dy * ctx.dt +}) + +run({ dt: 1 / 60 }) // call once per frame +``` + +Two things set it apart from `bindColumns`: + +- **It preserves reactivity.** Unlike `bindColumns` and `eachChunk`, a component your body writes is + recorded in the write log exactly as the accessor setter would record it, so `.changed()` filters and + observers still fire. That bookkeeping is free when no consumer is registered (the same gate the + accessor uses) and costs the write-log push only when one is — so `compile` is the fast path you can + reach for *even when you depend on change tracking*. 
+- **It is a pure speedup that can never change your result.** The analyzer is deliberately + conservative: it compiles only straight-line numeric-scalar bodies, and for anything it cannot prove + safe it transparently runs your unchanged callback through the normal `.each`. So a body with control + flow (`if`/`?`/`&&`/`return`/a loop/a nested function), a string or comment, a non-numeric-scalar + field (`vec`/`bool`/`eid`/`string`/object), a component the query does not *require*, any use of `e` + other than `e.<comp>.<field>`, a per-row write to `ctx`, a row-filtered query, or a runtime that + blocks `new Function` (a strict Content-Security-Policy) all keep working — they just run the proxy + loop. A property test asserts the compiled loop is byte-identical to `.each` under random spawn / + despawn / write / growth churn, with and without a change consumer. + +Call `compile` once and reuse the returned `run` every frame, the same as `bindColumns`. Structural +changes during `run()` follow the same collect-first, mutate-after rule as every other loop. + ## Reproduce ```bash @@ -190,9 +237,9 @@ numbers still come from `bench:report` on a fixed machine. ## Bundle size -The kernel — typed data, systems, queries, and the scheduler — is about **40 KB min+gzip**; just a -world and components (`@ecsia/core` alone) is ~30 KB, and importing *everything* from the umbrella is -~55 KB. ecsia is batteries-included, so it is larger than a minimal core like bitECS (~5 KB); the +The kernel — typed data, systems, queries, and the scheduler — is about **42 KB min+gzip**; just a +world and components (`@ecsia/core` alone) is ~32 KB, and importing *everything* from the umbrella is +~57 KB. ecsia is batteries-included, so it is larger than a minimal core like bitECS (~5 KB); the packages are `sideEffects: false`, so a bundler ships only the subsystems you import — relations, serialization, and topics drop unless used. 
A CI budget (`pnpm size`) holds these numbers in place so a change can't quietly inflate the install.