diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca89813..50fec2c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,6 +40,29 @@ jobs: - name: Test (unit + property + worker + type-level, coverage-gated) run: pnpm test -- --coverage + # --------------------------------------------------------------------------- + # Bench regression lane: a dedicated job (noise-isolated from unit CI) asserting each ecsia + # iteration path stays under its committed ns/entity RATIO vs a same-run bitECS control. The ratio + # cancels shared-runner drift, so a failure is a real regression, not scheduling noise. Single Node + # version (timing, not correctness — that's build-test's job). + # --------------------------------------------------------------------------- + bench-regression: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: pnpm/action-setup@0e279bb959325dab635dd2c09392533439d90093 # v6.0.8 + - uses: actions/setup-node@v6 + with: + node-version: 24 + cache: pnpm + - run: pnpm install --frozen-lockfile + - name: Build + run: pnpm build + - name: Bench regression (ratios vs bitECS control) + env: + BENCH_REGRESSION: '1' + run: pnpm vitest run --project bench bench/test/regression.bench.test.ts + # --------------------------------------------------------------------------- # Runtime lanes (P3): prove the SHIPPED dist actually runs on each claimed runtime. # Each lane builds first (the smoke imports packages/ecsia/dist), then runs the SAME diff --git a/bench/regression-baseline.json b/bench/regression-baseline.json new file mode 100644 index 0000000..22bce4e --- /dev/null +++ b/bench/regression-baseline.json @@ -0,0 +1,8 @@ +{ + "_comment": "CI bench regression ceilings — MAX allowed ns/entity RATIO of each ecsia path vs a SAME-RUN bitECS control (so machine drift cancels). A real regression (e.g. codegen breaking → bindColumns deopts from ~0.72x to ~1.5x) trips the ceiling; ~10% run-to-run noise does not. 
RATCHET: when a path durably improves, lower its ceiling here. Measured 2026-06-08: bindColumns ~0.72x, eachChunk ~1.08x, each ~7.4x.", + "ratiosVsBitecs": { + "bindColumns": 0.9, + "eachChunk": 1.3, + "each": 9.0 + } +} diff --git a/bench/test/regression.bench.test.ts b/bench/test/regression.bench.test.ts new file mode 100644 index 0000000..3410fb8 --- /dev/null +++ b/bench/test/regression.bench.test.ts @@ -0,0 +1,63 @@ +// CI bench REGRESSION lane. Times each ecsia iteration path against a SAME-RUN bitECS control and +// asserts the ns/entity RATIO stays under a committed ceiling (bench/regression-baseline.json). The +// ratio cancels machine drift — a noisy shared runner moves both ecsia and bitECS together — so a +// failure means a genuine regression (e.g. codegen breaking and bindColumns deopting to ~1.5x), not +// scheduling noise. Gated behind BENCH_REGRESSION=1 so it runs ONLY in its dedicated CI job, never +// in the default `pnpm test` (where measurement noise would flake unit CI). Ratchet ceilings down in +// the baseline file when a path durably improves. + +import { describe, expect, test } from 'vitest' +import { readFileSync } from 'node:fs' +import { fileURLToPath } from 'node:url' +import { makeEcsiaIter, makeEcsiaCursorIter, makeEcsiaPinnedIter, makeBitEcsIter } from '../iterate.js' +import type { IterCase } from '../iterate.js' + +const ENABLED = process.env['BENCH_REGRESSION'] === '1' +const N = 50_000 +const WARMUP = 300 +const TIMED = 1500 +const REPS = 3 // best-of-N rounds (each round rebuilds + re-warms) to shake off a single bad schedule + +interface CtxIter extends IterCase { + step(): void +} + +/** p50 ns/entity over TIMED samples, taking the best (min) p50 across REPS rebuilds. 
*/
// Measurement details: every rep builds a fresh iterator via `make(N)`, runs WARMUP untimed steps,
// then times TIMED individual `step()` calls with `performance.now()` (milliseconds). The per-rep
// median is converted ms -> ns (x 1e6) and divided by N; the smallest median across reps is returned.
//
// @param make  factory producing an iterator case sized for `n` entities
// @returns     best-of-REPS median cost of one `step()`, in nanoseconds per entity
function nsPerEntity(make: (n: number) => CtxIter): number {
  let best = Infinity
  // Allocated once and fully overwritten every rep, so the timing loop never grows an array.
  const samples = new Float64Array(TIMED)
  for (let rep = 0; rep < REPS; rep++) {
    const iter = make(N)
    for (let i = 0; i < WARMUP; i++) iter.step()
    for (let i = 0; i < TIMED; i++) {
      const t0 = performance.now()
      iter.step()
      samples[i] = performance.now() - t0
    }
    samples.sort() // typed arrays sort numerically (ascending) by default
    const p50 = ((samples[TIMED >> 1] as number) * 1e6) / N
    if (p50 < best) best = p50
  }
  return best
}

// Committed ceilings, read relative to this file. The JSON is not schema-validated, so entries are
// typed `unknown` here and narrowed to a finite number where they are consumed.
const baseline = JSON.parse(
  readFileSync(fileURLToPath(new URL('../regression-baseline.json', import.meta.url)), 'utf8'),
) as { ratiosVsBitecs?: Record<string, unknown> }

/** Factory signature shared by every timed iteration path. */
type MakeIter = (n: number) => CtxIter

describe.skipIf(!ENABLED)('bench regression — ecsia/bitECS ns/entity ratios under ceiling', { timeout: 120_000 }, () => {
  // ONE bitECS control per run, shared by all cases below. It is measured lazily inside the first
  // test that needs it rather than in this callback body: the body executes while the suite is being
  // collected, i.e. outside any test's timeout/failure reporting and regardless of whether the
  // tests themselves end up running.
  let controlNs: number | undefined
  const control = (): number => {
    if (controlNs === undefined) controlNs = nsPerEntity(makeBitEcsIter)
    return controlNs
  }

  test.each([
    ['bindColumns', makeEcsiaPinnedIter as MakeIter],
    ['eachChunk', makeEcsiaCursorIter as MakeIter],
    ['each', makeEcsiaIter as MakeIter],
  ])('%s ratio vs bitECS stays under its ceiling', (name, make) => {
    // Fail fast, and legibly, when the baseline file has no usable entry for this path.
    const ceiling = baseline.ratiosVsBitecs?.[name]
    if (typeof ceiling !== 'number' || !Number.isFinite(ceiling)) {
      throw new Error(`no numeric ceiling for "${name}" under ratiosVsBitecs in bench/regression-baseline.json`)
    }

    const bit = control()
    const ns = nsPerEntity(make)
    const ratio = ns / bit
    // Report the actual ratio in the assertion message so a CI failure shows the regression size.
    expect(ratio, `${name}: ${ns.toFixed(2)} ns/e = ${ratio.toFixed(3)}x bitECS (${bit.toFixed(2)} ns/e); ceiling ${ceiling}x`).toBeLessThanOrEqual(ceiling)
  })
})