diff --git a/README.md b/README.md
index 534a1ee..5b93d17 100644
--- a/README.md
+++ b/README.md
@@ -172,9 +172,10 @@ Lower is faster (nanoseconds per entity):
 | miniplex | 12.15 |
 
 `.each` is the ergonomic accessor path from the example above; `eachChunk` loops over
-the raw storage arrays directly; `bindColumns` goes one step further and binds your
-loop to those arrays once, up front — which is what lets it edge ahead of bitECS, so
-long as you size the world before binding.
+the raw storage arrays directly; `bindColumns` goes one step further and compiles a
+specialized loop per archetype — which is what lets it beat bitECS, and it holds that
+edge as the world grows (no pre-sizing required; it falls back to a plain loop where a
+strict CSP or sandbox forbids dynamic compilation).
 
 Worker-thread speedup on a compute-heavy simulation (8,192 entities, 512 physics
 steps per frame, 60 frames), with every threaded run byte-identical to the
diff --git a/bench/iterate.ts b/bench/iterate.ts
index 8046b97..2ceae4d 100644
--- a/bench/iterate.ts
+++ b/bench/iterate.ts
@@ -109,23 +109,28 @@ export function makeEcsiaPinnedIter(n: number): IterCase {
     v.dy = 0.5
   }
   const q = world.query(write(Position), write(Velocity))
+  // The factory is SELF-CONTAINED (closes over nothing): per-frame dt arrives via the runner's ctx,
+  // hoisted to a local const before the loop. This is the shape the codegen path requires — each
+  // archetype's runner is recompiled into a specialized singleton, no post-growth penalty.
   const run = q.bindColumns(
     [Position, 'x'],
     [Position, 'y'],
     [Velocity, 'dx'],
     [Velocity, 'dy'],
-    ([px, py, dx, dy], meta) => () => {
+    ([px, py, dx, dy], meta) => (ctx: { dt: number }) => {
+      const dt = ctx.dt
       const count = meta.count
       for (let i = 0; i < count; i++) {
-        px[i] = px[i]! + dx[i]! * DT
-        py[i] = py[i]! + dy[i]! * DT
+        px[i] = px[i]! + dx[i]! * dt
+        py[i] = py[i]! + dy[i]! * dt
       }
     },
   )
+  const ctx = { dt: DT } // hoisted so step() allocates nothing
   return {
     name: 'ecsia-pinned',
     step() {
-      run()
+      run(ctx)
     },
     sampleX() {
       return (world.entity(first).read(Position) as { x: number }).x
diff --git a/packages/core/src/query/codegen.ts b/packages/core/src/query/codegen.ts
new file mode 100644
index 0000000..c363756
--- /dev/null
+++ b/packages/core/src/query/codegen.ts
@@ -0,0 +1,125 @@
+// Pinned-loop codegen: the mechanism that lets `bindColumns` BEAT bitECS on the default iteration
+// path, robustly. The win is a V8 specialization detail: TurboFan embeds typed arrays captured as
+// closure CONSTANTS directly into optimized code (base pointer + length as immediates), but ONLY for
+// a SINGLETON closure — the single closure produced by its enclosing function. The interpreted path
+// invokes ONE user factory per archetype; the moment it produces a second runner (a 2nd matched
+// archetype, or a re-invoke after column growth) V8 sees the factory making multiple closures and
+// disables specialization for ALL of them — ~1.5 ns/entity, which LOSES to bitECS (~1.4). (Measured.)
+//
+// The fix: recompile the user's factory into a DISTINCT function object per archetype (per growth),
+// via `new Function('return (' + factory.toString() + ')')()`. Each archetype's runner is then the
+// singleton of its own freshly-minted factory → specialized → ~1.0 ns/entity, ~0.7× bitECS, with NO
+// post-growth penalty. The recompile cost is paid only at bind / growth (rare), never per frame.
+//
+// SAFETY (this never produces a wrong result):
+//   - eval availability is probed once; under CSP (`script-src` without unsafe-eval) or a sandbox
+//     that blocks `new Function`, codegen is skipped and the interpreted factory call is used.
+//   - the user's factory MUST be self-contained — it may close over NOTHING from its outer scope
+//     (the recompiled copy only sees globals), so per-frame inputs come through the runner's `ctx`
+//     argument and fixed constants are defined inside the factory body. A violation is caught (silent
+//     fallback to interpreted) only if it throws or diverges during the one-row pre-flight below.
+//   - PRE-FLIGHT VALIDATION: before a codegen runner is ever trusted, it is run once on a tiny scratch
+//     clone of the columns alongside the interpreted runner; only if their outputs match byte-for-byte
+//     is codegen used. A miscompile, a ReferenceError from an illegal closure, or any divergence →
+//     fall back to interpreted. Codegen is therefore a pure speed optimization gated on proven equality.
+//
+// SECURITY: the generated source is `'return (' + factory.toString() + ')'` — the user's OWN function
+// source, never interpolated external/untrusted strings. No injection surface beyond the code the
+// caller already wrote and passed.
+
+import type { TypedArray } from '../memory/index.js'
+import type { BoundColumnsMeta } from '@ecsia/schema'
+
+/** A bindColumns factory: resolve the views into a persistent runner; deps arrive via the runner's ctx. */
+export type PinnedFactory<Ctx = unknown> = (
+  views: readonly TypedArray[],
+  meta: BoundColumnsMeta,
+) => (ctx: Ctx) => void
+
+/** Probed once, when this module is first evaluated: can this runtime compile a function from source? (False under strict CSP / locked sandboxes.) */
+export const CODEGEN_AVAILABLE: boolean = (() => {
+  try {
+    // eslint-disable-next-line no-new-func
+    return new Function('return 1')() === 1
+  } catch {
+    return false // `new Function` threw: treat dynamic compilation as unavailable
+  }
+})()
+
+/** Recompile a factory's OWN source (no external strings) into a distinct STRICT-mode function; its runner becomes a specialized singleton. */
+function recompile<Ctx>(factory: PinnedFactory<Ctx>): PinnedFactory<Ctx> {
+  // Strict on purpose: `new Function` code is sloppy by default, so an outer-scope WRITE would mint a global instead of throwing.
+  // eslint-disable-next-line no-new-func
+  return new Function('"use strict"; return (' + factory.toString() + ')')() as PinnedFactory<Ctx>
+}
+
+/** A probe ctx that hands back a stable non-zero number for ANY property read, so a hoisted
+ * `const dt = ctx.dt` yields deterministic, comparable arithmetic in the pre-flight (and never NaN). */
+const PROBE_CTX = new Proxy(
+  {},
+  {
+    get: () => 1, // every key reads as the number 1, so calling a "method" on ctx throws (1 is not callable)
+  },
+) as never // typed `never` so it can be passed as any runner's Ctx
+
+/**
+ * Build the runner for one archetype binding. The interpreted runner `factory(views, meta)` is always
+ * built first; it is returned unless `CODEGEN_AVAILABLE` is true AND the pre-flight passes, in which
+ * case the recompiled runner over the same `views`/`meta` is returned. `strides` (slots per row, per spec) sizes the scratch clone.
+ */
+export function buildPinnedRunner<Ctx>(
+  factory: PinnedFactory<Ctx>,
+  views: readonly TypedArray[],
+  meta: BoundColumnsMeta,
+  strides: readonly number[],
+): (ctx: Ctx) => void {
+  const interpreted = factory(views, meta) // outside the try: a throw from the user's factory propagates
+  if (!CODEGEN_AVAILABLE) return interpreted
+  try {
+    // The real runner is the singleton of its OWN recompiled factory (specialized). The pre-flight
+    // validates a SEPARATELY-recompiled runner over scratch — same source, so faithful ⇒ the real
+    // runner is faithful too. (Reusing one recompiled factory for both would make it produce two
+    // closures and forfeit specialization — the very penalty codegen exists to avoid.)
+    const codegen = recompile(factory)(views, meta)
+    if (preflightMatches(factory, views, strides)) return codegen
+  } catch {
+    // recompile / factory-invoke threw (illegal closure, exotic source) → interpreted.
+  }
+  return interpreted
+}
+
+/**
+ * Run a recompiled runner and a fresh interpreted runner over IDENTICAL 1-row scratch clones of the
+ * columns with the probe ctx, and compare. Equal ⇒ a recompile of this factory is faithful (and the
+ * real runner, recompiled from the same source, is therefore faithful too). The real columns are
+ * never touched. NOTE: this INVOKES the user's runner over the scratch (once recompiled, once
+ * interpreted) — a runner with effects beyond its views (a global write, a ctx method call)
+ * fires/throws here; the contract is a pure SoA loop reading values off ctx. A throw (e.g. an illegal
+ * outer-scope closure → ReferenceError) counts as a mismatch and falls back to interpreted.
+ */
+function preflightMatches<Ctx>(
+  factory: PinnedFactory<Ctx>,
+  views: readonly TypedArray[],
+  strides: readonly number[],
+): boolean {
+  try {
+    const rows = 1 // only row 0 is exercised — a branch this single row never takes is NOT validated
+    const scratchA = views.map((v, i) => v.slice(0, rows * (strides[i] ?? 1)) as TypedArray)
+    const scratchB = views.map((v, i) => v.slice(0, rows * (strides[i] ?? 1)) as TypedArray)
+    const scratchMeta: BoundColumnsMeta = { count: rows, strides }
+    recompile(factory)(scratchA, scratchMeta)(PROBE_CTX) // recompiled copy runs over scratchA
+    factory(scratchB, scratchMeta)(PROBE_CTX) // interpreted reference runs over scratchB
+    for (let i = 0; i < scratchA.length; i++) {
+      const a = scratchA[i] as TypedArray
+      const b = scratchB[i] as TypedArray
+      for (let k = 0; k < a.length; k++) {
+        // Object.is, not !==, so identical NaN writes (e.g. row-0 data already NaN at re-bind) count
+        // as a MATCH — a faithful recompile reproduces the same writes, NaN included.
+        if (!Object.is(a[k], b[k])) return false
+      }
+    }
+    return true
+  } catch {
+    return false
+  }
+}
diff --git a/packages/core/src/query/live-query.ts b/packages/core/src/query/live-query.ts
index ba372f0..88288df 100644
--- a/packages/core/src/query/live-query.ts
+++ b/packages/core/src/query/live-query.ts
@@ -24,6 +24,8 @@ import type { Column, TypedArray } from '../memory/index.js'
 import { decodeEid } from '../memory/index.js'
 import type { CompiledQuery, CompiledValueTerm, RowFilterTerm } from './compile.js'
 import type { SparseSetU32 } from './sparse-set.js'
+import { buildPinnedRunner } from './codegen.js'
+import type { PinnedFactory } from './codegen.js'
 
 const NO_HANDLE = 0xffffffff as EntityHandle
 
@@ -377,36 +379,44 @@ export class LiveQuery {
   }
 
   /**
-   * Pinned columns (the persistent-closure fast path). Where {@link eachChunk} re-resolves column
-   * views every call, `bindColumns` resolves each `[ComponentDef, fieldName]` spec ONCE per matched
-   * hot archetype and invokes `factory(views, meta)` to mint that archetype's runner — a persistent
-   * closure capturing the views as constants, which V8 context-specializes. The returned `run()`
-   * walks the bindings (matching `eachChunk` iteration order: cold archetypes never visited, empty
-   * ones skipped) with one cheap safety check per binding per call.
+   * Pinned columns — the fastest iteration path, ~0.7× bitECS on the canonical bench (measured).
+   * Where {@link eachChunk} re-resolves column views every call, `bindColumns` resolves each
+   * `[ComponentDef, fieldName]` spec ONCE per matched hot archetype and mints that archetype's runner
+   * — a persistent closure capturing the views as constants, which V8 embeds into optimized code. The
+   * returned `run(ctx)` walks the bindings (matching `eachChunk` iteration order: cold archetypes
+   * never visited, empty ones skipped) with one cheap safety check per binding per call.
    *
-   * The two load-bearing contract points:
-   * 1. The runner takes ZERO arguments (a `count` parameter measures 2× slower).
-   * 2. `meta` is identity-stable across rebinds; read `meta.count` (the archetype's live row count)
-   * inside the runner — population churn (spawn/despawn) never re-invokes the factory.
+   * Per-archetype runners are CODEGEN'D (each recompiled into a distinct function so it stays a
+   * specialized V8 singleton) where the runtime allows `new Function`; under strict CSP / a locked
+   * sandbox it transparently falls back to the interpreted factory call. The codegen path is gated on
+   * a pre-flight equality check against the interpreted runner, so it is a pure speed win that can
+   * never change results. Because codegen recompiles fresh on growth, there is **NO post-growth
+   * penalty** — the loop holds ~1.0 ns/entity even after columns re-back, with no pre-sizing required.
+   *
+   * Contract:
+   * 1. The factory must be **self-contained** — it may close over NOTHING from its outer scope (the
+   *    recompiled copy only sees globals). Pass per-frame inputs through the runner's `ctx` argument
+   *    (hoist them to a local const before the loop: `const dt = ctx.dt`); define fixed constants
+   *    inside the factory body. A factory that closes over outer scope fails the pre-flight and falls
+   *    back to interpreted (correct, just unspecialized).
+   * 2. `meta` is identity-stable; read `meta.count` (the live row count) inside the runner — population
+   *    churn (spawn/despawn) never re-invokes the factory.
    *
    * ```ts
-   * const run = q.bindColumns(
-   * [Position, 'x'], [Velocity, 'dx'],
-   * ([px, dx], meta) => () => {
-   * const count = meta.count
-   * for (let i = 0; i < count; i++) px[i] += dx[i] * dt
-   * },
+   * const run = q.bindColumns<{ dt: number }>(
+   *   [Position, 'x'], [Velocity, 'dx'],
+   *   ([px, dx], meta) => (ctx) => {
+   *     const dt = ctx.dt           // hoist per-frame inputs out of the loop
+   *     const count = meta.count
+   *     for (let i = 0; i < count; i++) px[i] += dx[i] * dt
+   *   },
    * )
-   * run() // per frame
+   * run({ dt: 1 / 60 }) // per frame
    * ```
    *
-   * The factory is re-invoked ONLY when a bound column re-backs (growth replaced its view) or when
-   * the matched-archetype set changes (a new archetype matched, or a matched cold archetype was
-   * warm-promoted) — never on population change. V8 only context-specializes SINGLETON closures, so
-   * the first re-invocation after binding disables specialization for that binding permanently
-   * (~1.7 ns/entity vs ~1.0 steady-state on the canonical bench): pre-size to peak capacity (spawn
-   * or reserve rows BEFORE binding) so growth never lands in a hot phase — growth before the first
-   * bind is free.
+   * The factory is re-invoked ONLY when a bound column re-backs (growth) or the matched-archetype set
+   * changes (a new archetype matched, or a matched cold archetype was warm-promoted) — never on
+   * population change.
    *
    * Vec fields hand their raw view through: row `r` occupies `[r*stride, (r+1)*stride)` where the
    * stride is the declared vec arity (`vec3()` → 3). Hardcode it, or read `meta.strides[specIndex]`
@@ -421,13 +431,13 @@ export class LiveQuery {
    * observe the write. Structural changes during `run()` follow the `eachChunk` discipline: collect,
    * then mutate after the loop (despawn swap-removes rows under the runner's feet).
    */
-  bindColumns(
+  bindColumns<Ctx = void>(
     ...args: [
       ...specs: ReadonlyArray<readonly [ComponentDef<Schema>, string]>,
-      factory: (views: readonly TypedArray[], meta: BoundColumnsMeta) => () => void,
+      factory: PinnedFactory<Ctx>,
     ]
-  ): () => void {
-    const factory = args[args.length - 1] as (views: readonly TypedArray[], meta: BoundColumnsMeta) => () => void
+  ): (ctx: Ctx) => void {
+    const factory = args[args.length - 1] as PinnedFactory<Ctx>
     const specs = args.slice(0, -1) as ReadonlyArray<readonly [ComponentDef<Schema>, string]>
     if (this.compiled.rowFilters.length !== 0) {
       throw new Error('bindColumns: row-filtered queries are not supported (a pinned runner cannot skip rows); use each()')
@@ -469,8 +479,9 @@ export class LiveQuery {
       readonly arch: Archetype
       readonly cols: readonly Column[]
       views: readonly TypedArray[]
-      runner: () => void
+      runner: (ctx: Ctx) => void
       readonly meta: BoundColumnsMeta
+      readonly strides: readonly number[]
     }
 
     // Bindings are keyed by archetype id and PRESERVED across rebuilds: a rebuild only mints
@@ -485,8 +496,10 @@ export class LiveQuery {
     let boundMatchCount = -1
 
     const reinvoke = (b: PinnedBinding): void => {
+      // Re-build through codegen on growth: a FRESH recompiled factory keeps the new runner a
+      // specialized singleton (re-invoking the same factory would forfeit specialization).
       b.views = b.cols.map((c) => c.view)
-      b.runner = factory(b.views, b.meta)
+      b.runner = buildPinnedRunner(factory, b.views, b.meta, b.strides)
     }
 
     const makeBinding = (arch: Archetype): PinnedBinding => {
@@ -512,7 +525,7 @@ export class LiveQuery {
         },
         strides,
       }
-      return { arch, cols, views, runner: factory(views, meta), meta }
+      return { arch, cols, views, runner: buildPinnedRunner(factory, views, meta, strides), meta, strides }
     }
 
     // Builds into locals and commits at the end so a makeBinding throw (defensive backstop; the
@@ -542,7 +555,7 @@ export class LiveQuery {
 
     rebuild()
 
-    return () => {
+    return (ctx: Ctx) => {
       // Archetype-set check: matchingArchetypes is append-only (lastMatchTick is never bumped), so
       // a length change is the complete new-match signal; the cold flags cover warm promotion.
       if (this.matchingArchetypes.length !== boundMatchCount) {
@@ -569,7 +582,7 @@ export class LiveQuery {
             break
           }
         }
-        if (b.arch.count !== 0) b.runner()
+        if (b.arch.count !== 0) b.runner(ctx)
       }
     }
   }
diff --git a/packages/core/test/bind-columns.property.test.ts b/packages/core/test/bind-columns.property.test.ts
index aecdca0..c1aa5f1 100644
--- a/packages/core/test/bind-columns.property.test.ts
+++ b/packages/core/test/bind-columns.property.test.ts
@@ -78,11 +78,21 @@ function makeRig(pinned: boolean): Rig {
         [Position, 'y'],
         [Velocity, 'dx'],
         [Velocity, 'dy'],
-        ([px, py, dx, dy], meta) => () => {
-          const count = meta.count
-          for (let i = 0; i < count; i++) {
-            px[i] = (px[i] as number) + (dx[i] as number) * DT
-            py[i] = (py[i] as number) + (dy[i] as number) * DT
+        // SELF-CONTAINED (DT defined inside, closes over nothing) so the CODEGEN path is exercised —
+        // this property (codegen integrator byte-identical to .each under random spawn/despawn/grow)
+        // is the primary codegen correctness gate.
+        (vs, meta) => {
+          const px = vs[0] as Float32Array
+          const py = vs[1] as Float32Array
+          const dx = vs[2] as Float32Array
+          const dy = vs[3] as Float32Array
+          const dt = 1 / 60
+          return () => {
+            const count = meta.count
+            for (let i = 0; i < count; i++) {
+              px[i] = px[i]! + dx[i]! * dt
+              py[i] = py[i]! + dy[i]! * dt
+            }
           }
         },
       )
diff --git a/packages/core/test/codegen-pinned.test.ts b/packages/core/test/codegen-pinned.test.ts
new file mode 100644
index 0000000..7702937
--- /dev/null
+++ b/packages/core/test/codegen-pinned.test.ts
@@ -0,0 +1,129 @@
+// Pinned-loop codegen: the mechanism behind bindColumns beating bitECS. These tests pin the SAFETY
+// contract — codegen is used only when it provably matches the interpreted path, and falls back
+// (always correct) under CSP, on an illegal outer-scope closure, or on any divergence. The
+// steady-state speed win + the no-post-growth-penalty property are bench territory (bench/iterate.ts
+// + the CI bench lane); correctness under random churn is bind-columns.property.test.ts.
+
+import { describe, expect, test } from 'vitest'
+import { createWorld, defineComponent, write } from '@ecsia/core'
+import type { ComponentDef, Schema } from '@ecsia/core'
+import { CODEGEN_AVAILABLE, buildPinnedRunner } from '../src/query/codegen.js'
+import type { BoundColumnsMeta } from '@ecsia/schema'
+
+const asComps = (...c: ComponentDef<Schema>[]): readonly ComponentDef<Schema>[] => c
+
+describe('pinned codegen — safety contract', () => {
+  test('eval is available under Node (the codegen path is live, not the CSP fallback)', () => {
+    expect(CODEGEN_AVAILABLE).toBe(true)
+  })
+
+  test('a self-contained factory codegens and integrates correctly through growth', () => {
+    const Pos = defineComponent({ x: 'f32' }, { name: 'CgPos' })
+    const Vel = defineComponent({ dx: 'f32' }, { name: 'CgVel' })
+    const world = createWorld({ components: asComps(Pos, Vel), maxEntities: 1 << 14 })
+    const seed = () => {
+      const h = world.spawnWith(Pos, Vel)
+      ;(world.entity(h).write(Vel) as { dx: number }).dx = 2
+      return h
+    }
+    for (let i = 0; i < 4; i++) seed()
+    const q = world.query(write(Pos), write(Vel))
+    // Self-contained: closes over nothing; dt arrives via ctx, hoisted out of the loop.
+    const run = q.bindColumns([Pos, 'x'], [Vel, 'dx'], (vs, meta) => {
+      const px = vs[0] as Float32Array
+      const dx = vs[1] as Float32Array
+      return (ctx: { dt: number }) => {
+        const dt = ctx.dt
+        const c = meta.count
+        for (let i = 0; i < c; i++) px[i] = px[i]! + dx[i]! * dt
+      }
+    })
+    run({ dt: 0.5 })
+    // Force growth past the initial capacity so the runner re-builds (fresh codegen, no penalty),
+    // then run again — the new rows integrate too, the old ones keep their accumulated value.
+    const grown: ReturnType<typeof seed>[] = []
+    for (let i = 0; i < 5000; i++) grown.push(seed())
+    run({ dt: 0.5 })
+    // Original 4 ran twice (x = 2*0.5*2 = 2); the 5000 new ran once (x = 2*0.5 = 1).
+    let twice = 0
+    let once = 0
+    q.each((e) => {
+      const x = (e as unknown as { CgPos: { x: number } }).CgPos.x
+      if (Math.abs(x - 2) < 1e-6) twice++
+      else if (Math.abs(x - 1) < 1e-6) once++
+    })
+    expect(twice).toBe(4)
+    expect(once).toBe(5000)
+  })
+
+  test('a factory that closes over OUTER scope falls back to interpreted (still correct)', () => {
+    const Pos = defineComponent({ x: 'f32' }, { name: 'CgPos2' })
+    const world = createWorld({ components: asComps(Pos), maxEntities: 64 })
+    for (let i = 0; i < 3; i++) world.spawnWith(Pos)
+    const q = world.query(write(Pos))
+    const bump = 7 // an OUTER closure — recompile can't see it; the pre-flight catches the throw → fallback
+    const run = q.bindColumns([Pos, 'x'], (vs, meta) => {
+      const px = vs[0] as Float32Array
+      return () => {
+        const c = meta.count
+        for (let i = 0; i < c; i++) px[i] = px[i]! + bump
+      }
+    })
+    run()
+    q.each((e) => {
+      expect((e as unknown as { CgPos2: { x: number } }).CgPos2.x).toBe(7)
+    })
+  })
+
+  test('buildPinnedRunner: codegen and interpreted produce identical output (the pre-flight invariant)', () => {
+    // Drive the helper directly on scratch typed arrays — no world needed. Self-contained factory.
+    const factory = (vs: readonly Float32Array[], meta: BoundColumnsMeta) => {
+      const a = vs[0] as Float32Array
+      const b = vs[1] as Float32Array
+      return (ctx: { k: number }) => {
+        const k = ctx.k
+        const c = meta.count
+        for (let i = 0; i < c; i++) a[i] = a[i]! + b[i]! * k
+      }
+    }
+    const meta: BoundColumnsMeta = { count: 8, strides: [1, 1] }
+    const codegen = [new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]), new Float32Array([1, 1, 1, 1, 1, 1, 1, 1])]
+    const interp = [codegen[0]!.slice(), codegen[1]!.slice()]
+    // buildPinnedRunner returns codegen when CODEGEN_AVAILABLE (Node) and the pre-flight matches.
+    const cgRun = buildPinnedRunner(factory as never, codegen as never, meta, [1, 1])
+    // A hand-built interpreted runner over the clone.
+    const inRun = factory(interp as never, meta)
+    cgRun({ k: 10 } as never)
+    inRun({ k: 10 })
+    expect([...(codegen[0] as Float32Array)]).toEqual([...(interp[0] as Float32Array)])
+  })
+
+  test('a recycled-index / multi-archetype query integrates correctly under codegen', () => {
+    const Pos = defineComponent({ x: 'f32' }, { name: 'CgPos3' })
+    const Vel = defineComponent({ dx: 'f32' }, { name: 'CgVel3' })
+    const Tag = defineComponent({ t: 'u8' }, { name: 'CgTag3' })
+    const world = createWorld({ components: asComps(Pos, Vel, Tag), maxEntities: 1 << 12 })
+    const mk = (withTag: boolean) => {
+      const h = withTag ? world.spawnWith(Pos, Vel, Tag) : world.spawnWith(Pos, Vel)
+      ;(world.entity(h).write(Vel) as { dx: number }).dx = 3
+    }
+    for (let i = 0; i < 10; i++) mk(false) // NOTE(review): nothing here despawns/respawns, so the title's "recycled-index" case is not exercised — confirm
+    for (let i = 0; i < 10; i++) mk(true) // a SECOND matching archetype — two bindings, two codegen runners
+    const q = world.query(write(Pos), write(Vel))
+    const run = q.bindColumns([Pos, 'x'], [Vel, 'dx'], (vs, meta) => {
+      const px = vs[0] as Float32Array
+      const dx = vs[1] as Float32Array
+      return () => {
+        const c = meta.count
+        for (let i = 0; i < c; i++) px[i] = px[i]! + dx[i]!
+      }
+    })
+    run() // a single step: every row starts at x = 0 and gains dx = 3
+    let n = 0
+    q.each((e) => {
+      expect((e as unknown as { CgPos3: { x: number } }).CgPos3.x).toBe(3)
+      n++
+    })
+    expect(n).toBe(20) // both archetypes integrated
+  })
+})
diff --git a/packages/schema/src/index.ts b/packages/schema/src/index.ts
index 9c945b4..554e69f 100644
--- a/packages/schema/src/index.ts
+++ b/packages/schema/src/index.ts
@@ -573,15 +573,15 @@ export interface Query<Terms extends readonly QueryTerm[]> {
    * raw typed column views + a row span. Bypasses the per-row accessor AND the reactivity write log. */
   eachChunk(fn: (chunk: QueryChunk) => void): void
   /** Pinned columns: resolve each `[ComponentDef, field]` spec's column views ONCE per matched hot
-   * archetype, invoke `factory(views, meta)` to mint a persistent zero-argument runner, and return a
-   * `run()` that re-checks the bindings and runs each archetype's runner. See the runtime doc on the
-   * core LiveQuery for the full contract (zero-arg runner, `meta.count`, invalidation, caveats). */
-  bindColumns<const Specs extends readonly ColumnSpec[]>(
+   * archetype, invoke `factory(views, meta)` to mint a persistent runner (per-frame inputs via the runner's
+   * `ctx` argument), and return a `run(ctx)` that re-checks the bindings and runs each archetype's runner. See the runtime doc on the
+   * core LiveQuery for the full contract (self-contained factory, `ctx` deps, `meta.count`, codegen + CSP fallback). */
+  bindColumns<const Specs extends readonly ColumnSpec[], const Ctx = void>(
     ...args: [
       ...specs: { [I in keyof Specs]: ColumnSpecFor<Specs[I]> },
-      factory: (views: ColumnViews<Specs>, meta: BoundColumnsMeta) => () => void,
+      factory: (views: ColumnViews<Specs>, meta: BoundColumnsMeta) => (ctx: Ctx) => void,
     ]
-  ): () => void
+  ): (ctx: Ctx) => void
   /**
    * Derive a narrower query: the cached query for [...this query's terms, ...terms] — pure sugar
    * over `world.query` with the merged term list, riding the same canonical-hash dedup (deriving
@@ -650,12 +650,12 @@ export interface LooseQuery {
   /** Opt-in SoA fast path: see {@link Query.eachChunk}. */
   eachChunk(fn: (chunk: QueryChunk) => void): void
   /** Pinned columns: see {@link Query.bindColumns}. */
-  bindColumns<const Specs extends readonly ColumnSpec[]>(
+  bindColumns<const Specs extends readonly ColumnSpec[], const Ctx = void>(
     ...args: [
       ...specs: { [I in keyof Specs]: ColumnSpecFor<Specs[I]> },
-      factory: (views: ColumnViews<Specs>, meta: BoundColumnsMeta) => () => void,
+      factory: (views: ColumnViews<Specs>, meta: BoundColumnsMeta) => (ctx: Ctx) => void,
     ]
-  ): () => void
+  ): (ctx: Ctx) => void
   /** See {@link Query.derive}. Arity is already past the cap, so the result stays loose. */
   derive(...terms: QueryTerm[]): LooseQuery
   /** Flavor declarations (chainable). */
diff --git a/scripts/bench-report.mjs b/scripts/bench-report.mjs
index 46c5dd2..02793fb 100644
--- a/scripts/bench-report.mjs
+++ b/scripts/bench-report.mjs
@@ -237,7 +237,7 @@ function genTables(report) {
 
 ### Single-thread iteration
 
-Each loop adds every entity's velocity to its position, over ${fmtInt(report.config.iterEntities)} entities per op. \`ns per entity\` is mean op time divided by entity count (nanoseconds per entity — lower is faster); \`ratio vs bitECS\` is bitECS ops/s ÷ this row's ops/s. The \`ecsia bindColumns\` row binds its loop to the storage once, up front; if storage grows after that binding the loop runs slower from then on (roughly 1.7 ns per entity instead of ~1.0), so pre-size the world to peak capacity — spawn or reserve before binding.
+Each loop adds every entity's velocity to its position, over ${fmtInt(report.config.iterEntities)} entities per op. \`ns per entity\` is mean op time divided by entity count (nanoseconds per entity — lower is faster); \`ratio vs bitECS\` is bitECS ops/s ÷ this row's ops/s. The \`ecsia bindColumns\` row compiles a specialized loop per matched archetype (re-evaluating the factory into a fresh function so V8 keeps it on the fast path), which holds through storage growth with no pre-sizing; where dynamic compilation is forbidden (strict CSP / locked sandbox) it falls back to a plain interpreted loop.
 
 | loop | ops/s | ms/op | ns per entity | ratio vs bitECS |
 | --- | ---: | ---: | ---: | ---: |
diff --git a/website/guide/_perf-tables.md b/website/guide/_perf-tables.md
index c8a7abe..1756971 100644
--- a/website/guide/_perf-tables.md
+++ b/website/guide/_perf-tables.md
@@ -5,7 +5,7 @@
 
 ### Single-thread iteration
 
-Each loop adds every entity's velocity to its position, over 50,000 entities per op. `ns per entity` is mean op time divided by entity count (nanoseconds per entity — lower is faster); `ratio vs bitECS` is bitECS ops/s ÷ this row's ops/s. The `ecsia bindColumns` row binds its loop to the storage once, up front; if storage grows after that binding the loop runs slower from then on (roughly 1.7 ns per entity instead of ~1.0), so pre-size the world to peak capacity — spawn or reserve before binding.
+Each loop adds every entity's velocity to its position, over 50,000 entities per op. `ns per entity` is mean op time divided by entity count (nanoseconds per entity — lower is faster); `ratio vs bitECS` is bitECS ops/s ÷ this row's ops/s. The `ecsia bindColumns` row compiles a specialized loop per matched archetype (re-evaluating the factory into a fresh function so V8 keeps it on the fast path), which holds through storage growth with no pre-sizing; where dynamic compilation is forbidden (strict CSP / locked sandbox) it falls back to a plain interpreted loop.
 
 | loop | ops/s | ms/op | ns per entity | ratio vs bitECS |
 | --- | ---: | ---: | ---: | ---: |
diff --git a/website/guide/performance.md b/website/guide/performance.md
index bb526cb..e458526 100644
--- a/website/guide/performance.md
+++ b/website/guide/performance.md
@@ -65,17 +65,18 @@ number.
 - **bitECS wins the default-path comparison.** Its flat SoA loop out-iterates both `.each` and
   `eachChunk`, and we do not pretend otherwise. If your entire workload is one tight integrate loop
   on a single thread and you never reach for `bindColumns`, bitECS is the fastest tool here.
-- **ecsia `bindColumns` edges ahead of bitECS** on this bench. Once the loop is bound, it is the same
-  raw-typed-array shape bitECS uses with one less indirection — ecsia walks its rows densely where
-  bitECS indexes through an entity list. The edge comes with homework: keep the loop closure
-  persistent and pre-size before binding. See
-  [Bind your loop once](#bind-your-loop-once-bindcolumns).
+- **ecsia `bindColumns` beats bitECS** on this bench (~0.7×). It compiles a specialized loop per
+  matched archetype — the same raw-typed-array shape bitECS uses with one less indirection (ecsia
+  walks its rows densely where bitECS indexes through an entity list), kept on V8's fast path by
+  re-evaluating your factory into a fresh function per archetype. Because that recompiles on growth,
+  the edge holds with no pre-sizing and no after-growth penalty; the one rule is a self-contained
+  factory (deps via `ctx`). See [Bind your loop once](#bind-your-loop-once-bindcolumns).
 - **ecsia `.each` beats miniplex.** The ergonomic accessor path — proxies, write-log awareness, and
   all — still out-iterates miniplex's array-of-objects walk. You do not pay for ecsia's ergonomics by
   dropping below the closest ergonomic competitor.
 - **ecsia `eachChunk` lands within ~1.1× of bitECS on a modern V8.** The column cursor re-resolves
   its columns every call — that re-resolution is what keeps it safe under storage growth with zero
-  setup, and what `bindColumns` trades away for the last third.
+  setup; `bindColumns` is the version that compiles the loop and pulls ahead of bitECS.
 - **The tracked-write row is the cost you opt into.** Attaching a `.changed()` filter and draining it
   each frame is markedly more expensive than the bare integrate loop — that is the write-log doing
   real work so reactivity, deltas, and change observers are available. You pay it only when you ask
@@ -103,10 +104,13 @@ number.
 `eachChunk` looks its columns up again on every call. That re-lookup is the safe default — a column's
 array can be replaced when storage grows — but it stops V8 from compiling your loop with the arrays
 baked in as constants, and that compilation is worth about 30% on the iteration bench. `bindColumns`
-gets it back without giving up the safety: you hand the query the columns you want and a factory
-function; ecsia resolves the columns once, calls your factory with them, and keeps the loop your
-factory returns. Each `run()` then runs your loop directly — ecsia re-binds it only when storage
-actually moved.
+gets it back: you hand the query the columns you want and a factory function; ecsia resolves the
+columns once and **compiles a specialized loop per matched archetype** (it re-evaluates your factory
+into a fresh function so V8 keeps each loop on its fast path). The result lands at **~0.7× bitECS** on
+the iteration bench — faster than bitECS — and stays there even after storage grows, with no pre-sizing
+required. Where a runtime forbids dynamic compilation (a strict Content-Security-Policy, a locked
+sandbox), it transparently falls back to a plain interpreted loop; the codegen path is used only when
+it provably matches that interpreted result, so it can never change what your loop computes.
 
 ```ts
 import { createWorld, defineComponent, write } from 'ecsia'
@@ -116,11 +120,11 @@ const Velocity = defineComponent({ dx: 'f32', dy: 'f32' }, { name: 'velocity' })
 const world = createWorld({ components: [Position, Velocity], maxEntities: 1 << 16 })
 
 const q = world.query(write(Position), write(Velocity))
-const dt = 1 / 60
 
 const run = q.bindColumns(
   [Position, 'x'], [Position, 'y'], [Velocity, 'dx'], [Velocity, 'dy'],
-  ([px, py, dx, dy], meta) => () => {
+  ([px, py, dx, dy], meta) => (ctx: { dt: number }) => {
+    const dt = ctx.dt        // per-frame inputs arrive via ctx — hoist them out of the loop
     const count = meta.count // the live entity count — read it inside the loop
     for (let i = 0; i < count; i++) {
       px[i] = px[i]! + dx[i]! * dt
@@ -129,7 +133,7 @@ const run = q.bindColumns(
   },
 )
 
-run() // call once per frame
+run({ dt: 1 / 60 }) // call once per frame
 ```
 
 For a `vec` field the view is the raw flat array — row `r`'s axes live at `[r * stride, (r+1) * stride)`,
@@ -141,28 +145,20 @@ const s = meta.strides[0]            // the first spec's slots-per-row
 for (let r = 0; r < meta.count; r++) pos[r * s] += dx[r] * dt
 ```
 
-Two requirements make this fast, and both are part of the contract rather than style:
+One rule makes the codegen path kick in, and it is the natural shape anyway:
 
-- **Your loop must persist.** The speed comes from V8 treating the captured arrays as constants, and
-  it only does that for a closure created once and then reused. The API shape makes that the natural
-  thing: ecsia calls your factory, keeps the returned loop, and re-invokes the factory only when a
-  bound column's storage was replaced or a new group of entities starts matching the query. Entities
-  spawning and despawning never re-invoke it — the loop reads the live count from `meta.count`.
-- **The returned loop takes no arguments.** Passing the count in as a parameter measured about 2×
-  slower; reading `meta.count` inside the loop is free.
+- **Your factory must be self-contained** — it may close over nothing from the surrounding scope.
+  ecsia re-evaluates your factory's own source to compile each archetype's loop, and that fresh copy
+  only sees globals. So pass per-frame inputs through the runner's `ctx` argument (hoist them to a
+  local before the loop, as above) and define fixed constants inside the factory body. A factory that
+  reaches outside itself still works — it just runs the plain interpreted loop instead of the compiled
+  one. Read `meta.count` inside the returned runner (it is the live entity count): spawning and
+  despawning never re-invoke the factory, so a count captured there would go stale.
 
 The trade-offs are the same as `eachChunk`: writes through the bound arrays bypass the write log, so
 `.changed()` filters and observers will not see them, and structural changes during `run()` follow
 the same collect-first, mutate-after rule as every other loop.
 
-::: tip Pre-size before you bind
-The first time a bound column grows *after* you have bound, V8 permanently stops specializing that
-loop — it keeps working, just slower (about 1.7 ns per entity in our profiling, instead of the
-steady-state number in the table). Growth *before* the bind costs nothing. So spawn, or reserve, up
-to your peak entity count first — the world's `maxEntities` is a natural guide — and bind once the
-world is at size.
-:::
-
 ## Reproduce
 
 ```bash