From 36c128fc149cf9fe40fcc3992d2eb12965cac2c5 Mon Sep 17 00:00:00 2001 From: Eric Boothe Date: Fri, 29 May 2026 11:46:09 -0600 Subject: [PATCH] =?UTF-8?q?feat(chunked):=20close=20#22=20scaling=20walls?= =?UTF-8?q?=20=E2=80=94=20streamed=20emit,=20borrowed=20partitions,=20opt-?= =?UTF-8?q?in=20lazy=20engine?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wall C (streamed emit): write each sheet module to disk as generated and drop the string instead of collect-all-then-write; heavy sheets (>=200k formulas) emit one-at-a-time, light ones in parallel. Was materializing all ~800 MB of generated JS in memory before writing any module -> ~18 GB peak on the real models, sheets/ empty for the whole run. Wall B (borrowed partitions): SheetPartition<'a> holds Vec<&CellData> instead of cloning ~6M cells while the workbook still holds the originals (peak-memory doubling). Consumers are read-only, so unchanged beyond the borrow. Wall A (opt-in --lazy-engine): emit a chunked engine whose sheet modules load on demand via async load()/runScoped() with output-cone scoping (load only the requested sheets/cells' transitive dependency closure, whole clusters included). Sync run() preserved and guarded against pre-load calls. Default engine.js is unchanged (eager + synchronous) so the Mippy contract, ete eval, smoke, and the engine suite are untouched; eager and lazy share the run() body so they can't drift. New `npm run test:lazy-engine` (19) + CI step. Validated: cargo test 17/17, smoke 78/78, test:engine 21/21, test:runnable 20/20, test:depgraph 11/11, test:slimming 13/13, test:golden 20/20, full npm test, and an `ete init --lazy-engine` e2e build. Residual (deferred): generate_sheet_module builds a Vec then joins (~2x a monster module transiently); row-chunk the 3 monster sheets so even one is small to emit + import. 
Co-Authored-By: Claude Opus 4.8 --- .github/workflows/ci.yml | 3 + CHANGELOG.md | 48 +++ HANDOFF.md | 36 ++- PLAN.md | 45 ++- README.md | 36 +++ ROADMAP.md | 34 ++- cli/commands/init.mjs | 5 + cli/index.mjs | 5 +- package.json | 1 + pipelines/rust/src/chunked_emitter.rs | 343 ++++++++++++++++++---- pipelines/rust/src/main.rs | 8 +- pipelines/rust/src/sheet_partition.rs | 25 +- pipelines/rust/tests/test-lazy-engine.mjs | 170 +++++++++++ 13 files changed, 659 insertions(+), 100 deletions(-) create mode 100644 pipelines/rust/tests/test-lazy-engine.mjs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 847a51f..e11008e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,6 +77,9 @@ jobs: - name: Runnable engine + build manifest run: npm run test:runnable + - name: Lazy engine (--lazy-engine load()/runScoped() + cone scoping) + run: npm run test:lazy-engine + - name: Artifact slimming run: npm run test:slimming diff --git a/CHANGELOG.md b/CHANGELOG.md index e30b8b3..72cb370 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,53 @@ # excel-to-engine — Changelog +## 2026-05-29 — Chunked-build scaling walls: streamed emit, borrowed partitions, opt-in lazy engine (#22) + +With the partition-hang fixed, a clean `ete init` on the real models got *past* +partitioning but then drove the parser past 18 GB in the module-emit step (and a +complete build was still slow to *run* as an oracle). Three walls closed — two +internal memory fixes done unconditionally, one opt-in runtime feature. + +- **Wall C — streamed module emit (`chunked_emitter.rs`).** The emit did + `partitions.par_iter().map(generate_sheet_module).collect()` and wrote in a + *second* pass — holding **all ~800 MB** of generated JS in memory at once (on + top of the multi-million-cell workbook), with nothing in `sheets/` until every + module finished. 
It now **writes each module the instant it's generated and + drops the string**; the few "heavy" sheets (≥200k formula cells) are emitted + one-at-a-time (peak ≈ one big module) while the many light sheets stay parallel. + Files land incrementally; a write failure is still fatal. +- **Wall B — `SheetPartition` borrows cells instead of cloning + (`sheet_partition.rs`).** `partition_sheets` did `cell.clone()` into the + partition while `workbook.sheets` still held the originals — a full second copy + of ~6M `CellData` (addresses + values + formula strings) → peak-memory doubling. + `SheetPartition<'a>` now holds `Vec<&'a CellData>` (the workbook outlives every + partition), so the partition is a few pointers per cell. The four consumers are + read-only, so they're unchanged beyond the borrow. +- **Wall A — opt-in `--lazy-engine` (`chunked_emitter.rs`, `main.rs`, + `cli/`).** The default `engine.js` statically imports every sheet module, so + `import('engine.js')` pulls ~800 MB into the heap before `run()` can be called. + `ete init --lazy-engine` (parser `--lazy-engine`) now emits a lazy orchestrator: + sheet modules load on demand via `export async function load(options)` (with + **output-cone scoping** — `load({ sheets })` / `load({ cells })` loads only the + requested sheets' transitive dependency closure, expanding whole clusters), a + synchronous `run()` guarded against being called before any load, and + `runScoped(inputs, options)` (load + run in one await). **The default engine is + unchanged** — it stays eager + synchronous, so the Mippy contract, `ete eval`, + the smoke test, and the engine suite are untouched. The eager and lazy engines + share the `run()` body via `emit_run_function`, so they can't drift. 
+- New `npm run test:lazy-engine` (19) + CI step: asserts the lazy engine has no + static sheet imports, exports `run`/`load`/`runScoped`, throws before load, + matches the eager engine's `run()` output after load (base + cross-sheet + override), and that cone scoping loads only the closure. +- Validated: `cargo build --release`, `cargo test` 17/17, `smoke` 78/78, + `test:engine` 21/21, `test:runnable` 20/20, `test:depgraph` 11/11, + `test:lazy-engine` 19/19, `test:slimming` 13/13, `test:golden` 20/20, full + `npm test`, and an `ete init --lazy-engine` end-to-end build. +- **Residual (deeper, deferred):** `generate_sheet_module` builds a `Vec<String>` + of lines then `.join("\n")` — ~2× a monster module transiently; and even one + ~200 MB monster module is heavy to import. Row-chunking the monster sheets + (Owned_Asset_PP_E, Future_Owned_Acquisitions, Technology) into smaller lazy + modules is the next step to make them usable, not just emittable. + ## 2026-05-29 — Fix chunked-build hang in `partition_sheets` (range-expansion blowup) A clean `ete init` on the full real models hung for ~12h in the chunked emitter, diff --git a/HANDOFF.md b/HANDOFF.md index 4dd7ba4..a13ab90 100644 --- a/HANDOFF.md +++ b/HANDOFF.md @@ -51,13 +51,35 @@ formulas. Validated: `cargo test` 17/17, `smoke` 78/78, `test:depgraph`/`runnabl `engine` 11/20/21. **Rebuild the release parser** (`cd pipelines/rust && cargo build --release`) before re-running the regen — the fix is in the binary. -Next session (all nice-to-have, none on the critical path): **P3 (#22)** -output-cone scoping / lazy sheet loading — now also the home for the two residual -scaling walls (partition clones every cell → peak-memory doubling; `engine.js` -eagerly imports ~800 MB of sheet modules → Node load-time wall, so even a complete -build is slow to *run* as the oracle). Plus **deeper transpiler coverage** (the -11,813 `_fn` offenders behind #26) and **cluster-once eval**. The Mippy contract + -its trust gates are complete. 
+**Latest session (chunked-build scaling walls, 2026-05-29):** the three #22 walls +are closed. A clean build got *past* partitioning but the module-emit step drove +the parser past 18 GB (it `collect()`ed all ~800 MB of generated module strings +before writing any), and even a complete engine was slow to *run* (eager imports). +- **Wall C (streamed emit):** `chunked_emitter.rs` writes each sheet module to + disk the instant it's generated and drops the string (heavy sheets ≥200k + formulas one-at-a-time, light ones parallel) — peak ≈ one monster module, files + land incrementally. **This is the fix for the 18 GB OOM the regen hit.** +- **Wall B (borrowed partitions):** `SheetPartition<'a>` holds `Vec<&CellData>` + (`sheet_partition.rs`) instead of cloning ~6M cells — no more peak-memory + doubling during emit. +- **Wall A (opt-in lazy engine):** `ete init --lazy-engine` (parser + `--lazy-engine`) emits an engine whose sheet modules load on demand via async + `load()`/`runScoped()` with **output-cone scoping** (`load({sheets})` / + `load({cells})` loads only the dependency closure, whole clusters included); + sync `run()` preserved, guarded against pre-load calls. **Default engine.js is + unchanged** (eager + sync) — Mippy / `ete eval` / smoke / engine suite untouched. + Eager & lazy share the `run()` body so they can't drift. New + `npm run test:lazy-engine` (19) + CI. **Rebuild the release parser before regen.** + +Next session (none on the critical path): **a clean A1/A2 regen** to confirm the +emit completes within memory (couldn't be measured here — models are gitignored); +then **row-chunk the 3 monster sheets** (Owned_Asset_PP_E, Future_Owned_Acquisitions, +Technology) so even one is small to generate (`generate_sheet_module` still builds +a `Vec<String>` then joins, ~2× a monster transiently) and import. 
Plus the rest of +**#22's umbrella** (`--output-profile contract` to skip the per-sheet emit for +contract-only consumers; guided `ete create` skill), **deeper transpiler coverage** +(the 11,813 `_fn` offenders behind #26), and **cluster-once eval**. The Mippy +contract + its trust gates are complete. **Baseline (real models, `npm run bench`):** Model A **84.3%**, Model B **85.5%** — standalone sheets only (cluster + 190 MB PP&E skipped). diff --git a/PLAN.md b/PLAN.md index c696536..f08ff26 100644 --- a/PLAN.md +++ b/PLAN.md @@ -1,12 +1,43 @@ # excel-to-engine — Plan -> **Next session:** the real-model chunked build now gets *past* `partition_sheets` -> (the 12h hang is fixed). The remaining scaling walls are about actually -> *running* the oracle at this size — the partition step still clones every cell -> (peak-memory doubling) and `engine.js` eagerly imports ~800 MB of sheet modules -> (Node load-time wall). Both fold into **P3 (#22) output-cone scoping / lazy -> sheet loading**. Also still open: deeper transpiler coverage (the 11,813 `_fn` -> offenders behind #26) and cluster-once eval. +> **Next session:** the three chunked-build scaling walls are closed (#22) — the +> emit streams module-by-module (was: hold all ~800 MB before writing), partitions +> borrow cells instead of cloning (was: peak-memory doubling), and `ete init +> --lazy-engine` emits an on-demand engine so a consumer can run the oracle without +> importing ~800 MB up front. **Next: a full clean regen on the real A1/A2 models +> to confirm the emit now completes within memory** (couldn't be measured here — +> the models are gitignored), then the deeper residual: **row-chunk the 3 monster +> sheets** (Owned_Asset_PP_E, Future_Owned_Acquisitions, Technology) so even one is +> small to generate + import. Also still open: deeper transpiler coverage (the +> 11,813 `_fn` offenders behind #26) and cluster-once eval. 
+ +## Status: Chunked-build scaling walls closed (streamed emit + borrowed partitions + opt-in lazy engine) — landed 2026-05-29 + +With the partition hang fixed, a clean build got *past* partitioning but then the +module-emit step drove the parser past 18 GB (it materialized all ~800 MB of +generated module strings before writing any), and even a complete ~800 MB engine +was slow to *run* (eager imports). Three walls (#22) closed: + +- **Wall C — streamed emit (`chunked_emitter.rs`).** Each sheet module is written + to disk the instant it's generated and the string dropped, instead of + collect-all-then-write; heavy sheets (≥200k formulas) emit one-at-a-time, light + ones in parallel. Peak emit memory ≈ one monster module, not the whole output. +- **Wall B — borrowed partitions (`sheet_partition.rs`).** `SheetPartition<'a>` + holds `Vec<&'a CellData>` instead of cloning ~6M cells; removes the second + full copy that doubled peak memory during emit. +- **Wall A — opt-in `--lazy-engine` (`chunked_emitter.rs`, `main.rs`, `cli/`).** + Emits a chunked `engine.js` whose sheet modules load on demand via async + `load()`/`runScoped()` with output-cone scoping; sync `run()` is preserved + (guarded against pre-load calls). **Default engine.js is unchanged** (eager + + sync) — the Mippy contract and all in-repo consumers are untouched. Eager/lazy + share the `run()` body, so they can't drift. + +New `npm run test:lazy-engine` (19) + CI. Validated: `cargo test` 17/17, `smoke` +78/78, `test:engine`/`test:runnable`/`test:depgraph` 21/20/11, `test:lazy-engine` +19/19, `test:slimming`/`test:golden` 13/20, full `npm test`, `ete init +--lazy-engine` e2e. **Residual (deferred):** `generate_sheet_module` still builds +a `Vec<String>` then joins (~2× a monster transiently), and a single monster +module is still heavy to import — row-chunking the monster sheets is the next step. 
## Status: Chunked-build partition hang fixed — landed 2026-05-29 diff --git a/README.md b/README.md index 56cd250..dd4407e 100644 --- a/README.md +++ b/README.md @@ -385,6 +385,42 @@ synthetic fixture (`npm run test:golden`); point it at a real build with `ETE_GOLDEN_DIR` + a gitignored `canonical-returns.json` to verify a regenerated model still reproduces the hand-port's gross/net MOIC & IRR exactly. +### Lazy engine for large models (`--lazy-engine`) + +The default `engine.js` statically imports every per-sheet module, so +`import('engine.js')` pulls **all** of them into memory (hundreds of MB on the +big PE models — dominated by a couple of monster sheets) before `run()` can be +called. For a consumer that only needs to *sample* the model (the calibration- +oracle use case), that load is the wall. + +`ete init --lazy-engine` emits an engine that imports sheet modules **on demand**: + +```js +import engine from './my-model/chunked/engine.js'; + +// Load only what you need, then run() synchronously (same return shape as always). +await engine.load({ cells: ['Returns!D22', 'Returns!E22'] }); // loads just the + // dependency cone +const { values, meta } = engine.run({ 'Assumptions!B3': 18 }); // override + run + +// Or do both in one call: +const r = await engine.runScoped({ 'Assumptions!B3': 18 }, { cells: ['Returns!D22'] }); +``` + +- **`load(options)`** — `{ sheets: [...] }` and/or `{ cells: ['Sheet!A1', ...] }` + loads only those sheets plus their transitive dependency closure (whole + circular clusters are pulled in as a unit). No options ⇒ load everything (still + lazy, but complete). To scope to named outputs, map their names → cells via + `named-outputs.json`, then pass `cells`. +- **`run(inputs, options)`** — unchanged synchronous semantics; throws if called + before anything is loaded. Sheets outside the loaded cone are simply skipped. +- **`runScoped(inputs, options)`** — `await load(options)` then `run(inputs, options)`. 
+ +The **default build is unchanged** — `engine.js` stays eager and `run()` stays +synchronous, so existing integrations are untouched. `--lazy-engine` is purely +opt-in. (Per-sheet modules are emitted either way; the flag only changes how +`engine.js` loads them.) + ### The Delta Cascade When you run a scenario, the CLI doesn't re-execute the full engine (which can take 10+ minutes on large models). Instead, it: diff --git a/ROADMAP.md b/ROADMAP.md index 384127a..590c082 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -60,12 +60,18 @@ Mippy. Order (issues on ebootheee/excel-to-engine; the Done line is the contract artifacts. New `npm run test:runnable` + CI. See CHANGELOG/PLAN. - **P2 · [#25] — pin the value-bearing cells as named-outputs. ✅ DONE (2026-05-29).** Per-class MIP Proceeds, hurdle/threshold, participation %, equity basis, valuation/shares — not just MOIC/IRR. Schedules and timeline timelines (such as debt, equity base, cash flow) are now surfaced and participate fully in closure analysis via range expansion. Drivable driver-inputs (`exitMultiple`, `exitYearSelector`, and `hurdleRate`) are also mapped under `named-inputs.json`. - **P2 · [#26] — `_fn` fallback audit (`_fn-fallbacks.json`). ✅ DONE (2026-05-29).** Scans the generated sheet modules → `_fn-fallbacks.json`, and checks each named output/schedule's dependency closure against it. **Reports** by default (annotates affected outputs with `resolvesThroughFallback`, records `stats.fallbackViolations`, `ete init` warns); **hard-fails only under `--assert-no-fallbacks`** so the gate doesn't block the real models (~11,813 fallbacks today). The "assert no value cell uses a stub" target is the golden-master CI check below, run with `--assert-no-fallbacks`. -- **P3 (nice-to-have) · [#22] — output-cone scoping / lazy sheet loading.** - Cheaper oracle; not required (we don't ship the blob). 
Now also the home for - the two scaling walls that remain after the partition-hang fix below: the - partition step `clone()`s every cell (peak-memory doubling), and `engine.js` - eagerly imports ~800 MB of sheet modules (Node load-time wall — even a complete - build is slow to *run*). Scoping/lazy-loading addresses both. +- **P3 · [#22] — scaling walls + lazy sheet loading. ✅ DONE (2026-05-29).** + Three walls closed so the real models both *build* and *run* at scale: + **(C) streamed emit** — write each sheet module to disk as generated and drop + the string (was: hold all ~800 MB before writing → 18 GB peak); heavy sheets + emit one-at-a-time. **(B) borrowed partitions** — `SheetPartition<'a>` holds + `Vec<&CellData>` (was: clone ~6M cells → peak-memory doubling). **(A) opt-in + `ete init --lazy-engine`** — emits an engine whose sheet modules load on demand + via async `load()`/`runScoped()` with output-cone scoping; sync `run()` + preserved; **default engine unchanged** (eager + sync, Mippy untouched). + `npm run test:lazy-engine` (19) + CI. Still open under #22's original umbrella: + the `--output-profile contract` knob (skip the per-sheet emit entirely for + contract-only consumers) and a guided `ete create` skill. Supporting (makes the oracle trustworthy, not on the critical path): - **Golden-master CI assert ✅ DONE (2026-05-29).** `eval/golden-master.mjs` + @@ -123,10 +129,18 @@ Issues filed: [#22] (output scoping) and [#23] (parser/emitter perf). range-expanding `extract_refs` (post-Round-2 it explodes every range to ≤1000 cells per formula, then discards the same-sheet ones) on the 1.62M-formula PP&E sheet → swap thrash. Now uses a sheet-names-only scanner (`collect_sheet_deps`); - cycle detection uses `extract_refs_shallow`. **Residual scaling walls (→ [#22]):** - partition still `clone()`s every cell (peak-memory doubling), and the generated - `engine.js` eagerly imports ~800 MB of sheet modules (Node load-time wall). 
Also - still wanted: within-sheet parallelism for the heaviest sheets. + cycle detection uses `extract_refs_shallow`. ✅ **Two more walls fixed + (2026-05-29, #22):** the emit was materializing all ~800 MB of generated module + strings before writing any (18 GB peak) — now **streamed** (write + drop per + module, heavy sheets one-at-a-time); and `partition_sheets` cloned every cell + (peak-memory doubling) — now **borrows** (`Vec<&CellData>`). The eager + `engine.js` still imports all modules, so `ete init --lazy-engine` adds an + on-demand engine for the run-the-oracle path. **Residual (deferred):** + `generate_sheet_module` builds a `Vec<String>` then joins (~2× a monster + transiently), and a single ~200 MB monster module is still heavy to import → + **row-chunk the 3 monster sheets** into smaller lazy modules. Also still wanted: + within-sheet parallelism for the heaviest sheets. **Not yet measured on the real + models** (gitignored) — a clean A1/A2 regen should confirm the emit completes. - **`--output-profile` / guided `ete create` ([#22]).** Skip the ~752 MB per-sheet engine emit when a consumer only needs ground truth + contract maps. - **Transpiler coverage — 11,813 `_fn()` fallbacks (unchanged old→new).** That diff --git a/cli/commands/init.mjs b/cli/commands/init.mjs index 58f2cf7..5c425be 100644 --- a/cli/commands/init.mjs +++ b/cli/commands/init.mjs @@ -109,6 +109,11 @@ export function runInit(excelPath, args) { // in chunked mode by default; see request #8 slimming). const parserArgs = [resolve(excelPath), absOutput, '--chunked']; if (args.emitDebug) parserArgs.push('--emit-debug'); + // --lazy-engine emits a chunked engine.js that imports sheet modules on + // demand (async load()/runScoped() + output-cone scoping) instead of + // eagerly at module-load time — so a consumer can run the engine without + // pulling every sheet module into memory just to import it (#22, Wall A). 
+ if (args.lazyEngine) parserArgs.push('--lazy-engine'); const result = spawnSync( parserBin, parserArgs, diff --git a/cli/index.mjs b/cli/index.mjs index ee063a3..f219213 100755 --- a/cli/index.mjs +++ b/cli/index.mjs @@ -212,7 +212,10 @@ Commands: --assert-no-fallbacks (hard-fail if any named output resolves through an _fn() stub), --emit-debug (retain dependency-graph.json, - _graph.json, model-map.json for offline analysis) + _graph.json, model-map.json for offline analysis), + --lazy-engine (engine.js loads sheet modules on + demand via async load()/runScoped() + output-cone + scoping; run() stays sync — await load() first) summary One-shot model overview (--terse to hide suspects) query [args] Query ground truth cells pnl Extract annual P&L by segment diff --git a/package.json b/package.json index 4c82ec5..0a9c422 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,7 @@ "test:engine": "node pipelines/rust/tests/test-engine-runtime.mjs", "test:depgraph": "node pipelines/rust/tests/test-dependency-graph.mjs", "test:runnable": "node pipelines/rust/tests/test-runnable-engine.mjs", + "test:lazy-engine": "node pipelines/rust/tests/test-lazy-engine.mjs", "test:slimming": "node tests/cli/test-artifact-slimming.mjs", "test:golden": "node tests/cli/test-golden-master.mjs", "golden": "node eval/golden-master.mjs", diff --git a/pipelines/rust/src/chunked_emitter.rs b/pipelines/rust/src/chunked_emitter.rs index 1196694..1854d9d 100644 --- a/pipelines/rust/src/chunked_emitter.rs +++ b/pipelines/rust/src/chunked_emitter.rs @@ -24,7 +24,11 @@ use std::time::Instant; /// Generate all chunked output artifacts into `output_dir`. /// Returns a summary string of what was emitted. 
-pub fn emit_chunked(workbook: &WorkbookData, output_dir: &Path) -> Result { +pub fn emit_chunked( + workbook: &WorkbookData, + output_dir: &Path, + lazy_engine: bool, +) -> Result { let t_start = Instant::now(); eprintln!("[chunked] Partitioning {} sheets...", workbook.sheets.len()); @@ -61,51 +65,63 @@ pub fn emit_chunked(workbook: &WorkbookData, output_dir: &Path) -> Result exit 1). let total_sheets = partitions.len(); - eprintln!("[chunked] Emitting {} sheet modules (parallel)...", total_sheets); + eprintln!("[chunked] Emitting {} sheet modules (streamed)...", total_sheets); let t_emit = Instant::now(); - let completed = AtomicUsize::new(0); - // Generate all modules in parallel - let sheet_results: Vec<(String, String, String, usize, usize, usize)> = partitions - .par_iter() - .map(|partition| { - let code = generate_sheet_module(partition, &workbook); - let safe_name = sanitize_sheet_name(&partition.name); - let file_name = format!("{}.mjs", safe_name); - let code_len = code.len(); - let n_formulas = partition.formula_cells.len(); - let n_inputs = partition.input_cells.len(); - - let done = completed.fetch_add(1, Ordering::Relaxed) + 1; - if done % 5 == 0 || done == total_sheets { - eprint!( - "\r[chunked] [{}/{}] generating modules...", - done, total_sheets - ); - std::io::stderr().flush().ok(); - } + // Generate one module, write it, drop the string; return only small metadata + // (file name + counts), never the code. Shared by the parallel (light) and + // sequential (heavy) passes below. All captures are Sync, so this is usable + // as a rayon map operator. 
+ let emit_one = |partition: &SheetPartition| -> Result<(String, usize, usize), String> { + let code = generate_sheet_module(partition, workbook); + let file_name = format!("{}.mjs", sanitize_sheet_name(&partition.name)); + let code_len = code.len(); + let n_formulas = partition.formula_cells.len(); + fs::write(sheets_dir.join(&file_name), &code) + .map_err(|e| format!("Failed to write {}: {}", file_name, e))?; + drop(code); // free the (possibly hundreds-of-MB) module string now + let done = completed.fetch_add(1, Ordering::Relaxed) + 1; + if done % 5 == 0 || done == total_sheets { + eprint!("\r[chunked] [{}/{}] modules written...", done, total_sheets); + std::io::stderr().flush().ok(); + } + Ok((file_name, n_formulas, code_len)) + }; - (partition.name.clone(), file_name, code, n_formulas, n_inputs, code_len) - }) - .collect(); + // A single transpiled monster sheet can be hundreds of MB. Emit "heavy" + // sheets one-at-a-time so two are never materialized concurrently; emit the + // many "light" sheets in parallel. On small models every sheet is light, so + // this stays fully parallel — same behaviour as before, minus the retention. 
+ const HEAVY_FORMULA_THRESHOLD: usize = 200_000; + let (heavy, light): (Vec<&SheetPartition>, Vec<&SheetPartition>) = partitions + .iter() + .partition(|p| p.formula_cells.len() >= HEAVY_FORMULA_THRESHOLD); + + let mut metas: Vec<(String, usize, usize)> = light + .into_par_iter() + .map(|p| emit_one(p)) + .collect::, String>>()?; + for p in heavy { + metas.push(emit_one(p)?); + } eprintln!(); // newline after progress - // Write files sequentially (fast — just I/O) - let mut sheet_files: Vec = Vec::new(); - let mut total_formulas_emitted: usize = 0; - let mut total_bytes_emitted: usize = 0; - for (_sheet_name, file_name, code, n_formulas, _n_inputs, code_len) in &sheet_results { - let file_path = sheets_dir.join(file_name); - fs::write(&file_path, code) - .map_err(|e| format!("Failed to write {}: {}", file_name, e))?; - sheet_files.push(file_name.clone()); - total_formulas_emitted += n_formulas; - total_bytes_emitted += code_len; - } + let sheet_files: Vec = metas.iter().map(|(f, _, _)| f.clone()).collect(); + let total_formulas_emitted: usize = metas.iter().map(|(_, n, _)| *n).sum(); + let total_bytes_emitted: usize = metas.iter().map(|(_, _, b)| *b).sum(); eprintln!( "[chunked] All {} sheet modules emitted in {:.1}s ({} formulas, {})", @@ -170,9 +186,16 @@ pub fn emit_chunked(workbook: &WorkbookData, output_dir: &Path) -> Result Result String { +fn generate_sheet_module(partition: &SheetPartition<'_>, _workbook: &WorkbookData) -> String { let mut lines: Vec = Vec::new(); let sheet_name = &partition.name; @@ -520,7 +543,7 @@ fn extract_cell_addr_from_var(var_body: &str) -> Option { // Orchestrator (engine.js) generation // --------------------------------------------------------------------------- -fn generate_orchestrator(graph: &SheetGraph, _partitions: &[SheetPartition]) -> String { +fn generate_orchestrator(graph: &SheetGraph, _partitions: &[SheetPartition<'_>]) -> String { let mut lines: Vec = Vec::new(); lines.push("// engine.js — AUTO-GENERATED 
orchestrator (chunked mode)".to_string()); @@ -567,27 +590,59 @@ fn generate_orchestrator(graph: &SheetGraph, _partitions: &[SheetPartition]) -> lines.push("};".to_string()); lines.push(String::new()); - // Sheet clusters (circular dependency groups that need convergence loops) - if !graph.sheet_clusters.is_empty() { - lines.push("// Sheet clusters — groups of sheets with circular dependencies".to_string()); - lines.push("// These are executed in convergence loops until values stabilize.".to_string()); - lines.push("const SHEET_CLUSTERS = [".to_string()); - for cluster in &graph.sheet_clusters { - let names: Vec = cluster - .iter() - .map(|s| format!("\"{}\"", escape_js_string(s))) - .collect(); - lines.push(format!(" [{}],", names.join(", "))); - } - lines.push("];".to_string()); - lines.push(String::new()); + // Sheet clusters (circular dependency groups that need convergence loops). + // Eager engine emits these only when the model actually has them. + lines.extend(emit_clusters_block(graph, false)); - // Build a set of all sheets that belong to a cluster - lines.push("const CLUSTER_SHEETS = new Set(SHEET_CLUSTERS.flat());".to_string()); - lines.push(String::new()); + // run() — shared with the lazy orchestrator (eager passes lazy=false, so no + // load() guard and the output is identical to the original hand-written run). + lines.extend(emit_run_function(graph, false)); + lines.push(String::new()); + + // Default export + lines.push("export default { run };".to_string()); + lines.push(String::new()); + + lines.join("\n") +} + +/// Emit the `SHEET_CLUSTERS` + `CLUSTER_SHEETS` constants. The eager engine emits +/// them only when the model has circular clusters (`force=false`, matching the +/// original output). The lazy engine passes `force=true` so its cone loader can +/// always reference `SHEET_CLUSTERS` (an empty array when the model is acyclic). 
+fn emit_clusters_block(graph: &SheetGraph, force: bool) -> Vec<String> { + let mut lines: Vec<String> = Vec::new(); + if graph.sheet_clusters.is_empty() && !force { + return lines; + } + lines.push("// Sheet clusters — groups of sheets with circular dependencies".to_string()); + lines.push("// These are executed in convergence loops until values stabilize.".to_string()); + lines.push("const SHEET_CLUSTERS = [".to_string()); + for cluster in &graph.sheet_clusters { + let names: Vec<String> = cluster + .iter() + .map(|s| format!("\"{}\"", escape_js_string(s))) + .collect(); + lines.push(format!(" [{}],", names.join(", "))); } + lines.push("];".to_string()); + lines.push(String::new()); - // run() function — shared preamble (overrides + read-tracking + meta scaffold) + // Build a set of all sheets that belong to a cluster + lines.push("const CLUSTER_SHEETS = new Set(SHEET_CLUSTERS.flat());".to_string()); + lines.push(String::new()); + lines +} + +/// Emit the `run()` function — shared by the eager and lazy orchestrators. Both +/// reference the same `SHEET_COMPUTE` / `TOPO_ORDER` / `SHEET_CLUSTERS` / +/// `CLUSTER_SHEETS`, so the body is identical. With `lazy=true` a guard is +/// inserted that throws if no sheets have been loaded (the lazy engine's footgun); +/// with `lazy=false` the output is byte-identical to the original eager run(). +fn emit_run_function(graph: &SheetGraph, lazy: bool) -> Vec<String> { + let mut lines: Vec<String> = Vec::new(); + + // JSDoc + signature lines.push(r#"/** * Execute the full model. * @param {Object} [inputs] - Optional cell overrides: { "Sheet!A1": value, ... } @@ -595,8 +650,22 @@ fn generate_orchestrator(graph: &SheetGraph, _partitions: &[SheetPartition]) -> * @param {boolean} [options.strict] - Throw if any override cell is not read by a formula. 
* @returns {{ values: Object, kpis: Object, meta: Object, unknownOverrides: string[] }} */ -export function run(inputs = {}, options = {}) { - const ctx = new ComputeContext(); +export function run(inputs = {}, options = {}) {"#.to_string()); + + if lazy { + lines.push(r#" // Lazy engine: sheet modules load on demand via load()/runScoped(). Guard the + // footgun of calling run() before anything is loaded (it would otherwise no-op + // every sheet and silently return an all-zero model). A cone-scoped load() + // intentionally leaves out-of-cone sheets unloaded; those are skipped by the + // `if (computeFn)` checks below, which is the correct behaviour for a scoped run. + if (Object.keys(SHEET_COMPUTE).length === 0) { + throw new Error('engine.run(): no sheets loaded — call `await load()` (or `await load({ sheets: [...] })` / `load({ cells: [...] })`) first, or use `await runScoped(inputs, options)`.'); + }"#.to_string()); + } + + // Body preamble (ctx + override tracking + apply/pin). Joining after the + // signature line above reproduces the original single-string preamble. + lines.push(r#" const ctx = new ComputeContext(); const _t0 = Date.now(); const TOL = 1e-6; const _clusterMeta = []; @@ -711,10 +780,154 @@ export function run(inputs = {}, options = {}) { unknownOverrides, }; }"#.to_string()); + + lines +} + +/// Lazy orchestrator — emitted for `--lazy-engine`. Identical `run()` semantics to +/// the eager engine, but sheet modules are NOT statically imported: they load on +/// demand via an async `load()` (optionally scoped to an output cone), so +/// `import('engine.js')` no longer pulls ~800 MB of modules into the heap before +/// `run()` can be called. `run()` stays synchronous; the consumer awaits `load()` +/// (or `runScoped()`) first. This is the opt-in fix for Wall A (#22); the default +/// engine.js stays eager + synchronous so existing consumers are untouched. 
+fn generate_orchestrator_lazy(graph: &SheetGraph) -> String { + let mut lines: Vec = Vec::new(); + + lines.push("// engine.js — AUTO-GENERATED orchestrator (chunked mode, LAZY)".to_string()); + lines.push("// Sheet modules are imported ON DEMAND by load()/runScoped(), not at".to_string()); + lines.push("// module-load time — so importing this file is cheap regardless of model size.".to_string()); + lines.push("// run() is synchronous: await load() (or use runScoped()) before calling it.".to_string()); + lines.push("// Do not edit manually — re-run the pipeline to regenerate.".to_string()); + lines.push(String::new()); + + // Runtime context class (no static sheet imports) + lines.push(generate_ctx_runtime()); + lines.push(String::new()); + + // Topo order constant + let topo_strs: Vec = graph + .topo_order + .iter() + .map(|s| format!("\"{}\"", escape_js_string(s))) + .collect(); + lines.push(format!("const TOPO_ORDER = [{}];", topo_strs.join(", "))); + lines.push(String::new()); + + // Lazy module loaders — thunks that dynamically import each sheet module. + lines.push("// Lazy loaders — each returns a Promise of its sheet module. load() awaits".to_string()); + lines.push("// only the ones it needs (requested sheets/cells + their transitive deps),".to_string()); + lines.push("// so a cone-scoped run never imports modules outside the cone.".to_string()); + lines.push("const SHEET_LOADERS = {".to_string()); + for name in &graph.topo_order { + let safe = sanitize_sheet_name(name); + lines.push(format!( + " \"{}\": () => import('./sheets/{}.mjs'),", + escape_js_string(name), + safe + )); + } + lines.push("};".to_string()); + lines.push(String::new()); + + // Per-sheet forward-dependency map (for the output-cone closure). 
+ lines.push("// Sheet-level forward deps (sheet -> sheets it reads) for cone expansion.".to_string()); + lines.push("const SHEET_DEPS = {".to_string()); + for entry in &graph.sheets { + let deps: Vec = entry + .deps + .iter() + .map(|d| format!("\"{}\"", escape_js_string(d))) + .collect(); + lines.push(format!( + " \"{}\": [{}],", + escape_js_string(&entry.name), + deps.join(", ") + )); + } + lines.push("};".to_string()); + lines.push(String::new()); + + // Clusters — always emitted (force=true) so the cone closure can pull in + // whole clusters even when the consumer seeds only one member. + lines.extend(emit_clusters_block(graph, true)); + + // Compute map filled lazily by load(), plus load-state tracking. + lines.push("const SHEET_COMPUTE = {};".to_string()); + lines.push("const _loaded = new Set();".to_string()); + lines.push(String::new()); + + // load() + the output-cone closure. + lines.push(r#"/** + * Load sheet modules into SHEET_COMPUTE. Call (and await) before run(). + * @param {Object} [options] + * @param {string[]} [options.sheets] - Sheet names to load (with their transitive deps). + * @param {string[]} [options.cells] - Qualified cell addrs ("Sheet!A1"); each cell's + * sheet prefix seeds the cone. + * With neither, ALL sheets load (still lazy, but complete). To scope to named + * outputs, map their names -> cells via named-outputs.json, then pass `cells`. + * @returns {Promise<{ loaded: string[], count: number }>} + */ +export async function load(options = {}) { + // Seed set: explicit sheets + the sheet owning each requested cell. + let seeds = null; + if (Array.isArray(options.sheets) || Array.isArray(options.cells)) { + seeds = new Set(options.sheets || []); + for (const cell of (options.cells || [])) { + const i = String(cell).indexOf('!'); + if (i > 0) seeds.add(String(cell).slice(0, i)); + } + } + + // Target set: everything, or the transitive forward-dependency closure of the + // seeds. 
Any sheet in a circular cluster pulls in ALL of its cluster members — + // a cluster converges as a unit, so a partial load would be wrong. + let targets; + if (seeds === null) { + targets = Object.keys(SHEET_LOADERS); + } else { + const want = new Set(); + const stack = [...seeds]; + while (stack.length) { + const s = stack.pop(); + if (want.has(s)) continue; + want.add(s); + for (const d of (SHEET_DEPS[s] || [])) if (!want.has(d)) stack.push(d); + const cluster = CLUSTER_SHEETS.has(s) ? SHEET_CLUSTERS.find(c => c.includes(s)) : null; + if (cluster) for (const m of cluster) if (!want.has(m)) stack.push(m); + } + targets = [...want].filter(s => SHEET_LOADERS[s]); + } + + const toLoad = targets.filter(s => !_loaded.has(s)); + await Promise.all(toLoad.map(async (s) => { + const mod = await SHEET_LOADERS[s](); + SHEET_COMPUTE[s] = mod.compute; + _loaded.add(s); + })); + return { loaded: [..._loaded], count: _loaded.size }; +} +"#.to_string()); + + // run() — shared body, with the no-sheets-loaded guard. + lines.extend(emit_run_function(graph, true)); + lines.push(String::new()); + + // runScoped() convenience. + lines.push(r#"/** + * Convenience: load (optionally cone-scoped), then run, in one await. + * @param {Object} [inputs] - Cell overrides, as for run(). + * @param {Object} [options] - { sheets?, cells?, strict? } — passed to load() and run(). + * @returns {Promise<{ values: Object, kpis: Object, meta: Object, unknownOverrides: string[] }>} + */ +export async function runScoped(inputs = {}, options = {}) { + await load(options); + return run(inputs, options); +}"#.to_string()); lines.push(String::new()); // Default export - lines.push("export default { run };".to_string()); + lines.push("export default { run, load, runScoped };".to_string()); lines.push(String::new()); lines.join("\n") @@ -831,7 +1044,7 @@ function numToCol(n) { /// omitted, matching the previous behaviour. 
Iteration follows partition order /// then cell order — stable for a given workbook, so the output is /// deterministic across builds of the same model. Returns the entry count. -fn write_dependency_graph(partitions: &[SheetPartition], path: &Path) -> Result { +fn write_dependency_graph(partitions: &[SheetPartition<'_>], path: &Path) -> Result { let file = fs::File::create(path) .map_err(|e| format!("Failed to create {}: {}", path.display(), e))?; let mut w = std::io::BufWriter::new(file); @@ -891,7 +1104,7 @@ fn write_dependency_graph(partitions: &[SheetPartition], path: &Path) -> Result< /// Detect cells within a single sheet that form circular references. /// Returns the set of qualified addresses involved in cycles. -fn detect_intra_sheet_cycles(partition: &SheetPartition, sheet_name: &str) -> Vec { +fn detect_intra_sheet_cycles(partition: &SheetPartition<'_>, sheet_name: &str) -> Vec { // Build intra-sheet dependency graph let mut edges: HashMap> = HashMap::new(); let mut all_addrs: HashSet = HashSet::new(); diff --git a/pipelines/rust/src/main.rs b/pipelines/rust/src/main.rs index 0c26b12..ad177ac 100644 --- a/pipelines/rust/src/main.rs +++ b/pipelines/rust/src/main.rs @@ -37,12 +37,18 @@ fn main() { eprintln!(" --emit-debug Retain large debug artifacts that are otherwise skipped"); eprintln!(" in --chunked mode (root model-map.json). The cell-level"); eprintln!(" dependency-graph.json is always emitted (closures consume it)."); + eprintln!(" --lazy-engine Emit a chunked engine.js that imports sheet modules ON"); + eprintln!(" DEMAND (async load()/runScoped() + output-cone scoping) instead"); + eprintln!(" of eagerly at module-load time. run() stays synchronous: await"); + eprintln!(" load() first. Avoids pulling all sheet modules into memory just"); + eprintln!(" to import the engine. 
Per-sheet modules are emitted either way."); std::process::exit(1); } let compact_flag = args.iter().any(|a| a == "--compact"); let chunked_flag = args.iter().any(|a| a == "--chunked"); let emit_debug = args.iter().any(|a| a == "--emit-debug"); + let lazy_engine = args.iter().any(|a| a == "--lazy-engine"); // Filter out flags from positional args let positional: Vec<&String> = args.iter().skip(1).filter(|a| !a.starts_with("--")).collect(); @@ -357,7 +363,7 @@ fn main() { let chunked_dir = output_dir.join("chunked"); fs::create_dir_all(&chunked_dir).expect("Failed to create chunked/ directory"); - match chunked_emitter::emit_chunked(&workbook, &chunked_dir) { + match chunked_emitter::emit_chunked(&workbook, &chunked_dir, lazy_engine) { Ok(summary) => { println!( "[rust-parser] Chunked output written in {}ms: {}", diff --git a/pipelines/rust/src/sheet_partition.rs b/pipelines/rust/src/sheet_partition.rs index 22342a6..8ed664f 100644 --- a/pipelines/rust/src/sheet_partition.rs +++ b/pipelines/rust/src/sheet_partition.rs @@ -14,13 +14,20 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; // --------------------------------------------------------------------------- /// A single sheet's partition: its cells grouped for emission. +/// +/// Holds **borrows** of the workbook's cells, not clones. The workbook is +/// borrowed for the whole of `emit_chunked` (it outlives every partition), so +/// copying ~6M `CellData` (addresses + values + formula strings) into a second +/// owned structure is pure waste — it doubled peak memory at the partition step +/// on the real 200 MB+ models. Borrowing keeps the partition at a few pointers +/// per cell. 
#[derive(Debug, Clone)] -pub struct SheetPartition { +pub struct SheetPartition<'a> { pub name: String, /// Cells that are literals / inputs (no formula) - pub input_cells: Vec, + pub input_cells: Vec<&'a CellData>, /// Cells that have formulas, in intra-sheet dependency order - pub formula_cells: Vec, + pub formula_cells: Vec<&'a CellData>, /// Names of other sheets this sheet depends on (cross-sheet refs) pub sheet_dependencies: BTreeSet, } @@ -49,7 +56,7 @@ pub struct SheetGraphEntry { // --------------------------------------------------------------------------- /// Partition a workbook into per-sheet groups with cross-sheet dependency metadata. -pub fn partition_sheets(workbook: &WorkbookData) -> Vec { +pub fn partition_sheets<'a>(workbook: &'a WorkbookData) -> Vec> { let sheet_names: HashSet = workbook.sheet_names.iter().cloned().collect(); // Process each sheet in parallel. We only need sheet-level edges here, so we @@ -59,20 +66,20 @@ pub fn partition_sheets(workbook: &WorkbookData) -> Vec { // ones — O(formula_cells × range_size) wasted work that hung this step on // multi-million-formula sheets. The cell-level dependency-graph contract // (write_dependency_graph) still uses the expanding extract_refs. 
- let partitions: Vec = workbook + let partitions: Vec> = workbook .sheets .par_iter() .map(|sheet| { - let mut input_cells = Vec::new(); - let mut formula_cells = Vec::new(); + let mut input_cells: Vec<&CellData> = Vec::new(); + let mut formula_cells: Vec<&CellData> = Vec::new(); let mut sheet_deps: BTreeSet = BTreeSet::new(); for cell in &sheet.cells { if let Some(formula) = &cell.formula { - formula_cells.push(cell.clone()); + formula_cells.push(cell); collect_sheet_deps(formula, &sheet.name, &sheet_names, &mut sheet_deps); } else if cell.value.is_some() { - input_cells.push(cell.clone()); + input_cells.push(cell); } } diff --git a/pipelines/rust/tests/test-lazy-engine.mjs b/pipelines/rust/tests/test-lazy-engine.mjs new file mode 100644 index 0000000..0472ec3 --- /dev/null +++ b/pipelines/rust/tests/test-lazy-engine.mjs @@ -0,0 +1,170 @@ +#!/usr/bin/env node +/** + * Tests for the opt-in `--lazy-engine` chunked orchestrator (#22, Wall A). + * + * Builds a small acyclic model two ways — default (eager) and `--lazy-engine` — + * with the real rust-parser, imports each engine.js, and asserts: + * + * - the lazy engine.js does NOT statically import sheet modules (no + * `... from './sheets/...'`), so importing it doesn't pull every module + * into memory; it exports run / load / runScoped + * - run() before load() throws the no-sheets-loaded guard + * - after `await load()`, run(inputs) === the EAGER engine's run(inputs) + * (base case and an override that propagates cross-sheet) + * - output-cone scoping: load({ sheets: [leaf] }) loads only the leaf's + * transitive dependency closure (not unrelated sheets), and run() then + * computes exactly that cone + * - runScoped(inputs, { cells: [...] }) loads the cone + runs in one await + * + * Needs the rust-parser binary. Skips (exit 0) if it isn't built — mirrors the + * other rust/tests/*.mjs. 
+ * + * Usage: node pipelines/rust/tests/test-lazy-engine.mjs + */ + +import XLSX from 'xlsx'; +import { writeFileSync, existsSync, mkdtempSync, rmSync, readFileSync } from 'fs'; +import { join, dirname } from 'path'; +import { fileURLToPath, pathToFileURL } from 'url'; +import { tmpdir } from 'os'; +import { execFileSync } from 'child_process'; + +const __dir = dirname(fileURLToPath(import.meta.url)); +const ROOT = join(__dir, '..', '..', '..'); +const exe = process.platform === 'win32' ? '.exe' : ''; +const PARSER = [ + join(ROOT, 'pipelines/rust/target/release', `rust-parser${exe}`), + join(ROOT, 'pipelines/rust/target/debug', `rust-parser${exe}`), +].find(existsSync); + +if (!PARSER) { + console.log('SKIP: rust-parser not built (cd pipelines/rust && cargo build --release)'); + process.exit(0); +} + +let passed = 0; +let failed = 0; +function assert(cond, msg) { + if (cond) { passed++; } else { failed++; console.error(` FAIL: ${msg}`); } +} + +function valuesEqual(a, b) { + const ka = Object.keys(a); + const kb = Object.keys(b); + if (ka.length !== kb.length) return false; + for (const k of ka) { + if (!(k in b)) return false; + const x = a[k], y = b[k]; + if (x !== y && !(Number.isNaN(x) && Number.isNaN(y))) return false; + } + return true; +} + +/** + * Build a workbook from {sheetName: cells}, parse it (optionally with + * --lazy-engine), and import the resulting chunked engine.js. + */ +async function build(sheets, { lazy }) { + const wb = { SheetNames: Object.keys(sheets), Sheets: sheets }; + const tmp = mkdtempSync(join(tmpdir(), lazy ? 
'lazy-eng-' : 'eager-eng-')); + const xlsx = join(tmp, 'm.xlsx'); + writeFileSync(xlsx, XLSX.write(wb, { type: 'buffer', bookType: 'xlsx' })); + const argv = [xlsx, join(tmp, 'out'), '--chunked']; + if (lazy) argv.push('--lazy-engine'); + execFileSync(PARSER, argv, { encoding: 'utf-8', stdio: 'pipe' }); + const chunked = join(tmp, 'out', 'chunked'); + const eng = await import(pathToFileURL(join(chunked, 'engine.js')).href); + return { + chunked, + eng, + src: readFileSync(join(chunked, 'engine.js'), 'utf-8'), + cleanup: () => rmSync(tmp, { recursive: true, force: true }), + }; +} + +// Acyclic model: Delta is independent; Gamma <- Beta <- Alpha (cross-sheet). +// Alpha!B1 = 10 (input) +// Beta!B1 = Alpha!B1 * 2 (= 20) +// Gamma!B1 = Beta!B1 + 5 (= 25) +// Delta!B1 = 100 (independent input) +// So the dependency cone of Gamma is {Gamma, Beta, Alpha} — Delta is outside it. +function model() { + const Alpha = { '!ref': 'A1:B1', A1: { t: 's', v: 'Alpha' }, B1: { t: 'n', v: 10 } }; + const Beta = { '!ref': 'A1:B1', A1: { t: 's', v: 'Beta' }, B1: { t: 'n', v: 20, f: 'Alpha!B1*2' } }; + const Gamma = { '!ref': 'A1:B1', A1: { t: 's', v: 'Gamma' }, B1: { t: 'n', v: 25, f: 'Beta!B1+5' } }; + const Delta = { '!ref': 'A1:B1', A1: { t: 's', v: 'Delta' }, B1: { t: 'n', v: 100 } }; + return { Alpha, Beta, Gamma, Delta }; +} + +// --------------------------------------------------------------------------- +console.log('Testing: lazy engine shape — no static sheet imports, exports run/load/runScoped'); +const eager = await build(model(), { lazy: false }); +const lazyAll = await build(model(), { lazy: true }); +{ + assert(!lazyAll.src.includes("from './sheets/"), + 'lazy engine.js has NO static `... 
from ./sheets/...` import'); + assert(lazyAll.src.includes("=> import('./sheets/"), + 'lazy engine.js uses dynamic import() loaders'); + assert(typeof lazyAll.eng.run === 'function', 'exports run()'); + assert(typeof lazyAll.eng.load === 'function', 'exports load()'); + assert(typeof lazyAll.eng.runScoped === 'function', 'exports runScoped()'); + // Sanity: the eager engine DOES statically import (contrast). + assert(eager.src.includes("from './sheets/"), 'eager engine.js DOES statically import (contrast)'); +} + +// --------------------------------------------------------------------------- +console.log('Testing: run() before load() throws; after load() it matches the eager engine'); +{ + let threw = false; + try { lazyAll.eng.run(); } catch (e) { threw = /no sheets loaded/i.test(String(e.message)); } + assert(threw, 'run() before load() throws the no-sheets-loaded guard'); + + const loaded = await lazyAll.eng.load(); + assert(loaded.count === 4, `load() (no opts) loads all sheets (got ${loaded.count})`); + + // Base case parity. + const eBase = eager.eng.run(); + const lBase = lazyAll.eng.run(); + assert(valuesEqual(eBase.values, lBase.values), 'lazy base-case values === eager base-case values'); + assert(lBase.values['Gamma!B1'] === 25 && lBase.values['Delta!B1'] === 100, 'lazy base values correct'); + + // Override that propagates cross-sheet: Alpha!B1 100 -> Beta 200 -> Gamma 205. 
+ const eOv = eager.eng.run({ 'Alpha!B1': 100 }); + const lOv = lazyAll.eng.run({ 'Alpha!B1': 100 }); + assert(valuesEqual(eOv.values, lOv.values), 'lazy override values === eager override values'); + assert(lOv.values['Beta!B1'] === 200 && lOv.values['Gamma!B1'] === 205, 'override propagates through lazy engine'); +} + +// --------------------------------------------------------------------------- +console.log('Testing: output-cone scoping — load({sheets}) loads only the dependency closure'); +{ + const coneBuild = await build(model(), { lazy: true }); + const loaded = await coneBuild.eng.load({ sheets: ['Gamma'] }); + const set = new Set(loaded.loaded); + assert(loaded.count === 3, `cone of Gamma is 3 sheets (got ${loaded.count})`); + assert(set.has('Gamma') && set.has('Beta') && set.has('Alpha'), 'cone includes Gamma + its transitive deps Beta, Alpha'); + assert(!set.has('Delta'), 'cone EXCLUDES the unrelated Delta sheet'); + + const r = coneBuild.eng.run(); + assert(r.values['Gamma!B1'] === 25, 'cone run computes Gamma!B1 = 25'); + assert(!('Delta!B1' in r.values), 'unloaded Delta is not computed (absent from values)'); + coneBuild.cleanup(); +} + +// --------------------------------------------------------------------------- +console.log('Testing: runScoped() loads the cone of the requested cells and runs in one call'); +{ + const scopedBuild = await build(model(), { lazy: true }); + const r = await scopedBuild.eng.runScoped({}, { cells: ['Gamma!B1'] }); + assert(r.values['Gamma!B1'] === 25, 'runScoped({cells:[Gamma!B1]}) computes Gamma!B1 = 25'); + assert(!('Delta!B1' in r.values), 'runScoped scoped to the cell cone (Delta absent)'); + scopedBuild.cleanup(); +} + +eager.cleanup(); +lazyAll.cleanup(); + +// --------------------------------------------------------------------------- +console.log(''); +console.log(`Results: ${passed} passed, ${failed} failed, ${passed + failed} total`); +process.exit(failed > 0 ? 1 : 0);