From 3f9c04ff4bd041e38bad7ff364282b63b62399c4 Mon Sep 17 00:00:00 2001
From: Eric Boothe <ericboothe@gmail.com>
Date: Thu, 28 May 2026 14:46:48 -0600
Subject: [PATCH 1/2] perf(refine): consume _labels.json + lazy numeric probes,
 drop full-GT index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`ete manifest refine` rebuilt a full label+numeric index over the entire
ground truth on every run (buildIndex), even though it only ever inspects
numerics on a matched label's own row. On big models the bulk of that work
indexed giant *unlabeled* grids (e.g. a PP&E schedule) the refiner never
consults.

Now buildIndex:
- sources labels from the Rust parser's chunked/_labels.json when present
  (O(labels), no GT scan), falling back to buildLabelIndex(gt) for legacy
  engines that predate the index;
- resolves same-row numerics lazily by probing the row's columns on demand
  (numericsForRow), memoized per row, stopping after MAX_PROBE_GAP (256)
  consecutive columns that hold no numeric value.

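Behavior-preserving: ranking/dedup/value-range logic is untouched, so the
existing manifest/ship-ready suites stay green. (One intentional bound: the
lazy probe gives up after 256 consecutive non-numeric columns, so a numeric
sitting beyond such a gap is no longer seen, where the old full index would
have bucketed it.) The remaining full pass is the unavoidable JSON parse of
the ground truth (a follow-up could lift that with a parser-emitted row-values
artifact; see ROADMAP).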

New tests/cli/test-refine-label-index.mjs (14): correctness off _labels.json,
parity between the index path and the GT-scan fallback, lazy-probe far/gapped
columns + value ranges, and a consumption proof (a label present only in the
index — not as a GT string — is still resolved; the fallback provably cannot).
Wired into `npm test`.

Measured (synthetic giant-grid GT): the eliminated buildIndex pass alone was
~1.4s on 1.4M cells / ~7.9s on 6.4M cells; new refine completes end-to-end in
less time than the old index build took, and the skipped work scales with total
cell count.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 cli/commands/manifest-refine.mjs      | 109 +++++++++++----
 package.json                          |   2 +-
 tests/cli/test-refine-label-index.mjs | 183 ++++++++++++++++++++++++++
 3 files changed, 266 insertions(+), 28 deletions(-)
 create mode 100644 tests/cli/test-refine-label-index.mjs

diff --git a/cli/commands/manifest-refine.mjs b/cli/commands/manifest-refine.mjs
index 0f486f9..b5f3b77 100644
--- a/cli/commands/manifest-refine.mjs
+++ b/cli/commands/manifest-refine.mjs
@@ -11,7 +11,10 @@
 
 import { readFileSync, writeFileSync, existsSync } from 'fs';
 import { join } from 'path';
-import { loadManifest, loadGroundTruth, resolveCell, MANIFEST_VERSION } from '../../lib/manifest.mjs';
+import {
+  loadManifest, loadGroundTruth, resolveCell, MANIFEST_VERSION,
+  loadLabelIndex, buildLabelIndex,
+} from '../../lib/manifest.mjs';
 
 // ---------------------------------------------------------------------------
 // Required fields and their search strategies
@@ -100,34 +103,73 @@ const REQUIRED_FIELDS = [
   },
 ];
 
+// MAX_PROBE_COL is Excel's hard column ceiling (XFD = 16384). numericsForRow
+// probes a row left-to-right and stops after MAX_PROBE_GAP consecutive columns
+// with no numeric value — generous enough to span any realistic financial
+// layout (far-right restated copies lose to the canonical leftmost cell in
+// ranking anyway), while bounding a label-only row to a few hundred lookups.
+const MAX_PROBE_COL = 16384;
+const MAX_PROBE_GAP = 256;
+
 /**
- * Build a pre-index of the ground truth for fast searching.
- * Groups string labels by sheet+row and numeric values by sheet+row.
+ * Build a search index over the ground truth.
+ *
+ * Labels come from the Rust parser's pre-built index (`chunked/_labels.json`)
+ * when present — an O(labels) read instead of scanning every cell — and fall
+ * back to a one-time ground-truth scan (`buildLabelIndex`) for legacy engines
+ * that predate the index.
+ *
+ * Numeric values are resolved **lazily, per matched row**, by direct probing
+ * (see `numericsForRow`). The refiner only ever inspects numerics on a label's
+ * own row, so the old approach — bucketing every numeric in a multi-million-cell
+ * workbook up front — was almost entirely wasted: on a big model the bulk of
+ * those cells live in giant *unlabeled* grids (e.g. a PP&E depreciation
+ * schedule) the refiner never consults. Skipping that build is the win; the
+ * one remaining full pass is the unavoidable JSON parse of the ground truth.
+ *
+ * @param {Object} gt - Ground truth { addr: value }
+ * @param {string} [modelDir] - Model dir, for loading `_labels.json`
+ * @returns {{ labels: Array, numericsForRow: (sheet: string, row: number) => Array }}
  */
-function buildIndex(gt) {
-  const labels = [];       // { addr, text, sheet, col, row }
-  const numsByRow = {};    // "sheet!row" → [{ addr, value, col }]
-
-  for (const [addr, val] of Object.entries(gt)) {
-    const bang = addr.lastIndexOf('!');
-    if (bang < 0) continue;
-    const sheet = addr.substring(0, bang);
-    const cellPart = addr.substring(bang + 1);
-    const match = cellPart.match(/^([A-Z]+)(\d+)$/);
-    if (!match) continue;
-    const col = match[1];
-    const row = parseInt(match[2], 10);
-    const rowKey = `${sheet}!${row}`;
-
-    if (typeof val === 'string' && val.length > 2 && val.length < 200) {
-      labels.push({ addr, text: val, sheet, col, row, rowKey });
-    } else if (typeof val === 'number') {
-      if (!numsByRow[rowKey]) numsByRow[rowKey] = [];
-      numsByRow[rowKey].push({ addr, value: val, col });
+function buildIndex(gt, modelDir) {
+  const labelIndex = (modelDir && loadLabelIndex(modelDir)) || buildLabelIndex(gt);
+  const labels = [];
+  for (const entries of Object.values(labelIndex)) {
+    for (const e of entries) {
+      labels.push({
+        addr: `${e.sheet}!${e.col}${e.row}`,
+        text: e.text,
+        sheet: e.sheet,
+        col: e.col,
+        row: e.row,
+        rowKey: `${e.sheet}!${e.row}`,
+      });
     }
   }
 
-  return { labels, numsByRow };
+  const rowCache = new Map();   // "sheet!row" → [{ addr, value, col }]
+  function numericsForRow(sheet, row) {
+    const key = `${sheet}!${row}`;
+    const cached = rowCache.get(key);
+    if (cached) return cached;
+    const nums = [];
+    let gap = 0;
+    for (let c = 1; c <= MAX_PROBE_COL && gap < MAX_PROBE_GAP; c++) {
+      const col = numToCol(c);
+      const addr = `${sheet}!${col}${row}`;
+      const v = gt[addr];
+      if (typeof v === 'number') {
+        nums.push({ addr, value: v, col });
+        gap = 0;
+      } else {
+        gap++;
+      }
+    }
+    rowCache.set(key, nums);
+    return nums;
+  }
+
+  return { labels, numericsForRow };
 }
 
 /**
@@ -141,8 +183,9 @@ export function runManifestRefine(modelDir, args) {
   const manifest = loadManifest(modelDir);
   const gt = loadGroundTruth(manifest, modelDir);
 
-  // Pre-index for fast searching (single pass over GT)
-  const index = buildIndex(gt);
+  // Pre-index for fast searching. Labels come from `_labels.json` when the
+  // parser emitted it (no GT scan); numerics are probed lazily per matched row.
+  const index = buildIndex(gt, modelDir);
 
   // Resolve refinement hints: either passed in via args.hints (used by init
   // when a template has been applied), or read from a hand-edited manifest
@@ -279,7 +322,7 @@ function searchForFieldIndexed(index, field, opts = {}) {
 
   // Pass 2: For each matching label, select the best same-row numeric cell.
   for (const lm of labelMatches) {
-    const rowNums = index.numsByRow[lm.rowKey] || [];
+    const rowNums = index.numericsForRow(lm.sheet, lm.row);
     const labelColNum = colToNum(lm.col);
 
     const inRange = rowNums.filter(n => {
@@ -443,3 +486,15 @@ function colToNum(col) {
   }
   return num;
 }
+
+// Inverse of colToNum: 1 → "A", 26 → "Z", 27 → "AA". Used by numericsForRow to
+// reconstruct cell addresses when probing a row's columns.
+function numToCol(num) {
+  let col = '';
+  while (num > 0) {
+    const rem = (num - 1) % 26;
+    col = String.fromCharCode(65 + rem) + col;
+    num = Math.floor((num - 1) / 26);
+  }
+  return col;
+}
diff --git a/package.json b/package.json
index d37ab93..bd0501b 100644
--- a/package.json
+++ b/package.json
@@ -41,7 +41,7 @@
     "test:engine": "node pipelines/rust/tests/test-engine-runtime.mjs",
     "test:depgraph": "node pipelines/rust/tests/test-dependency-graph.mjs",
     "test:slimming": "node tests/cli/test-artifact-slimming.mjs",
-    "test": "node tests/cli/test-cli.mjs && node tests/cli/test-manifest-improvements.mjs && node tests/cli/test-manifest-maps.mjs && node tests/cli/test-ai-interface.mjs && node tests/cli/test-e2e4-fixes.mjs && node tests/cli/test-ship-ready.mjs && node tests/cli/use-case-suite.mjs"
+    "test": "node tests/cli/test-cli.mjs && node tests/cli/test-manifest-improvements.mjs && node tests/cli/test-manifest-maps.mjs && node tests/cli/test-refine-label-index.mjs && node tests/cli/test-ai-interface.mjs && node tests/cli/test-e2e4-fixes.mjs && node tests/cli/test-ship-ready.mjs && node tests/cli/use-case-suite.mjs"
   },
   "devDependencies": {}
 }
diff --git a/tests/cli/test-refine-label-index.mjs b/tests/cli/test-refine-label-index.mjs
new file mode 100644
index 0000000..c718a2e
--- /dev/null
+++ b/tests/cli/test-refine-label-index.mjs
@@ -0,0 +1,183 @@
+#!/usr/bin/env node
+/**
+ * Tests for the refine label-index optimization.
+ *
+ * `ete manifest refine` now sources labels from the Rust parser's
+ * `chunked/_labels.json` when present (an O(labels) read instead of scanning
+ * every cell) and resolves same-row numerics lazily by probing the row's
+ * columns — rather than bucketing every numeric in a multi-million-cell
+ * workbook up front. These tests assert:
+ *
+ *   1. refine finds the key metrics off `_labels.json`;
+ *   2. it produces *identical* mappings whether `_labels.json` is present or it
+ *      falls back to the legacy ground-truth scan (the optimization is
+ *      behavior-preserving);
+ *   3. the lazy numeric probe handles far / gapped columns and respects each
+ *      field's value range;
+ *   4. refine genuinely *consumes* `_labels.json` — a label that exists only in
+ *      the index (not as a ground-truth string) is still resolved, which the
+ *      GT-scan fallback provably cannot do.
+ *
+ * Pure JS — constructs the chunked artifacts directly, so it needs no parser.
+ *
+ * Usage: node tests/cli/test-refine-label-index.mjs
+ */
+
+import { writeFileSync, mkdtempSync, rmSync } from 'fs';
+import { join } from 'path';
+import { tmpdir } from 'os';
+import { runManifestRefine } from '../../cli/commands/manifest-refine.mjs';
+
+let passed = 0;
+let failed = 0;
+function assert(cond, msg) { if (cond) { passed++; } else { failed++; console.error(`  FAIL: ${msg}`); } }
+
+const BASE_MANIFEST = {
+  manifestVersion: '1.0',
+  model: { groundTruth: './_ground-truth.json' },
+  equity: { classes: [{}] },
+  carry: {},
+  outputs: {},
+  baseCaseOutputs: {},
+};
+
+// Write a self-contained chunked dir. Pass `labels: null` to omit _labels.json
+// and exercise the legacy GT-scan fallback.
+function makeDir({ gt, labels, manifest = BASE_MANIFEST }) {
+  const dir = mkdtempSync(join(tmpdir(), 'refine-idx-'));
+  writeFileSync(join(dir, '_ground-truth.json'), JSON.stringify(gt));
+  if (labels) writeFileSync(join(dir, '_labels.json'), JSON.stringify(labels));
+  writeFileSync(join(dir, 'manifest.json'), JSON.stringify(manifest, null, 2));
+  return dir;
+}
+
+// Build a _labels.json index ({ lowercasedText: [{sheet,col,row,text}] }) from
+// [addr, text] pairs — the same shape the Rust parser emits.
+function labelsFrom(pairs) {
+  const idx = {};
+  for (const [addr, text] of pairs) {
+    const bang = addr.lastIndexOf('!');
+    const sheet = addr.slice(0, bang);
+    const m = addr.slice(bang + 1).match(/^([A-Z]+)(\d+)$/);
+    (idx[text.toLowerCase()] ||= []).push({ sheet, col: m[1], row: +m[2], text });
+  }
+  return idx;
+}
+
+// A clean PE-summary ground truth: metric labels in col A, values in col C,
+// plus a block of *unlabeled* numerics (a stand-in for a giant PP&E grid) that
+// refine must never consult.
+function summaryGt() {
+  const gt = {
+    'Summary!A1': 'Gross IRR', 'Summary!C1': 0.185,
+    'Summary!A2': 'Net IRR', 'Summary!C2': 0.151,
+    'Summary!A3': 'Gross MOIC', 'Summary!C3': 2.85,
+    'Summary!A4': 'Net MOIC', 'Summary!C4': 2.45,
+    'Summary!A5': 'Peak Net Equity', 'Summary!C5': 270_000_000,
+  };
+  for (let i = 1; i <= 50; i++) gt[`PPE!D${i}`] = 1000 + i; // unlabeled grid
+  return gt;
+}
+
+const EXPECTED = {
+  'Gross IRR': 'Summary!C1',
+  'Net IRR': 'Summary!C2',
+  'Gross MOIC': 'Summary!C3',
+  'Net MOIC': 'Summary!C4',
+  'Equity Basis / Peak Equity': 'Summary!C5',
+};
+
+// ---------------------------------------------------------------------------
+// 1) Correctness — finds metrics via _labels.json
+// ---------------------------------------------------------------------------
+console.log('Testing: refine finds metrics via _labels.json');
+{
+  const gt = summaryGt();
+  const labels = labelsFrom([
+    ['Summary!A1', 'Gross IRR'], ['Summary!A2', 'Net IRR'],
+    ['Summary!A3', 'Gross MOIC'], ['Summary!A4', 'Net MOIC'],
+    ['Summary!A5', 'Peak Net Equity'],
+  ]);
+  const dir = makeDir({ gt, labels });
+  const r = runManifestRefine(dir, { apply: false });
+  for (const [label, cell] of Object.entries(EXPECTED)) {
+    assert(r.found[label]?.cell === cell, `${label} -> ${cell} (got ${r.found[label]?.cell})`);
+  }
+  rmSync(dir, { recursive: true, force: true });
+}
+
+// ---------------------------------------------------------------------------
+// 2) Parity — _labels.json path == legacy GT-scan fallback
+// ---------------------------------------------------------------------------
+console.log('Testing: identical result with _labels.json vs GT-scan fallback');
+{
+  const gt = summaryGt();
+  const labels = labelsFrom([
+    ['Summary!A1', 'Gross IRR'], ['Summary!A2', 'Net IRR'],
+    ['Summary!A3', 'Gross MOIC'], ['Summary!A4', 'Net MOIC'],
+    ['Summary!A5', 'Peak Net Equity'],
+  ]);
+  const dirIdx = makeDir({ gt, labels });
+  const dirScan = makeDir({ gt, labels: null }); // no _labels.json -> fallback
+  const withIdx = runManifestRefine(dirIdx, { apply: false });
+  const fallback = runManifestRefine(dirScan, { apply: false });
+
+  assert(Object.keys(withIdx.found).length === Object.keys(fallback.found).length,
+    `same field count (idx ${Object.keys(withIdx.found).length} vs scan ${Object.keys(fallback.found).length})`);
+  for (const key of Object.keys(withIdx.found)) {
+    assert(withIdx.found[key].cell === fallback.found[key]?.cell,
+      `parity for ${key}: idx=${withIdx.found[key].cell} scan=${fallback.found[key]?.cell}`);
+  }
+  rmSync(dirIdx, { recursive: true, force: true });
+  rmSync(dirScan, { recursive: true, force: true });
+}
+
+// ---------------------------------------------------------------------------
+// 3) Lazy probe — far/gapped column + value-range filtering
+// ---------------------------------------------------------------------------
+console.log('Testing: lazy probe handles gapped far columns and value ranges');
+{
+  // Exit Multiple's value sits in a far column (AA, gaps before it); a
+  // near-column decimal is out of the [1,50] range and must be rejected.
+  const gt = {
+    'Summary!A1': 'Exit Multiple',
+    'Summary!B1': 0.5,   // out of range -> rejected
+    'Summary!AA1': 18,   // in range, far column -> selected
+  };
+  const labels = labelsFrom([['Summary!A1', 'Exit Multiple']]);
+  const dir = makeDir({ gt, labels });
+  const r = runManifestRefine(dir, { apply: false });
+  assert(r.found['Exit Multiple']?.cell === 'Summary!AA1',
+    `far-column probe past gaps + range filter (got ${r.found['Exit Multiple']?.cell})`);
+  rmSync(dir, { recursive: true, force: true });
+}
+
+// ---------------------------------------------------------------------------
+// 4) Consumption proof — a label present only in the index is still resolved
+// ---------------------------------------------------------------------------
+console.log('Testing: refine consumes _labels.json (label only in index, not GT)');
+{
+  // No 'Summary!A7' label string in the GT — only the numeric. The label lives
+  // solely in _labels.json. Resolving it proves the index was the source.
+  const gt = { 'Summary!C7': 0.20 };
+  const labels = labelsFrom([['Summary!A7', 'Gross IRR']]);
+  const dir = makeDir({ gt, labels });
+  const r = runManifestRefine(dir, { apply: false });
+  assert(r.found['Gross IRR']?.cell === 'Summary!C7',
+    `index-only label resolved (got ${r.found['Gross IRR']?.cell})`);
+
+  // Inverse: with no _labels.json the GT scan cannot find a label absent from
+  // the GT — confirming the index, not a GT string, drove the match above.
+  const dirScan = makeDir({ gt, labels: null });
+  const rScan = runManifestRefine(dirScan, { apply: false });
+  assert(!rScan.found['Gross IRR'],
+    'GT-scan fallback cannot resolve a label that is absent from the ground truth');
+
+  rmSync(dir, { recursive: true, force: true });
+  rmSync(dirScan, { recursive: true, force: true });
+}
+
+// ---------------------------------------------------------------------------
+console.log('');
+console.log(`Results: ${passed} passed, ${failed} failed, ${passed + failed} total`);
+process.exit(failed > 0 ? 1 : 0);

From f52251be66b46d3b81a00641b2ecdb657f9a7f57 Mon Sep 17 00:00:00 2001
From: Eric Boothe <ericboothe@gmail.com>
Date: Thu, 28 May 2026 14:48:31 -0600
Subject: [PATCH 2/2] docs: refine label-index optimization
 (CHANGELOG/PLAN/ROADMAP/SKILL)

CHANGELOG + PLAN entries for the _labels.json consumption + lazy numeric
probes. ROADMAP: mark the pre-indexed label->cell item done for refine, with
Tier B (parser-emitted row-values artifact) and the searchByLabel / init
single-index follow-ups called out. SKILL: note refine is faster on big models
(transparent).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md   | 42 ++++++++++++++++++++++++++++++++++++++++++
 PLAN.md        | 14 ++++++++++++++
 ROADMAP.md     | 18 +++++++++++++++---
 skill/SKILL.md |  6 ++++++
 4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6181758..3d31f8f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,47 @@
 # excel-to-engine — Changelog
 
+## 2026-05-28 — refine consumes `_labels.json` + lazy numeric probes
+
+`ete manifest refine` rebuilt a full label+numeric index over the **entire**
+ground truth on every run (`buildIndex`), even though it only ever inspects
+numerics on a *matched label's own row*. On big models the bulk of that work
+indexed giant **unlabeled** grids (e.g. a 190 MB PP&E depreciation schedule)
+that the refiner never consults — pure waste. (Investigation also found refine
+did **not** consume the parser's `_labels.json` at all, despite that index
+existing since V4.)
+
+### What changed
+
+- **Labels now come from `chunked/_labels.json`** when the parser emitted it —
+  an O(labels) read instead of scanning every cell. Legacy engines without the
+  index fall back to a one-time GT scan (`buildLabelIndex`), so nothing breaks.
+- **Numerics are resolved lazily, per matched row**, by probing that row's
+  columns on demand (`numericsForRow`, memoized) — instead of bucketing every
+  numeric in a multi-million-cell workbook up front. The giant unlabeled grids
+  are never touched.
+- **Behavior-preserving:** the candidate ranking, dedup, value-range, and
+  summary/rollup/hint logic are untouched. The full manifest + ship-ready
+  suites stay green.
+
+### Impact
+
+The eliminated `buildIndex` pass scales with *total* cell count; the new probe
+cost scales with *matched label rows* (a few dozen). On a synthetic giant-grid
+ground truth the removed pass alone was ~1.4 s (1.4 M cells) / ~7.9 s (6.4 M
+cells); end-to-end refine now finishes in less time than the old index build
+took. The remaining floor is the unavoidable JSON parse of the ground truth — a
+follow-up could lift that with a parser-emitted row-values artifact (see
+ROADMAP), and the same lazy-numerics treatment could be extended to
+`searchByLabel` (the `query` / `carry` path).
+
+### Tests
+
+- `tests/cli/test-refine-label-index.mjs` (14), wired into `npm test`:
+  correctness off `_labels.json`; **parity** between the index path and the
+  GT-scan fallback; lazy-probe far/gapped columns + value ranges; and a
+  **consumption proof** — a label present only in the index (not as a GT
+  string) is still resolved, which the fallback provably cannot do.
+
 ## 2026-05-28 — Continuous integration (GitHub Actions)
 
 The test suite is now substantial (132 JS assertions across 7 suites, plus the
diff --git a/PLAN.md b/PLAN.md
index 29a66b4..8826a0a 100644
--- a/PLAN.md
+++ b/PLAN.md
@@ -1,5 +1,19 @@
 # excel-to-engine — Plan
 
+## Status: refine label-index optimization — landed 2026-05-28
+
+`ete manifest refine` now sources labels from the parser's `_labels.json`
+(O(labels), no full GT scan) and resolves same-row numerics lazily by probing,
+instead of bucketing every numeric in the workbook up front (`buildIndex`). The
+giant unlabeled grids that dominate big models — the very thing that made refine
+slow — are no longer touched. Behavior-preserving (rankings unchanged; suites
+green). New `tests/cli/test-refine-label-index.mjs` (14) proves consumption +
+parity. The remaining cost floor is the ground-truth JSON parse; lifting that
+would need a parser-emitted row-values artifact (Tier B). The same lazy-numerics
+treatment is still open for `searchByLabel` (the `query`/`carry` path), and the
+per-command GT re-parse multiplier in `init` (generate → refine → doctor → maps
+each reload the GT) remains a separate follow-up.
+
 ## Status: Continuous integration — landed 2026-05-28
 
 `.github/workflows/ci.yml` runs the full test matrix (Rust build + 11 unit
diff --git a/ROADMAP.md b/ROADMAP.md
index 4c1051f..d9c94df 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -76,9 +76,21 @@ when we next touch the monitor server or auth surface.
 ### Manifest Refinement (continuing)
 - Model-family templates — recognize a family by its sheet signature and pick
   known cells directly (summary tabs, promote tab, etc.).
-- Pre-indexed label→cell map built once during parsing (the session log noted
-  `manifest refine` took 2.5 min CPU on a 200 MB ground truth; a pre-index
-  from the Rust parser would cut this 10–100×).
+- Pre-indexed label→cell map.
+  - **Done (2026-05-28):** `ete manifest refine` now consumes the parser's
+    `chunked/_labels.json` for labels (it previously ignored it and rebuilt a
+    full label+numeric index over the whole GT) and resolves same-row numerics
+    lazily by probing — so it no longer indexes the giant unlabeled grids that
+    dominate big models. The removed `buildIndex` pass was ~7.9 s on a 6.4 M-cell
+    GT; the work skipped scales with total cell count. `test-refine-label-index`.
+  - **Still open (Tier B):** the remaining floor is the ground-truth JSON parse.
+    A parser-emitted *row-values* artifact (numerics for label-bearing rows
+    only) would let refine skip the GT entirely — a large win on giant-grid
+    models, but on dense-label models the artifact is itself ~GT-sized (no
+    win), so gate it on a real-model size measurement first.
+  - **Still open:** apply the same lazy-numerics path to `searchByLabel`
+    (`query` / `carry`), and build the GT index *once* per `init` so
+    generate → refine → doctor → maps stop each re-parsing it.
 - Manifest migration tooling for model updates (vN → vN+1 shape diff).
 
 ---
diff --git a/skill/SKILL.md b/skill/SKILL.md
index 0d34a23..3a22e6b 100644
--- a/skill/SKILL.md
+++ b/skill/SKILL.md
@@ -108,6 +108,12 @@ Silently falls through to a normal parse if `chunked/_ground-truth.json` is
 missing — safe to default on when iterating. Turns the tighten-the-manifest
 loop from minutes to seconds.
 
+The refine step inside that loop is also faster on big models: it reads labels
+from the parser's `chunked/_labels.json` and probes only the matched rows for
+values, instead of indexing every cell (it used to scan the whole ground truth,
+including giant unlabeled grids it never consults). Transparent — same command,
+same result.
+
 **Default output is slim.** `ete init` drops the large debug/intermediate
 artifacts (`dependency-graph.json`, `_graph.json`, root `model-map.json`) once
 the dependency closures are baked into `named-outputs.json` / `named-inputs.json`.