From 8554b0b91865f1b9a7ab389d2dfe532f4998843e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 11:40:55 +0200
Subject: [PATCH 01/24] feat(grist): add constants and init scaffold

---
 .../mascarade_eval/grist/__init__.py          | 36 +++++++++++++++++++
 mascarade-eval/tests/test_grist_constants.py  | 23 ++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 mascarade-eval/mascarade_eval/grist/__init__.py
 create mode 100644 mascarade-eval/tests/test_grist_constants.py

diff --git a/mascarade-eval/mascarade_eval/grist/__init__.py b/mascarade-eval/mascarade_eval/grist/__init__.py
new file mode 100644
index 0000000..ac27099
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/__init__.py
@@ -0,0 +1,36 @@
+# mascarade_eval/grist/__init__.py
+"""Grist-backed dataset management for the mascarade training corpus.
+
+Grist is the canonical source of truth. Mining ingests in insert-only
+mode (human edits in Grist are never overwritten); training and HF
+publication consume a deterministic export.
+"""
+from pathlib import Path
+
+GRIST_BASE = "https://grist.saillant.cc/api"
+
+# Known existing doc (held-out eval). The training doc ID is provided at
+# runtime via --doc or the GRIST_DOC_TRAINING env/file value.
+DOC_HELDOUT = "eGbbrpzN3TeLq3sUd2YFA2"
+
+KEY_FILE = Path.home() / ".config" / "electron-rare" / "grist.env"
+
+TRAINING_TABLE = "Mascarade_Training"
+REGISTRY_TABLE = "Datasets_Registry"
+EXPORTS_TABLE = "Exports"
+
+TRAINING_COLUMNS = (
+    "item_key", "domain", "system", "user_msg", "assistant_msg",
+    "extra_turns", "source", "exclure", "notes",
+)
+REGISTRY_COLUMNS = (
+    "name", "family", "domain", "hf_dataset_id", "license",
+    "n_items", "notes",
+)
+EXPORTS_COLUMNS = (
+    "export_id", "domain", "created_at", "n_items", "content_hash",
+    "output_file", "hf_dataset_id",
+)
+
+_ROOT = Path(__file__).resolve().parent.parent.parent  # .../mascarade-eval
+EXPORTS_DIR = _ROOT / "exports"
diff --git a/mascarade-eval/tests/test_grist_constants.py b/mascarade-eval/tests/test_grist_constants.py
new file mode 100644
index 0000000..0625521
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_constants.py
@@ -0,0 +1,23 @@
+# tests/test_grist_constants.py
+from mascarade_eval import grist
+
+
+def test_constants_present():
+    assert grist.GRIST_BASE == "https://grist.saillant.cc/api"
+    assert grist.DOC_HELDOUT == "eGbbrpzN3TeLq3sUd2YFA2"
+    assert grist.TRAINING_TABLE == "Mascarade_Training"
+    assert grist.REGISTRY_TABLE == "Datasets_Registry"
+    assert grist.EXPORTS_TABLE == "Exports"
+
+
+def test_training_columns_shape():
+    assert grist.TRAINING_COLUMNS == (
+        "item_key", "domain", "system", "user_msg", "assistant_msg",
+        "extra_turns", "source", "exclure", "notes",
+    )
+    assert "exclure" in grist.TRAINING_COLUMNS
+
+
+def test_exports_dir_under_repo_root():
+    # EXPORTS_DIR sits next to the heldout/ dir at the repo root.
+    assert grist.EXPORTS_DIR.name == "exports"

From 42e7af7f8e332cf9b5e943b7886b58ea5ef72bc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 12:05:24 +0200
Subject: [PATCH 02/24] feat(grist): add Grist REST client

---
 mascarade-eval/mascarade_eval/grist/client.py | 104 ++++++++++++++++++
 mascarade-eval/tests/test_grist_client.py     |  67 +++++++++++
 2 files changed, 171 insertions(+)
 create mode 100644 mascarade-eval/mascarade_eval/grist/client.py
 create mode 100644 mascarade-eval/tests/test_grist_client.py

diff --git a/mascarade-eval/mascarade_eval/grist/client.py b/mascarade-eval/mascarade_eval/grist/client.py
new file mode 100644
index 0000000..00d55b3
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/client.py
@@ -0,0 +1,104 @@
+# mascarade_eval/grist/client.py
+"""Thin Grist REST client. The HTTP transport is injectable for tests."""
+from __future__ import annotations
+
+import json
+import os
+import sys
+import urllib.error
+import urllib.request
+
+from . import GRIST_BASE, KEY_FILE
+
+_BOOL_COLS = {"exclure"}
+_INT_COLS = {"n_items", "n_rows"}
+
+
+def _col_type(name: str) -> str:
+    if name in _BOOL_COLS:
+        return "Bool"
+    if name in _INT_COLS:
+        return "Int"
+    return "Text"
+
+
+def load_grist_key() -> str:
+    """Return the Grist API key from env or ~/.config/electron-rare/grist.env."""
+    key = os.environ.get("GRIST_API_KEY")
+    if key:
+        return key
+    if KEY_FILE.exists():
+        for line in KEY_FILE.read_text().splitlines():
+            if line.strip().startswith("GRIST_API_KEY="):
+                return line.split("=", 1)[1].strip().strip('"')
+    sys.exit("GRIST_API_KEY not found (env or ~/.config/electron-rare/grist.env)")
+
+
+def load_doc_id(name: str) -> str | None:
+    """Return doc ID <name> from the environment or grist.env, else None."""
+    env = os.environ.get(name)
+    if env:
+        return env
+    if KEY_FILE.exists():
+        for line in KEY_FILE.read_text().splitlines():
+            if line.strip().startswith(f"{name}="):
+                return line.split("=", 1)[1].strip().strip('"')
+    return None
+
+
+def _http_transport(method: str, url: str, key: str, body: dict | None) -> dict:
+    data = json.dumps(body).encode() if body is not None else None
+    req = urllib.request.Request(
+        url, data=data, method=method,
+        headers={"Authorization": f"Bearer {key}",
+                 "Content-Type": "application/json"},
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            raw = resp.read().decode("utf-8", "replace")
+            return json.loads(raw) if raw else {}
+    except urllib.error.HTTPError as exc:
+        detail = exc.read().decode("utf-8", "replace")[:300]
+        raise RuntimeError(f"Grist {method} {url} -> HTTP {exc.code}: {detail}")
+
+
+class GristClient:
+    """Records-level access to one Grist document."""
+
+    def __init__(self, doc_id: str, key: str, transport=_http_transport):
+        self.doc_id = doc_id
+        self.key = key
+        self._transport = transport
+
+    @classmethod
+    def from_env(cls, doc_id: str) -> "GristClient":
+        return cls(doc_id, load_grist_key())
+
+    def _api(self, method: str, path: str, body: dict | None = None) -> dict:
+        return self._transport(method, f"{GRIST_BASE}{path}", self.key, body)
+
+    def list_tables(self) -> set[str]:
+        resp = self._api("GET", f"/docs/{self.doc_id}/tables")
+        return {t["id"] for t in resp.get("tables", [])}
+
+    def create_table(self, table: str, columns: tuple[str, ...]) -> None:
+        cols = [{"id": c, "fields": {"label": c, "type": _col_type(c)}}
+                for c in columns]
+        self._api("POST", f"/docs/{self.doc_id}/tables",
+                  {"tables": [{"id": table, "columns": cols}]})
+
+    def ensure_table(self, table: str, columns: tuple[str, ...]) -> None:
+        if table not in self.list_tables():
+            self.create_table(table, columns)
+
+    def fetch_records(self, table: str) -> list[dict]:
+        resp = self._api("GET", f"/docs/{self.doc_id}/tables/{table}/records")
+        return [{"_id": r["id"], **r["fields"]} for r in resp.get("records", [])]
+
+    def add_records(self, table: str, rows: list[dict]) -> None:
+        if not rows:
+            return
+        for start in range(0, len(rows), 100):
+            chunk = rows[start:start + 100]
+            self._api("POST", f"/docs/{self.doc_id}/tables/{table}/records",
+                      {"records": [{"fields": r} for r in chunk]})
diff --git a/mascarade-eval/tests/test_grist_client.py b/mascarade-eval/tests/test_grist_client.py
new file mode 100644
index 0000000..0bbe9a1
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_client.py
@@ -0,0 +1,67 @@
+# tests/test_grist_client.py
+import pytest
+from mascarade_eval.grist.client import GristClient, load_grist_key
+
+
+def _recording_transport(log):
+    def transport(method, url, key, body):
+        log.append((method, url, body))
+        if method == "GET" and url.endswith("/tables"):
+            return {"tables": [{"id": "Existing"}]}
+        if method == "GET" and "/records" in url:
+            return {"records": [
+                {"id": 1, "fields": {"item_key": "k1", "exclure": False}},
+                {"id": 2, "fields": {"item_key": "k2", "exclure": True}},
+            ]}
+        return {}
+    return transport
+
+
+def test_list_tables_returns_ids():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    assert c.list_tables() == {"Existing"}
+    assert log[0][0] == "GET"
+    assert log[0][1] == "https://grist.saillant.cc/api/docs/doc1/tables"
+
+
+def test_fetch_records_flattens_id_into_fields():
+    c = GristClient("doc1", "key1", transport=_recording_transport([]))
+    rows = c.fetch_records("Mascarade_Training")
+    assert rows == [
+        {"_id": 1, "item_key": "k1", "exclure": False},
+        {"_id": 2, "item_key": "k2", "exclure": True},
+    ]
+
+
+def test_add_records_posts_fields_wrapped():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    c.add_records("T", [{"a": "1"}, {"a": "2"}])
+    method, url, body = log[-1]
+    assert method == "POST"
+    assert url.endswith("/docs/doc1/tables/T/records")
+    assert body == {"records": [{"fields": {"a": "1"}},
+                                {"fields": {"a": "2"}}]}
+
+
+def test_add_records_noop_on_empty():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    c.add_records("T", [])
+    assert log == []
+
+
+def test_create_table_types_exclure_as_bool():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    c.create_table("T", ("item_key", "exclure", "n_items"))
+    method, url, body = log[-1]
+    assert method == "POST"
+    cols = {col["id"]: col["fields"]["type"] for col in body["tables"][0]["columns"]}
+    assert cols == {"item_key": "Text", "exclure": "Bool", "n_items": "Int"}
+
+
+def test_load_grist_key_prefers_env(monkeypatch):
+    monkeypatch.setenv("GRIST_API_KEY", "env-key")
+    assert load_grist_key() == "env-key"

From 14dae57282897ad94a7fbb20775c80eb3751a5d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 12:13:34 +0200
Subject: [PATCH 03/24] feat(grist): message flatten/rebuild transforms

---
 .../mascarade_eval/grist/migrate.py           | 65 +++++++++++++++++
 .../tests/test_grist_migrate_transforms.py    | 73 +++++++++++++++++++
 2 files changed, 138 insertions(+)
 create mode 100644 mascarade-eval/mascarade_eval/grist/migrate.py
 create mode 100644 mascarade-eval/tests/test_grist_migrate_transforms.py

diff --git a/mascarade-eval/mascarade_eval/grist/migrate.py b/mascarade-eval/mascarade_eval/grist/migrate.py
new file mode 100644
index 0000000..7f434df
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/migrate.py
@@ -0,0 +1,65 @@
+# mascarade_eval/grist/migrate.py
+"""Backfill the training corpus from HuggingFace into Grist.
+
+Pure transforms (flatten_messages / rebuild_messages) are unit-tested;
+migrate_domain wires them to HF download + insert-only ingestion.
+"""
+from __future__ import annotations
+
+import json
+
+_ROLE_NORMAL = {"user": "user", "human": "user",
+                "assistant": "assistant", "gpt": "assistant",
+                "system": "system"}
+
+
+def _normalize(record: dict) -> list[dict]:
+    """Return [{role, content}, ...] from an OpenAI or ShareGPT record."""
+    raw = record.get("messages") or record.get("conversations") or []
+    out: list[dict] = []
+    for m in raw:
+        if not isinstance(m, dict):
+            continue
+        role = _ROLE_NORMAL.get(m.get("role") or m.get("from") or "")
+        if role is None:
+            continue
+        content = m.get("content") or m.get("value") or ""
+        out.append({"role": role, "content": content})
+    return out
+
+
+def flatten_messages(record: dict) -> dict:
+    """Collapse a chat record into editable columns.
+
+    Returns system/user_msg/assistant_msg/extra_turns. extra_turns is ""
+    only for [system,] user, assistant in that order, the one shape
+    rebuild_messages can restore; otherwise it holds all messages as JSON.
+    """
+    msgs = _normalize(record)
+    systems = [m for m in msgs if m["role"] == "system"]
+    users = [m for m in msgs if m["role"] == "user"]
+    assistants = [m for m in msgs if m["role"] == "assistant"]
+    single_turn = [m["role"] for m in msgs] in (
+        ["user", "assistant"], ["system", "user", "assistant"])
+    flat = {
+        "system": systems[0]["content"] if systems else "",
+        "user_msg": users[0]["content"] if users else "",
+        "assistant_msg": assistants[0]["content"] if assistants else "",
+        "extra_turns": "",
+    }
+    if not single_turn:
+        flat["extra_turns"] = json.dumps(msgs, ensure_ascii=False)
+    return flat
+
+
+def rebuild_messages(row: dict) -> dict:
+    """Inverse of flatten_messages: return {"messages": [...]}."""
+    extra = row.get("extra_turns") or ""
+    if extra:
+        return {"messages": json.loads(extra)}
+    msgs: list[dict] = []
+    if row.get("system"):
+        msgs.append({"role": "system", "content": row["system"]})
+    msgs.append({"role": "user", "content": row.get("user_msg", "")})
+    msgs.append({"role": "assistant", "content": row.get("assistant_msg", "")})
+    return {"messages": msgs}
diff --git a/mascarade-eval/tests/test_grist_migrate_transforms.py b/mascarade-eval/tests/test_grist_migrate_transforms.py
new file mode 100644
index 0000000..d81e547
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_migrate_transforms.py
@@ -0,0 +1,73 @@
+# tests/test_grist_migrate_transforms.py
+import json
+from mascarade_eval.grist.migrate import flatten_messages, rebuild_messages
+
+
+def test_flatten_single_turn_openai():
+    rec = {"messages": [
+        {"role": "system", "content": "S"},
+        {"role": "user", "content": "Q"},
+        {"role": "assistant", "content": "A"},
+    ]}
+    flat = flatten_messages(rec)
+    assert flat == {"system": "S", "user_msg": "Q",
+                    "assistant_msg": "A", "extra_turns": ""}
+
+
+def test_flatten_single_turn_sharegpt():
+    rec = {"conversations": [
+        {"from": "human", "value": "Q"},
+        {"from": "gpt", "value": "A"},
+    ]}
+    flat = flatten_messages(rec)
+    assert flat == {"system": "", "user_msg": "Q",
+                    "assistant_msg": "A", "extra_turns": ""}
+
+
+def test_flatten_multi_turn_keeps_extra_turns():
+    rec = {"messages": [
+        {"role": "user", "content": "Q1"},
+        {"role": "assistant", "content": "A1"},
+        {"role": "user", "content": "Q2"},
+        {"role": "assistant", "content": "A2"},
+    ]}
+    flat = flatten_messages(rec)
+    assert flat["user_msg"] == "Q1"
+    assert flat["assistant_msg"] == "A1"
+    parsed = json.loads(flat["extra_turns"])
+    assert parsed == [
+        {"role": "user", "content": "Q1"},
+        {"role": "assistant", "content": "A1"},
+        {"role": "user", "content": "Q2"},
+        {"role": "assistant", "content": "A2"},
+    ]
+
+
+def test_rebuild_single_turn_round_trip():
+    rec = {"messages": [
+        {"role": "system", "content": "S"},
+        {"role": "user", "content": "Q"},
+        {"role": "assistant", "content": "A"},
+    ]}
+    flat = flatten_messages(rec)
+    assert rebuild_messages(flat) == rec
+
+
+def test_rebuild_single_turn_no_system():
+    flat = {"system": "", "user_msg": "Q",
+            "assistant_msg": "A", "extra_turns": ""}
+    assert rebuild_messages(flat) == {"messages": [
+        {"role": "user", "content": "Q"},
+        {"role": "assistant", "content": "A"},
+    ]}
+
+
+def test_rebuild_multi_turn_uses_extra_turns():
+    rec = {"messages": [
+        {"role": "user", "content": "Q1"},
+        {"role": "assistant", "content": "A1"},
+        {"role": "user", "content": "Q2"},
+        {"role": "assistant", "content": "A2"},
+    ]}
+    flat = flatten_messages(rec)
+    assert rebuild_messages(flat) == rec

From ae73ef73ffd34724daf17082fc218bbb41ebf001 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 12:21:23 +0200
Subject: [PATCH 04/24] feat(grist): add insert-only ingestion core

Implement item_key (domain-prefixed SHA1), compute_delta
(skips existing keys + dedupes within batch), and ingest_rows
(ensure-table, fetch existing keys, insert delta only).
dry_run=True computes without writing.
Add FakeClient fixture in conftest.py for reuse in tasks 5/6/9.
---
 mascarade-eval/mascarade_eval/grist/ingest.py | 42 ++++++++++++
 mascarade-eval/tests/conftest.py              | 36 +++++++++++
 mascarade-eval/tests/test_grist_ingest.py     | 64 +++++++++++++++++++
 3 files changed, 142 insertions(+)
 create mode 100644 mascarade-eval/mascarade_eval/grist/ingest.py
 create mode 100644 mascarade-eval/tests/conftest.py
 create mode 100644 mascarade-eval/tests/test_grist_ingest.py

diff --git a/mascarade-eval/mascarade_eval/grist/ingest.py b/mascarade-eval/mascarade_eval/grist/ingest.py
new file mode 100644
index 0000000..9c391f9
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/ingest.py
@@ -0,0 +1,51 @@
+# mascarade_eval/grist/ingest.py
+"""Insert-only ingestion into Grist.
+
+This module holds the source-of-truth invariant: an existing item row is
+NEVER updated, so human edits in Grist survive re-ingestion.
+"""
+from __future__ import annotations
+
+import hashlib
+
+
+def item_key(domain: str, text: str) -> str:
+    """Stable key for an item: domain prefix + SHA1 of its text."""
+    digest = hashlib.sha1(text.encode("utf-8")).hexdigest()[:10]
+    return f"{domain}-{digest}"
+
+
+def compute_delta(existing_keys: set[str], incoming: list[dict],
+                  key_field: str = "item_key") -> list[dict]:
+    """Return only rows whose key is absent from Grist and unseen in batch."""
+    seen: set[str] = set(existing_keys)
+    delta: list[dict] = []
+    for row in incoming:
+        key = row[key_field]
+        if key in seen:
+            continue
+        seen.add(key)
+        delta.append(row)
+    return delta
+
+
+def ingest_rows(client, table: str, columns: tuple[str, ...],
+                rows: list[dict], key_field: str = "item_key",
+                dry_run: bool = False) -> dict:
+    """Insert-only ingestion. Returns {"inserted": n, "skipped": n}.
+
+    With dry_run=True nothing is written: the table is not created and a
+    table that does not exist yet is treated as holding no rows.
+    """
+    if dry_run:
+        # ensure_table would create the table, which is already a write.
+        stored = (client.fetch_records(table)
+                  if table in client.list_tables() else [])
+    else:
+        client.ensure_table(table, columns)
+        stored = client.fetch_records(table)
+    existing = {r[key_field] for r in stored if key_field in r}
+    delta = compute_delta(existing, rows, key_field)
+    if not dry_run:
+        client.add_records(table, delta)
+    return {"inserted": len(delta), "skipped": len(rows) - len(delta)}
diff --git a/mascarade-eval/tests/conftest.py b/mascarade-eval/tests/conftest.py
new file mode 100644
index 0000000..ff7c203
--- /dev/null
+++ b/mascarade-eval/tests/conftest.py
@@ -0,0 +1,36 @@
+# tests/conftest.py
+import pytest
+
+
+class FakeClient:
+    """In-memory stand-in for GristClient. Records all writes."""
+
+    def __init__(self, tables=None, records=None):
+        self.doc_id = "fake-doc"
+        self._tables = set(tables or [])
+        self._records = {t: list(rs) for t, rs in (records or {}).items()}
+        self.created = []
+        self.added = {}
+
+    def list_tables(self):
+        return set(self._tables)
+
+    def create_table(self, table, columns):
+        self._tables.add(table)
+        self.created.append((table, tuple(columns)))
+
+    def ensure_table(self, table, columns):
+        if table not in self._tables:
+            self.create_table(table, columns)
+
+    def fetch_records(self, table):
+        return [dict(r) for r in self._records.get(table, [])]
+
+    def add_records(self, table, rows):
+        self.added.setdefault(table, []).extend(rows)
+        self._records.setdefault(table, []).extend(rows)
+
+
+@pytest.fixture
+def fake_client():
+    return FakeClient
diff --git a/mascarade-eval/tests/test_grist_ingest.py b/mascarade-eval/tests/test_grist_ingest.py
new file mode 100644
index 0000000..ed32b2d
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_ingest.py
@@ -0,0 +1,64 @@
+# tests/test_grist_ingest.py
+from mascarade_eval.grist import TRAINING_TABLE, TRAINING_COLUMNS
+from mascarade_eval.grist.ingest import item_key, compute_delta, ingest_rows
+
+
+def test_item_key_is_deterministic_and_domain_prefixed():
+    k1 = item_key("kicad", "How do I add a net class?")
+    k2 = item_key("kicad", "How do I add a net class?")
+    assert k1 == k2
+    assert k1.startswith("kicad-")
+
+
+def test_item_key_differs_by_text():
+    assert item_key("kicad", "A") != item_key("kicad", "B")
+
+
+def test_compute_delta_skips_existing_keys():
+    existing = {"kicad-aaaaaaaaaa"}
+    incoming = [
+        {"item_key": "kicad-aaaaaaaaaa", "user_msg": "old"},
+        {"item_key": "kicad-bbbbbbbbbb", "user_msg": "new"},
+    ]
+    delta = compute_delta(existing, incoming)
+    assert [r["item_key"] for r in delta] == ["kicad-bbbbbbbbbb"]
+
+
+def test_compute_delta_dedupes_within_batch():
+    incoming = [
+        {"item_key": "k1", "user_msg": "x"},
+        {"item_key": "k1", "user_msg": "x-dup"},
+    ]
+    delta = compute_delta(set(), incoming)
+    assert len(delta) == 1
+    assert delta[0]["user_msg"] == "x"
+
+
+def test_ingest_rows_inserts_only_new(fake_client):
+    client = fake_client(
+        tables=[TRAINING_TABLE],
+        records={TRAINING_TABLE: [{"item_key": "k1", "user_msg": "kept"}]},
+    )
+    rows = [
+        {"item_key": "k1", "user_msg": "WOULD OVERWRITE"},
+        {"item_key": "k2", "user_msg": "fresh"},
+    ]
+    report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, rows)
+    assert report == {"inserted": 1, "skipped": 1}
+    assert client.added[TRAINING_TABLE] == [{"item_key": "k2",
+                                             "user_msg": "fresh"}]
+
+
+def test_ingest_rows_creates_table_when_absent(fake_client):
+    client = fake_client(tables=[])
+    ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS,
+                [{"item_key": "k1"}])
+    assert client.created == [(TRAINING_TABLE, TRAINING_COLUMNS)]
+
+
+def test_ingest_rows_dry_run_writes_nothing(fake_client):
+    client = fake_client(tables=[TRAINING_TABLE])
+    report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS,
+                         [{"item_key": "k1"}], dry_run=True)
+    assert report == {"inserted": 1, "skipped": 0}
+    assert client.added == {}

From a168b17a3b81beda9ac079ffb516b5f27c21aa62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 12:27:14 +0200
Subject: [PATCH 05/24] feat(grist): add deterministic export

canonical_jsonl sorts by item_key and uses sort_keys=True so
the same Grist state always produces the same SHA256 digest.
export_domain filters exclure rows, writes a hashed .jsonl
snapshot, and journals one row to the Exports table.
dry_run=True computes the report without writing anything (no snapshot
file, no Exports row); it still reads the training rows from Grist.
---
 mascarade-eval/mascarade_eval/grist/export.py | 64 +++++++++++++++++
 mascarade-eval/tests/test_grist_export.py     | 69 +++++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 mascarade-eval/mascarade_eval/grist/export.py
 create mode 100644 mascarade-eval/tests/test_grist_export.py

diff --git a/mascarade-eval/mascarade_eval/grist/export.py b/mascarade-eval/mascarade_eval/grist/export.py
new file mode 100644
index 0000000..61b943e
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/export.py
@@ -0,0 +1,64 @@
+# mascarade_eval/grist/export.py
+"""Deterministic Grist -> .jsonl snapshot export, journaled in Exports."""
+from __future__ import annotations
+
+import datetime
+import hashlib
+import json
+from pathlib import Path
+
+from . import EXPORTS_COLUMNS, EXPORTS_TABLE, TRAINING_TABLE
+from .migrate import rebuild_messages
+
+
+def canonical_jsonl(keyed_rows: list[tuple[str, dict]]) -> str:
+    """Serialize (sort_key, object) pairs to JSONL ordered by sort_key.
+
+    Same input set -> same bytes, regardless of input order: rows sharing
+    a sort key are ordered by their JSON text. The key is never written.
+    """
+    lines = sorted((key, json.dumps(obj, ensure_ascii=False, sort_keys=True))
+                   for key, obj in keyed_rows)
+    return "\n".join(text for _, text in lines)
+
+
+def content_hash(text: str) -> str:
+    """SHA256 hex digest of the canonical snapshot text."""
+    return hashlib.sha256(text.encode("utf-8")).hexdigest()
+
+
+def _timestamp() -> str:
+    return datetime.datetime.now(datetime.UTC).strftime("%Y%m%dT%H%M%SZ")
+
+
+def export_domain(client, domain: str, out_dir: Path,
+                  dry_run: bool = False) -> dict:
+    """Export one domain's non-excluded training rows to a hashed snapshot.
+
+    Returns a report dict matching the Exports row written to Grist.
+    """
+    rows = [r for r in client.fetch_records(TRAINING_TABLE)
+            if r.get("domain") == domain and not r.get("exclure")]
+    payload = canonical_jsonl(
+        [(r.get("item_key", ""), rebuild_messages(r)) for r in rows])
+    digest = content_hash(payload)
+    stamp = _timestamp()
+    filename = f"{domain}.{stamp}.jsonl"
+    report = {
+        "export_id": f"{domain}-{stamp}",
+        "domain": domain,
+        "created_at": stamp,
+        "n_items": len(rows),
+        "content_hash": digest,
+        "output_file": filename,
+        "hf_dataset_id": "",
+    }
+    if dry_run:
+        return report
+    out_dir = Path(out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    (out_dir / filename).write_text(payload + ("\n" if payload else ""),
+                                    encoding="utf-8")
+    client.ensure_table(EXPORTS_TABLE, EXPORTS_COLUMNS)
+    client.add_records(EXPORTS_TABLE, [report])
+    return report
diff --git a/mascarade-eval/tests/test_grist_export.py b/mascarade-eval/tests/test_grist_export.py
new file mode 100644
index 0000000..3c8321e
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_export.py
@@ -0,0 +1,69 @@
+# tests/test_grist_export.py
+import json
+from mascarade_eval.grist import TRAINING_TABLE, EXPORTS_TABLE
+from mascarade_eval.grist.export import (
+    canonical_jsonl, content_hash, export_domain,
+)
+
+
+def test_canonical_jsonl_sorts_by_key():
+    keyed = [("b", {"v": 2}), ("a", {"v": 1})]
+    lines = canonical_jsonl(keyed).splitlines()
+    assert json.loads(lines[0]) == {"v": 1}
+    assert json.loads(lines[1]) == {"v": 2}
+
+
+def test_canonical_jsonl_is_order_independent():
+    a = [("x", {"v": 1}), ("y", {"v": 2})]
+    b = [("y", {"v": 2}), ("x", {"v": 1})]
+    assert canonical_jsonl(a) == canonical_jsonl(b)
+
+
+def test_canonical_jsonl_omits_the_sort_key_from_output():
+    text = canonical_jsonl([("x", {"v": 1})])
+    assert json.loads(text) == {"v": 1}  # no "x", no item_key
+
+
+def test_content_hash_stable():
+    text = canonical_jsonl([("x", {"v": 1})])
+    assert content_hash(text) == content_hash(text)
+    assert len(content_hash(text)) == 64
+
+
+def test_export_domain_filters_excluded_and_writes_file(fake_client, tmp_path):
+    client = fake_client(
+        tables=[TRAINING_TABLE],
+        records={TRAINING_TABLE: [
+            {"_id": 1, "item_key": "kicad-1", "domain": "kicad",
+             "user_msg": "Q1", "assistant_msg": "A1", "system": "",
+             "extra_turns": "", "source": "", "exclure": False, "notes": ""},
+            {"_id": 2, "item_key": "kicad-2", "domain": "kicad",
+             "user_msg": "Q2", "assistant_msg": "A2", "system": "",
+             "extra_turns": "", "source": "", "exclure": True, "notes": ""},
+        ]},
+    )
+    report = export_domain(client, "kicad", out_dir=tmp_path)
+    assert report["n_items"] == 1  # the excluded row is dropped
+    out_file = tmp_path / report["output_file"]
+    assert out_file.exists()
+    written = [json.loads(ln) for ln in out_file.read_text().splitlines()]
+    assert written == [{"messages": [
+        {"role": "user", "content": "Q1"},
+        {"role": "assistant", "content": "A1"},
+    ]}]
+    assert client.added[EXPORTS_TABLE][0]["domain"] == "kicad"
+    assert client.added[EXPORTS_TABLE][0]["content_hash"] == report["content_hash"]
+
+
+def test_export_domain_dry_run_writes_nothing(fake_client, tmp_path):
+    client = fake_client(
+        tables=[TRAINING_TABLE],
+        records={TRAINING_TABLE: [
+            {"_id": 1, "item_key": "kicad-1", "domain": "kicad",
+             "user_msg": "Q", "assistant_msg": "A", "system": "",
+             "extra_turns": "", "exclure": False}]},
+    )
+    report = export_domain(client, "kicad", out_dir=tmp_path, dry_run=True)
+    assert report["n_items"] == 1
+    assert list(tmp_path.iterdir()) == []
+    assert client.added == {}

From a34041e865383447ad517d3bec5ad8fdd54dc7de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 12:33:10 +0200
Subject: [PATCH 06/24] fix(grist): drop orphan snapshot on export failure

---
 mascarade-eval/mascarade_eval/grist/export.py | 13 ++++++++----
 mascarade-eval/tests/test_grist_export.py     | 20 +++++++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/mascarade-eval/mascarade_eval/grist/export.py b/mascarade-eval/mascarade_eval/grist/export.py
index 61b943e..ec28cc5 100644
--- a/mascarade-eval/mascarade_eval/grist/export.py
+++ b/mascarade-eval/mascarade_eval/grist/export.py
@@ -57,8 +57,13 @@ def export_domain(client, domain: str, out_dir: Path,
         return report
     out_dir = Path(out_dir)
     out_dir.mkdir(parents=True, exist_ok=True)
-    (out_dir / filename).write_text(payload + ("\n" if payload else ""),
-                                    encoding="utf-8")
-    client.ensure_table(EXPORTS_TABLE, EXPORTS_COLUMNS)
-    client.add_records(EXPORTS_TABLE, [report])
+    out_path = out_dir / filename
+    out_path.write_text(payload + ("\n" if payload else ""),
+                        encoding="utf-8")
+    try:
+        client.ensure_table(EXPORTS_TABLE, EXPORTS_COLUMNS)
+        client.add_records(EXPORTS_TABLE, [report])
+    except Exception:
+        out_path.unlink(missing_ok=True)
+        raise
     return report
diff --git a/mascarade-eval/tests/test_grist_export.py b/mascarade-eval/tests/test_grist_export.py
index 3c8321e..f9baea9 100644
--- a/mascarade-eval/tests/test_grist_export.py
+++ b/mascarade-eval/tests/test_grist_export.py
@@ -1,5 +1,6 @@
 # tests/test_grist_export.py
 import json
+import pytest
 from mascarade_eval.grist import TRAINING_TABLE, EXPORTS_TABLE
 from mascarade_eval.grist.export import (
     canonical_jsonl, content_hash, export_domain,
@@ -67,3 +68,22 @@ def test_export_domain_dry_run_writes_nothing(fake_client, tmp_path):
     assert report["n_items"] == 1
     assert list(tmp_path.iterdir()) == []
     assert client.added == {}
+
+
+def test_export_domain_removes_file_when_grist_logging_fails(
+        fake_client, tmp_path):
+    client = fake_client(
+        tables=[TRAINING_TABLE],
+        records={TRAINING_TABLE: [
+            {"_id": 1, "item_key": "kicad-1", "domain": "kicad",
+             "user_msg": "Q", "assistant_msg": "A", "system": "",
+             "extra_turns": "", "exclure": False}]},
+    )
+
+    def boom(table, rows):
+        raise RuntimeError("grist down")
+
+    client.add_records = boom
+    with pytest.raises(RuntimeError, match="grist down"):
+        export_domain(client, "kicad", out_dir=tmp_path)
+    assert list(tmp_path.iterdir()) == []  # no orphaned snapshot file

From 2d572ef72d4dc5e45ee6d01fbe2990faf6940e9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 12:39:22 +0200
Subject: [PATCH 07/24] feat(grist): wire HF backfill into ingestion

---
 .../mascarade_eval/grist/migrate.py           | 62 +++++++++++++++++++
 .../tests/test_grist_migrate_domain.py        | 40 ++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 mascarade-eval/tests/test_grist_migrate_domain.py

diff --git a/mascarade-eval/mascarade_eval/grist/migrate.py b/mascarade-eval/mascarade_eval/grist/migrate.py
index 7f434df..c804030 100644
--- a/mascarade-eval/mascarade_eval/grist/migrate.py
+++ b/mascarade-eval/mascarade_eval/grist/migrate.py
@@ -8,6 +8,10 @@
 
 import json
 
+from mascarade_eval import HF_ORG
+from . import REGISTRY_COLUMNS, REGISTRY_TABLE, TRAINING_COLUMNS, TRAINING_TABLE
+from .ingest import ingest_rows, item_key
+
 _ROLE_NORMAL = {"user": "user", "human": "user",
                 "assistant": "assistant", "gpt": "assistant",
                 "system": "system"}
@@ -63,3 +67,61 @@ def rebuild_messages(row: dict) -> dict:
     msgs.append({"role": "user", "content": row.get("user_msg", "")})
     msgs.append({"role": "assistant", "content": row.get("assistant_msg", "")})
     return {"messages": msgs}
+
+
+def _download_training_records(domain: str) -> list[dict]:
+    """Download <domain>_chat.jsonl from HF and parse it into records."""
+    from huggingface_hub import hf_hub_download
+    path = hf_hub_download(
+        repo_id=f"{HF_ORG}/mascarade-{domain}-dataset",
+        filename=f"{domain}_chat.jsonl",
+        repo_type="dataset",
+    )
+    records: list[dict] = []
+    with open(path, encoding="utf-8") as fh:
+        for line in fh:
+            line = line.strip()
+            if line:
+                records.append(json.loads(line))
+    return records
+
+
+def _to_training_row(domain: str, record: dict) -> dict:
+    flat = flatten_messages(record)
+    return {
+        "item_key": item_key(domain, flat["user_msg"]),
+        "domain": domain,
+        "system": flat["system"],
+        "user_msg": flat["user_msg"],
+        "assistant_msg": flat["assistant_msg"],
+        "extra_turns": flat["extra_turns"],
+        "source": f"{HF_ORG}/mascarade-{domain}-dataset",
+        "exclure": False,
+        "notes": "",
+    }
+
+
+def migrate_domain(client, domain: str, records: list[dict] | None = None,
+                   dry_run: bool = False) -> dict:
+    """Backfill one domain's HF training data into Grist (insert-only).
+
+    Pass `records` to skip the HF download (used by tests).
+    """
+    if records is None:
+        records = _download_training_records(domain)
+    rows = [_to_training_row(domain, r) for r in records]
+    report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, rows,
+                         dry_run=dry_run)
+    if not dry_run:
+        client.ensure_table(REGISTRY_TABLE, REGISTRY_COLUMNS)
+        client.add_records(REGISTRY_TABLE, [{
+            "name": f"mascarade-{domain}-train",
+            "family": "mascarade-training",
+            "domain": domain,
+            "hf_dataset_id": f"{HF_ORG}/mascarade-{domain}-dataset",
+            "license": "CC-BY-SA-4.0",
+            "n_items": len(rows),
+            "notes": f"backfilled {report['inserted']} new, "
+                     f"{report['skipped']} already present",
+        }])
+    return report
diff --git a/mascarade-eval/tests/test_grist_migrate_domain.py b/mascarade-eval/tests/test_grist_migrate_domain.py
new file mode 100644
index 0000000..64a365e
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_migrate_domain.py
@@ -0,0 +1,40 @@
+# tests/test_grist_migrate_domain.py
+from mascarade_eval.grist import TRAINING_TABLE, REGISTRY_TABLE
+from mascarade_eval.grist.migrate import migrate_domain
+
+
+def test_migrate_domain_ingests_flattened_rows(fake_client):
+    client = fake_client(tables=[])
+    records = [
+        {"messages": [{"role": "user", "content": "Q1"},
+                      {"role": "assistant", "content": "A1"}]},
+        {"messages": [{"role": "user", "content": "Q2"},
+                      {"role": "assistant", "content": "A2"}]},
+    ]
+    report = migrate_domain(client, "kicad", records=records)
+    assert report["inserted"] == 2
+    added = client.added[TRAINING_TABLE]
+    assert {r["user_msg"] for r in added} == {"Q1", "Q2"}
+    assert all(r["domain"] == "kicad" for r in added)
+    assert all(r["item_key"].startswith("kicad-") for r in added)
+    assert all(r["exclure"] is False for r in added)
+
+
+def test_migrate_domain_is_idempotent(fake_client):
+    client = fake_client(tables=[])
+    records = [{"messages": [{"role": "user", "content": "Q"},
+                             {"role": "assistant", "content": "A"}]}]
+    migrate_domain(client, "kicad", records=records)
+    report2 = migrate_domain(client, "kicad", records=records)
+    assert report2 == {"inserted": 0, "skipped": 1}
+
+
+def test_migrate_domain_writes_registry_row(fake_client):
+    client = fake_client(tables=[])
+    records = [{"messages": [{"role": "user", "content": "Q"},
+                             {"role": "assistant", "content": "A"}]}]
+    migrate_domain(client, "kicad", records=records)
+    reg = client.added[REGISTRY_TABLE]
+    assert reg[0]["name"] == "mascarade-kicad-train"
+    assert reg[0]["family"] == "mascarade-training"
+    assert reg[0]["hf_dataset_id"] == "Ailiance-fr/mascarade-kicad-dataset"

From 0d86e6766def8454afbab3c8611a120e28de8a22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 12:46:24 +0200
Subject: [PATCH 08/24] feat(grist): add HuggingFace snapshot publisher

---
 .../mascarade_eval/grist/publish.py           | 31 +++++++++++++++++
 mascarade-eval/tests/test_grist_publish.py    | 34 +++++++++++++++++++
 2 files changed, 65 insertions(+)
 create mode 100644 mascarade-eval/mascarade_eval/grist/publish.py
 create mode 100644 mascarade-eval/tests/test_grist_publish.py

diff --git a/mascarade-eval/mascarade_eval/grist/publish.py b/mascarade-eval/mascarade_eval/grist/publish.py
new file mode 100644
index 0000000..22ed14c
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/publish.py
@@ -0,0 +1,31 @@
+# mascarade_eval/grist/publish.py
+"""Publish an exported snapshot to its HuggingFace dataset repo."""
+from __future__ import annotations
+
+from pathlib import Path
+
+
+def _hf_upload(*, path_or_fileobj, path_in_repo, repo_id, repo_type,
+               commit_message):
+    from huggingface_hub import upload_file
+    upload_file(path_or_fileobj=path_or_fileobj, path_in_repo=path_in_repo,
+                repo_id=repo_id, repo_type=repo_type,
+                commit_message=commit_message)
+
+
+def publish_snapshot(snapshot_path: str, hf_dataset_id: str,
+                     filename: str, uploader=_hf_upload) -> None:
+    """Upload one exported .jsonl snapshot to its HF dataset repo.
+
+    `uploader` is injected for testing; production uses huggingface_hub.
+    """
+    path = Path(snapshot_path)
+    if not path.exists():
+        raise FileNotFoundError(f"snapshot not found: {snapshot_path}")
+    uploader(
+        path_or_fileobj=str(path),
+        path_in_repo=filename,
+        repo_id=hf_dataset_id,
+        repo_type="dataset",
+        commit_message=f"dataset: refresh {filename} from Grist export",
+    )
diff --git a/mascarade-eval/tests/test_grist_publish.py b/mascarade-eval/tests/test_grist_publish.py
new file mode 100644
index 0000000..422ef04
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_publish.py
@@ -0,0 +1,34 @@
+# tests/test_grist_publish.py
+import pytest
+from mascarade_eval.grist.publish import publish_snapshot
+
+
+def test_publish_snapshot_uploads_with_expected_args(tmp_path):
+    snap = tmp_path / "kicad.20260519T120000Z.jsonl"
+    snap.write_text('{"messages": []}\n')
+    calls = []
+
+    def fake_upload(*, path_or_fileobj, path_in_repo, repo_id, repo_type,
+                    commit_message):
+        calls.append({
+            "path_or_fileobj": path_or_fileobj,
+            "path_in_repo": path_in_repo,
+            "repo_id": repo_id,
+            "repo_type": repo_type,
+            "commit_message": commit_message,
+        })
+
+    publish_snapshot(str(snap), "Ailiance-fr/mascarade-kicad-dataset",
+                     "kicad_chat.jsonl", uploader=fake_upload)
+    assert len(calls) == 1
+    assert calls[0]["repo_id"] == "Ailiance-fr/mascarade-kicad-dataset"
+    assert calls[0]["repo_type"] == "dataset"
+    assert calls[0]["path_in_repo"] == "kicad_chat.jsonl"
+    assert calls[0]["path_or_fileobj"] == str(snap)
+
+
+def test_publish_snapshot_rejects_missing_file(tmp_path):
+    with pytest.raises(FileNotFoundError):
+        publish_snapshot(str(tmp_path / "nope.jsonl"),
+                         "Ailiance-fr/mascarade-kicad-dataset",
+                         "kicad_chat.jsonl", uploader=lambda **k: None)

From 2ab86ba8c46c885d17fe370c71c1b2863170a9f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 12:51:55 +0200
Subject: [PATCH 09/24] feat(grist): add dataset management CLI

---
 mascarade-eval/mascarade_eval/grist/cli.py | 111 +++++++++++++++++++++
 mascarade-eval/tests/test_grist_cli.py     |  43 ++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 mascarade-eval/mascarade_eval/grist/cli.py
 create mode 100644 mascarade-eval/tests/test_grist_cli.py

diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py
new file mode 100644
index 0000000..aab2795
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/cli.py
@@ -0,0 +1,111 @@
+# mascarade_eval/grist/cli.py
+"""CLI for Grist-backed dataset management: ingest / export / migrate / publish.
+
+Run: python -m mascarade_eval.grist.cli <subcommand> [options]
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+from . import EXPORTS_DIR, TRAINING_COLUMNS, TRAINING_TABLE
+from .client import GristClient, load_doc_id
+from .export import export_domain
+from .ingest import item_key, ingest_rows
+from .migrate import flatten_messages, migrate_domain
+from .publish import publish_snapshot
+
+
+def build_parser() -> argparse.ArgumentParser:
+    ap = argparse.ArgumentParser(prog="grist-dataset", description=__doc__)
+    sub = ap.add_subparsers(dest="command", required=True)
+
+    p_ing = sub.add_parser("ingest", help="insert-only ingest a .jsonl")
+    p_ing.add_argument("--doc")
+    p_ing.add_argument("--jsonl", required=True)
+    p_ing.add_argument("--domain", required=True)
+    p_ing.add_argument("--dry-run", action="store_true")
+
+    p_exp = sub.add_parser("export", help="export a domain to a snapshot")
+    p_exp.add_argument("--doc")
+    p_exp.add_argument("--domain", required=True)
+    p_exp.add_argument("--dry-run", action="store_true")
+
+    p_mig = sub.add_parser("migrate", help="backfill a domain from HF")
+    p_mig.add_argument("--doc")
+    p_mig.add_argument("--domain", required=True)
+    p_mig.add_argument("--dry-run", action="store_true")
+
+    p_pub = sub.add_parser("publish", help="upload a snapshot to HF")
+    p_pub.add_argument("--snapshot", required=True)
+    p_pub.add_argument("--hf-dataset", required=True)
+    p_pub.add_argument("--filename", required=True)
+
+    return ap
+
+
+def resolve_doc(doc_arg: str | None) -> str:
+    """Return the doc ID from --doc or the GRIST_DOC_TRAINING env/file value."""
+    if doc_arg:
+        return doc_arg
+    doc = load_doc_id("GRIST_DOC_TRAINING")
+    if not doc:
+        sys.exit("no doc ID: pass --doc or set GRIST_DOC_TRAINING")
+    return doc
+
+
+def _ingest_jsonl_rows(domain: str, jsonl_path: str) -> list[dict]:
+    rows: list[dict] = []
+    for line in Path(jsonl_path).read_text(encoding="utf-8").splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            record = json.loads(line)
+        except json.JSONDecodeError as exc:
+            print(f"[warn] skipped malformed line: {exc}", file=sys.stderr)
+            continue
+        flat = flatten_messages(record)
+        rows.append({
+            "item_key": item_key(domain, flat["user_msg"]),
+            "domain": domain,
+            "system": flat["system"],
+            "user_msg": flat["user_msg"],
+            "assistant_msg": flat["assistant_msg"],
+            "extra_turns": flat["extra_turns"],
+            "source": record.get("source", ""),
+            "exclure": False,
+            "notes": "",
+        })
+    return rows
+
+
+def main(argv: list[str] | None = None) -> int:
+    args = build_parser().parse_args(argv)
+
+    if args.command == "publish":
+        publish_snapshot(args.snapshot, args.hf_dataset, args.filename)
+        print(f"published {args.snapshot} -> {args.hf_dataset}")
+        return 0
+
+    client = GristClient.from_env(resolve_doc(args.doc))
+
+    if args.command == "ingest":
+        rows = _ingest_jsonl_rows(args.domain, args.jsonl)
+        report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, rows,
+                             dry_run=args.dry_run)
+        print(f"ingest {args.domain}: {report}")
+    elif args.command == "export":
+        report = export_domain(client, args.domain, EXPORTS_DIR,
+                               dry_run=args.dry_run)
+        print(f"export {args.domain}: {report}")
+    elif args.command == "migrate":
+        report = migrate_domain(client, args.domain, dry_run=args.dry_run)
+        print(f"migrate {args.domain}: {report}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/mascarade-eval/tests/test_grist_cli.py b/mascarade-eval/tests/test_grist_cli.py
new file mode 100644
index 0000000..0886b21
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_cli.py
@@ -0,0 +1,43 @@
+# tests/test_grist_cli.py
+import pytest
+from mascarade_eval.grist.cli import build_parser, resolve_doc
+
+
+def test_parser_ingest_requires_doc_and_jsonl():
+    ns = build_parser().parse_args(
+        ["ingest", "--doc", "D", "--jsonl", "mine.jsonl", "--domain", "kicad"])
+    assert ns.command == "ingest"
+    assert ns.doc == "D"
+    assert ns.jsonl == "mine.jsonl"
+    assert ns.domain == "kicad"
+
+
+def test_parser_export_accepts_dry_run():
+    ns = build_parser().parse_args(
+        ["export", "--doc", "D", "--domain", "kicad", "--dry-run"])
+    assert ns.command == "export"
+    assert ns.dry_run is True
+
+
+def test_parser_migrate_and_publish():
+    p = build_parser()
+    m = p.parse_args(["migrate", "--doc", "D", "--domain", "kicad"])
+    assert m.command == "migrate"
+    pub = p.parse_args(
+        ["publish", "--snapshot", "exports/kicad.x.jsonl",
+         "--hf-dataset", "Ailiance-fr/mascarade-kicad-dataset",
+         "--filename", "kicad_chat.jsonl"])
+    assert pub.command == "publish"
+    assert pub.hf_dataset == "Ailiance-fr/mascarade-kicad-dataset"
+
+
+def test_resolve_doc_prefers_explicit_arg():
+    assert resolve_doc("explicit-id") == "explicit-id"
+
+
+def test_resolve_doc_errors_when_unset(monkeypatch):
+    monkeypatch.delenv("GRIST_DOC_TRAINING", raising=False)
+    monkeypatch.setattr("mascarade_eval.grist.cli.load_doc_id",
+                        lambda name: None)
+    with pytest.raises(SystemExit):
+        resolve_doc(None)

From 50c3c0edd0a8652eb84bf4eb71cfff37bc58d244 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 12:56:25 +0200
Subject: [PATCH 10/24] fix(grist): clean exit on missing ingest file

---
 mascarade-eval/mascarade_eval/grist/cli.py | 13 +++++++++++--
 mascarade-eval/tests/test_grist_cli.py     |  6 ++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py
index aab2795..d92b0d5 100644
--- a/mascarade-eval/mascarade_eval/grist/cli.py
+++ b/mascarade-eval/mascarade_eval/grist/cli.py
@@ -47,7 +47,10 @@ def build_parser() -> argparse.ArgumentParser:
 
 
 def resolve_doc(doc_arg: str | None) -> str:
-    """Return the doc ID from --doc or the GRIST_DOC_TRAINING env/file value."""
+    """Return the doc ID from --doc or the GRIST_DOC_TRAINING env/file value.
+
+    Exits the program (sys.exit) if neither source provides a doc ID.
+    """
     if doc_arg:
         return doc_arg
     doc = load_doc_id("GRIST_DOC_TRAINING")
@@ -57,8 +60,14 @@ def resolve_doc(doc_arg: str | None) -> str:
 
 
 def _ingest_jsonl_rows(domain: str, jsonl_path: str) -> list[dict]:
+    try:
+        text = Path(jsonl_path).read_text(encoding="utf-8")
+    except FileNotFoundError:
+        sys.exit(f"file not found: {jsonl_path}")
+    except UnicodeDecodeError as exc:
+        sys.exit(f"cannot decode {jsonl_path}: {exc}")
     rows: list[dict] = []
-    for line in Path(jsonl_path).read_text(encoding="utf-8").splitlines():
+    for line in text.splitlines():
         line = line.strip()
         if not line:
             continue
diff --git a/mascarade-eval/tests/test_grist_cli.py b/mascarade-eval/tests/test_grist_cli.py
index 0886b21..b131364 100644
--- a/mascarade-eval/tests/test_grist_cli.py
+++ b/mascarade-eval/tests/test_grist_cli.py
@@ -41,3 +41,9 @@ def test_resolve_doc_errors_when_unset(monkeypatch):
                         lambda name: None)
     with pytest.raises(SystemExit):
         resolve_doc(None)
+
+
+def test_ingest_jsonl_rows_exits_on_missing_file(tmp_path):
+    from mascarade_eval.grist.cli import _ingest_jsonl_rows
+    with pytest.raises(SystemExit):
+        _ingest_jsonl_rows("kicad", str(tmp_path / "does-not-exist.jsonl"))

From 3423aaaaf9ac84c3ab3d7641475ff5415fcb68a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:04:21 +0200
Subject: [PATCH 11/24] test(grist): add round-trip check and docs

---
 mascarade-eval/mascarade_eval/grist/README.md | 33 +++++++++++++++++
 mascarade-eval/tests/test_grist_roundtrip.py  | 37 +++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 mascarade-eval/mascarade_eval/grist/README.md
 create mode 100644 mascarade-eval/tests/test_grist_roundtrip.py

diff --git a/mascarade-eval/mascarade_eval/grist/README.md b/mascarade-eval/mascarade_eval/grist/README.md
new file mode 100644
index 0000000..20998e7
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/README.md
@@ -0,0 +1,33 @@
+# mascarade_eval.grist — Grist-backed dataset management
+
+Grist is the canonical source of truth for the mascarade training corpus.
+Mining ingests in insert-only mode (edits made in Grist are never
+overwritten); training and HF publication consume a deterministic export.
+
+## One-time setup
+
+1. Create an empty Grist doc "Mascarade Training" at grist.saillant.cc.
+2. Add `GRIST_DOC_TRAINING=<doc-id>` to `~/.config/electron-rare/grist.env`
+   (the file already holds `GRIST_API_KEY`).
+
+## Commands
+
+Run with `uv run python -m mascarade_eval.grist.cli <subcommand>`.
+
+- `migrate --domain kicad` — backfill a domain's HF training data into
+  Grist (insert-only). Run once per domain to seed the doc.
+- `ingest --domain kicad --jsonl mine.jsonl` — insert-only ingest of a
+  new mining/curation file. Existing rows are never touched.
+- `export --domain kicad` — write a hashed `.jsonl` snapshot to
+  `exports/` and log a row in the `Exports` table.
+- `publish --snapshot exports/kicad.<ts>.jsonl --hf-dataset
+  Ailiance-fr/mascarade-kicad-dataset --filename kicad_chat.jsonl` —
+  upload a snapshot to its HF dataset repo.
+
+Add `--dry-run` to `ingest`, `export`, or `migrate` to preview without
+writing to Grist or disk.
+
+## Human review
+
+Edit rows directly in the Grist UI. To drop an item from future exports,
+tick its `exclure` checkbox — `export` filters those rows out.
diff --git a/mascarade-eval/tests/test_grist_roundtrip.py b/mascarade-eval/tests/test_grist_roundtrip.py
new file mode 100644
index 0000000..0521cf9
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_roundtrip.py
@@ -0,0 +1,37 @@
+# tests/test_grist_roundtrip.py
+import json
+from mascarade_eval.grist.migrate import migrate_domain
+from mascarade_eval.grist.export import export_domain
+
+
+def test_migrate_then_export_round_trips(fake_client, tmp_path):
+    source = [
+        {"messages": [{"role": "user", "content": "Q1"},
+                      {"role": "assistant", "content": "A1"}]},
+        {"messages": [{"role": "system", "content": "S"},
+                      {"role": "user", "content": "Q2"},
+                      {"role": "assistant", "content": "A2"}]},
+    ]
+    client = fake_client(tables=[])
+    migrate_domain(client, "kicad", records=source)
+    report = export_domain(client, "kicad", out_dir=tmp_path)
+
+    assert report["n_items"] == 2
+    out_file = tmp_path / report["output_file"]
+    exported = [json.loads(ln) for ln in out_file.read_text().splitlines()]
+
+    def norm(msgs):
+        return sorted(json.dumps(m, sort_keys=True) for m in msgs)
+
+    source_sets = {tuple(norm(r["messages"])) for r in source}
+    export_sets = {tuple(norm(r["messages"])) for r in exported}
+    assert source_sets == export_sets
+
+
+def test_double_ingest_inserts_zero_the_second_time(fake_client):
+    source = [{"messages": [{"role": "user", "content": "Q"},
+                            {"role": "assistant", "content": "A"}]}]
+    client = fake_client(tables=[])
+    migrate_domain(client, "kicad", records=source)
+    second = migrate_domain(client, "kicad", records=source)
+    assert second["inserted"] == 0

From af8a305f05add0be4460b406f2ba0ae957d009fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:14:41 +0200
Subject: [PATCH 12/24] feat(grist): add review-status constants

---
 .../mascarade_eval/grist/__init__.py          | 24 +++++++++++++-----
 mascarade-eval/tests/test_grist_constants.py  | 25 ++++++++++++++++---
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/mascarade-eval/mascarade_eval/grist/__init__.py b/mascarade-eval/mascarade_eval/grist/__init__.py
index ac27099..360dbec 100644
--- a/mascarade-eval/mascarade_eval/grist/__init__.py
+++ b/mascarade-eval/mascarade_eval/grist/__init__.py
@@ -3,15 +3,16 @@
 
 Grist is the canonical source of truth. Mining ingests in insert-only
 mode (human edits in Grist are never overwritten); training and HF
-publication consume a deterministic export.
+publication consume a deterministic export of human-validated rows.
 """
 from pathlib import Path
 
 GRIST_BASE = "https://grist.saillant.cc/api"
 
-# Known existing doc (held-out eval). The training doc ID is provided at
-# runtime via --doc or the GRIST_DOC_TRAINING env/file value.
-DOC_HELDOUT = "eGbbrpzN3TeLq3sUd2YFA2"
+# Known existing docs. The training doc ID is provided at runtime via
+# --doc or the GRIST_DOC_TRAINING env/file value.
+DOC_HELDOUT = "eGbbrpzN3TeLq3sUd2YFA2"      # ailiance-llm-workflow
+DOC_MASCARADE = "dhyrySCayizD1PNqCNhCPN"    # mascarade-data
 
 KEY_FILE = Path.home() / ".config" / "electron-rare" / "grist.env"
 
@@ -19,10 +20,21 @@
 REGISTRY_TABLE = "Datasets_Registry"
 EXPORTS_TABLE = "Exports"
 
+# Human-review columns appended to every validation-target table.
+REVIEW_COLUMNS = ("review_status", "reviewer", "reviewed_at", "review_note")
+REVIEW_STATUSES = ("pending", "validated", "rejected", "needs_fix")
+REVIEWER_CHOICES = ("clems",)
+
+# Existing tables that receive the review columns, keyed by doc ID.
+REVIEW_TARGETS = {
+    DOC_HELDOUT: ("Heldout_Items", "Datasets"),
+    DOC_MASCARADE: ("Mascarade_Eval_Items", "Bench_31_domains"),
+}
+
 TRAINING_COLUMNS = (
     "item_key", "domain", "system", "user_msg", "assistant_msg",
-    "extra_turns", "source", "exclure", "notes",
-)
+    "extra_turns", "source", "notes",
+) + REVIEW_COLUMNS
 REGISTRY_COLUMNS = (
     "name", "family", "domain", "hf_dataset_id", "license",
     "n_items", "notes",
diff --git a/mascarade-eval/tests/test_grist_constants.py b/mascarade-eval/tests/test_grist_constants.py
index 0625521..af5e5d5 100644
--- a/mascarade-eval/tests/test_grist_constants.py
+++ b/mascarade-eval/tests/test_grist_constants.py
@@ -5,19 +5,36 @@
 def test_constants_present():
     assert grist.GRIST_BASE == "https://grist.saillant.cc/api"
     assert grist.DOC_HELDOUT == "eGbbrpzN3TeLq3sUd2YFA2"
+    assert grist.DOC_MASCARADE == "dhyrySCayizD1PNqCNhCPN"
     assert grist.TRAINING_TABLE == "Mascarade_Training"
     assert grist.REGISTRY_TABLE == "Datasets_Registry"
     assert grist.EXPORTS_TABLE == "Exports"
 
 
-def test_training_columns_shape():
+def test_review_constants():
+    assert grist.REVIEW_COLUMNS == (
+        "review_status", "reviewer", "reviewed_at", "review_note")
+    assert grist.REVIEW_STATUSES == (
+        "pending", "validated", "rejected", "needs_fix")
+    assert grist.REVIEWER_CHOICES == ("clems",)
+
+
+def test_review_targets_cover_both_docs():
+    assert grist.REVIEW_TARGETS == {
+        grist.DOC_HELDOUT: ("Heldout_Items", "Datasets"),
+        grist.DOC_MASCARADE: ("Mascarade_Eval_Items", "Bench_31_domains"),
+    }
+
+
+def test_training_columns_end_with_review_columns():
     assert grist.TRAINING_COLUMNS == (
         "item_key", "domain", "system", "user_msg", "assistant_msg",
-        "extra_turns", "source", "exclure", "notes",
+        "extra_turns", "source", "notes",
+        "review_status", "reviewer", "reviewed_at", "review_note",
     )
-    assert "exclure" in grist.TRAINING_COLUMNS
+    assert "exclure" not in grist.TRAINING_COLUMNS
+    assert grist.TRAINING_COLUMNS[-4:] == grist.REVIEW_COLUMNS
 
 
 def test_exports_dir_under_repo_root():
-    # EXPORTS_DIR sits next to the heldout/ dir at the repo root.
     assert grist.EXPORTS_DIR.name == "exports"

From 40bd1b293b8dc9aa855fb58e2cda65bf00f35043 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:15:23 +0200
Subject: [PATCH 13/24] refactor(grist): producers write review_status

---
 mascarade-eval/mascarade_eval/grist/cli.py        | 2 +-
 mascarade-eval/mascarade_eval/grist/migrate.py    | 2 +-
 mascarade-eval/tests/test_grist_migrate_domain.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py
index d92b0d5..7204a24 100644
--- a/mascarade-eval/mascarade_eval/grist/cli.py
+++ b/mascarade-eval/mascarade_eval/grist/cli.py
@@ -85,8 +85,8 @@ def _ingest_jsonl_rows(domain: str, jsonl_path: str) -> list[dict]:
             "assistant_msg": flat["assistant_msg"],
             "extra_turns": flat["extra_turns"],
             "source": record.get("source", ""),
-            "exclure": False,
             "notes": "",
+            "review_status": "pending",
         })
     return rows
 
diff --git a/mascarade-eval/mascarade_eval/grist/migrate.py b/mascarade-eval/mascarade_eval/grist/migrate.py
index c804030..a36aa19 100644
--- a/mascarade-eval/mascarade_eval/grist/migrate.py
+++ b/mascarade-eval/mascarade_eval/grist/migrate.py
@@ -96,8 +96,8 @@ def _to_training_row(domain: str, record: dict) -> dict:
         "assistant_msg": flat["assistant_msg"],
         "extra_turns": flat["extra_turns"],
         "source": f"{HF_ORG}/mascarade-{domain}-dataset",
-        "exclure": False,
         "notes": "",
+        "review_status": "pending",
     }
 
 
diff --git a/mascarade-eval/tests/test_grist_migrate_domain.py b/mascarade-eval/tests/test_grist_migrate_domain.py
index 64a365e..400c8f4 100644
--- a/mascarade-eval/tests/test_grist_migrate_domain.py
+++ b/mascarade-eval/tests/test_grist_migrate_domain.py
@@ -17,7 +17,7 @@ def test_migrate_domain_ingests_flattened_rows(fake_client):
     assert {r["user_msg"] for r in added} == {"Q1", "Q2"}
     assert all(r["domain"] == "kicad" for r in added)
     assert all(r["item_key"].startswith("kicad-") for r in added)
-    assert all(r["exclure"] is False for r in added)
+    assert all(r["review_status"] == "pending" for r in added)
 
 
 def test_migrate_domain_is_idempotent(fake_client):

From f4b6158cf923348103ad512fd6c27f0f03e2d2c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:16:19 +0200
Subject: [PATCH 14/24] feat(grist): add column DDL to client

---
 mascarade-eval/mascarade_eval/grist/client.py | 36 +++++++++----
 mascarade-eval/tests/test_grist_client.py     | 51 ++++++++++++++++---
 2 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/mascarade-eval/mascarade_eval/grist/client.py b/mascarade-eval/mascarade_eval/grist/client.py
index 00d55b3..6340c9b 100644
--- a/mascarade-eval/mascarade_eval/grist/client.py
+++ b/mascarade-eval/mascarade_eval/grist/client.py
@@ -8,18 +8,23 @@
 import urllib.error
 import urllib.request
 
-from . import GRIST_BASE, KEY_FILE
+from . import GRIST_BASE, KEY_FILE, REVIEW_STATUSES, REVIEWER_CHOICES
 
-_BOOL_COLS = {"exclure"}
 _INT_COLS = {"n_items", "n_rows"}
+_CHOICE_COLS = {
+    "review_status": REVIEW_STATUSES,
+    "reviewer": REVIEWER_CHOICES,
+}
 
 
-def _col_type(name: str) -> str:
-    if name in _BOOL_COLS:
-        return "Bool"
+def _col_fields(name: str) -> dict:
+    """Grist column `fields` payload for a column id (label/type/options)."""
+    if name in _CHOICE_COLS:
+        opts = json.dumps({"choices": list(_CHOICE_COLS[name])})
+        return {"label": name, "type": "Choice", "widgetOptions": opts}
     if name in _INT_COLS:
-        return "Int"
-    return "Text"
+        return {"label": name, "type": "Int"}
+    return {"label": name, "type": "Text"}
 
 
 def load_grist_key() -> str:
@@ -63,7 +68,7 @@ def _http_transport(method: str, url: str, key: str, body: dict | None) -> dict:
 
 
 class GristClient:
-    """Records-level access to one Grist document."""
+    """Records- and column-level access to one Grist document."""
 
     def __init__(self, doc_id: str, key: str, transport=_http_transport):
         self.doc_id = doc_id
@@ -82,8 +87,7 @@ def list_tables(self) -> set[str]:
         return {t["id"] for t in resp.get("tables", [])}
 
     def create_table(self, table: str, columns: tuple[str, ...]) -> None:
-        cols = [{"id": c, "fields": {"label": c, "type": _col_type(c)}}
-                for c in columns]
+        cols = [{"id": c, "fields": _col_fields(c)} for c in columns]
         self._api("POST", f"/docs/{self.doc_id}/tables",
                   {"tables": [{"id": table, "columns": cols}]})
 
@@ -91,6 +95,18 @@ def ensure_table(self, table: str, columns: tuple[str, ...]) -> None:
         if table not in self.list_tables():
             self.create_table(table, columns)
 
+    def list_columns(self, table: str) -> set[str]:
+        resp = self._api(
+            "GET", f"/docs/{self.doc_id}/tables/{table}/columns")
+        return {c["id"] for c in resp.get("columns", [])}
+
+    def add_columns(self, table: str, columns: tuple[str, ...]) -> None:
+        if not columns:
+            return
+        cols = [{"id": c, "fields": _col_fields(c)} for c in columns]
+        self._api("POST", f"/docs/{self.doc_id}/tables/{table}/columns",
+                  {"columns": cols})
+
     def fetch_records(self, table: str) -> list[dict]:
         resp = self._api("GET", f"/docs/{self.doc_id}/tables/{table}/records")
         return [{"_id": r["id"], **r["fields"]} for r in resp.get("records", [])]
diff --git a/mascarade-eval/tests/test_grist_client.py b/mascarade-eval/tests/test_grist_client.py
index 0bbe9a1..9af0fd0 100644
--- a/mascarade-eval/tests/test_grist_client.py
+++ b/mascarade-eval/tests/test_grist_client.py
@@ -8,10 +8,14 @@ def transport(method, url, key, body):
         log.append((method, url, body))
         if method == "GET" and url.endswith("/tables"):
             return {"tables": [{"id": "Existing"}]}
+        if method == "GET" and url.endswith("/columns"):
+            return {"columns": [{"id": "item_key"}, {"id": "domain"}]}
         if method == "GET" and "/records" in url:
             return {"records": [
-                {"id": 1, "fields": {"item_key": "k1", "exclure": False}},
-                {"id": 2, "fields": {"item_key": "k2", "exclure": True}},
+                {"id": 1, "fields": {"item_key": "k1",
+                                     "review_status": "pending"}},
+                {"id": 2, "fields": {"item_key": "k2",
+                                     "review_status": "validated"}},
             ]}
         return {}
     return transport
@@ -29,8 +33,8 @@ def test_fetch_records_flattens_id_into_fields():
     c = GristClient("doc1", "key1", transport=_recording_transport([]))
     rows = c.fetch_records("Mascarade_Training")
     assert rows == [
-        {"_id": 1, "item_key": "k1", "exclure": False},
-        {"_id": 2, "item_key": "k2", "exclure": True},
+        {"_id": 1, "item_key": "k1", "review_status": "pending"},
+        {"_id": 2, "item_key": "k2", "review_status": "validated"},
     ]
 
 
@@ -52,14 +56,45 @@ def test_add_records_noop_on_empty():
     assert log == []
 
 
-def test_create_table_types_exclure_as_bool():
+def test_create_table_assigns_column_types():
     log = []
     c = GristClient("doc1", "key1", transport=_recording_transport(log))
-    c.create_table("T", ("item_key", "exclure", "n_items"))
+    c.create_table("T", ("item_key", "n_items", "review_status"))
     method, url, body = log[-1]
     assert method == "POST"
-    cols = {col["id"]: col["fields"]["type"] for col in body["tables"][0]["columns"]}
-    assert cols == {"item_key": "Text", "exclure": "Bool", "n_items": "Int"}
+    cols = {col["id"]: col["fields"]["type"]
+            for col in body["tables"][0]["columns"]}
+    assert cols == {"item_key": "Text", "n_items": "Int",
+                    "review_status": "Choice"}
+
+
+def test_list_columns_returns_ids():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    assert c.list_columns("Heldout_Items") == {"item_key", "domain"}
+    method, url, _ = log[-1]
+    assert method == "GET"
+    assert url.endswith("/docs/doc1/tables/Heldout_Items/columns")
+
+
+def test_add_columns_posts_choice_with_widget_options():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    c.add_columns("Heldout_Items", ("review_status", "review_note"))
+    method, url, body = log[-1]
+    assert method == "POST"
+    assert url.endswith("/docs/doc1/tables/Heldout_Items/columns")
+    by_id = {col["id"]: col["fields"] for col in body["columns"]}
+    assert by_id["review_status"]["type"] == "Choice"
+    assert "pending" in by_id["review_status"]["widgetOptions"]
+    assert by_id["review_note"]["type"] == "Text"
+
+
+def test_add_columns_noop_on_empty():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    c.add_columns("T", ())
+    assert log == []
 
 
 def test_load_grist_key_prefers_env(monkeypatch):

From 4a3a071c6e93db3fde6daa0d107ffbe5582173bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:17:09 +0200
Subject: [PATCH 15/24] feat(grist): add review-column schema migration

---
 mascarade-eval/mascarade_eval/grist/schema.py | 34 ++++++++++++++++++
 mascarade-eval/tests/conftest.py              | 12 ++++++-
 mascarade-eval/tests/test_grist_schema.py     | 36 +++++++++++++++++++
 3 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 mascarade-eval/mascarade_eval/grist/schema.py
 create mode 100644 mascarade-eval/tests/test_grist_schema.py

diff --git a/mascarade-eval/mascarade_eval/grist/schema.py b/mascarade-eval/mascarade_eval/grist/schema.py
new file mode 100644
index 0000000..40a6895
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/schema.py
@@ -0,0 +1,34 @@
+# mascarade_eval/grist/schema.py
+"""Add the human-review columns to existing Grist tables (idempotent).
+
+A column already present on a table is never recreated, so re-running
+the migration is safe. New tables created by the pipeline already carry
+the review columns via TRAINING_COLUMNS.
+"""
+from __future__ import annotations
+
+from . import REVIEW_COLUMNS
+
+
+def ensure_review_columns(client, table: str) -> list[str]:
+    """Add any missing review column to one table. Returns columns added."""
+    existing = client.list_columns(table)
+    missing = [c for c in REVIEW_COLUMNS if c not in existing]
+    if missing:
+        client.add_columns(table, tuple(missing))
+    return missing
+
+
+def migrate_doc(client, tables: tuple[str, ...]) -> dict:
+    """Ensure review columns on each table that exists in the document.
+
+    A table absent from the document is reported as None (skipped).
+    """
+    present = client.list_tables()
+    report: dict = {}
+    for table in tables:
+        if table in present:
+            report[table] = ensure_review_columns(client, table)
+        else:
+            report[table] = None
+    return report
diff --git a/mascarade-eval/tests/conftest.py b/mascarade-eval/tests/conftest.py
index ff7c203..2b1f464 100644
--- a/mascarade-eval/tests/conftest.py
+++ b/mascarade-eval/tests/conftest.py
@@ -5,24 +5,34 @@
 class FakeClient:
     """In-memory stand-in for GristClient. Records all writes."""
 
-    def __init__(self, tables=None, records=None):
+    def __init__(self, tables=None, records=None, columns=None):
         self.doc_id = "fake-doc"
         self._tables = set(tables or [])
         self._records = {t: list(rs) for t, rs in (records or {}).items()}
+        self._columns = {t: list(cs) for t, cs in (columns or {}).items()}
         self.created = []
         self.added = {}
+        self.added_columns = {}
 
     def list_tables(self):
         return set(self._tables)
 
     def create_table(self, table, columns):
         self._tables.add(table)
+        self._columns[table] = list(columns)
         self.created.append((table, tuple(columns)))
 
     def ensure_table(self, table, columns):
         if table not in self._tables:
             self.create_table(table, columns)
 
+    def list_columns(self, table):
+        return set(self._columns.get(table, []))
+
+    def add_columns(self, table, columns):
+        self._columns.setdefault(table, []).extend(columns)
+        self.added_columns.setdefault(table, []).extend(columns)
+
     def fetch_records(self, table):
         return [dict(r) for r in self._records.get(table, [])]
 
diff --git a/mascarade-eval/tests/test_grist_schema.py b/mascarade-eval/tests/test_grist_schema.py
new file mode 100644
index 0000000..14ba110
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_schema.py
@@ -0,0 +1,36 @@
+# tests/test_grist_schema.py
+from mascarade_eval.grist import REVIEW_COLUMNS
+from mascarade_eval.grist.schema import ensure_review_columns, migrate_doc
+
+
+def test_ensure_review_columns_adds_all_when_absent(fake_client):
+    client = fake_client(tables=["Heldout_Items"],
+                         columns={"Heldout_Items": ["item_key", "prompt"]})
+    added = ensure_review_columns(client, "Heldout_Items")
+    assert added == list(REVIEW_COLUMNS)
+    assert client.added_columns["Heldout_Items"] == list(REVIEW_COLUMNS)
+
+
+def test_ensure_review_columns_is_idempotent(fake_client):
+    cols = ["item_key", *REVIEW_COLUMNS]
+    client = fake_client(tables=["Heldout_Items"],
+                         columns={"Heldout_Items": cols})
+    added = ensure_review_columns(client, "Heldout_Items")
+    assert added == []
+    assert "Heldout_Items" not in client.added_columns
+
+
+def test_ensure_review_columns_adds_only_missing(fake_client):
+    client = fake_client(
+        tables=["Datasets"],
+        columns={"Datasets": ["domain", "review_status", "reviewer"]})
+    added = ensure_review_columns(client, "Datasets")
+    assert added == ["reviewed_at", "review_note"]
+
+
+def test_migrate_doc_skips_absent_tables(fake_client):
+    client = fake_client(tables=["Heldout_Items"],
+                         columns={"Heldout_Items": ["item_key"]})
+    report = migrate_doc(client, ("Heldout_Items", "Mascarade_Training"))
+    assert report["Heldout_Items"] == list(REVIEW_COLUMNS)
+    assert report["Mascarade_Training"] is None

From 303bc13b42a4c4472be3e1a980582d90d7dbcbce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:17:57 +0200
Subject: [PATCH 16/24] feat(grist): add schema CLI subcommand

---
 mascarade-eval/mascarade_eval/grist/cli.py | 11 +++++++++++
 mascarade-eval/tests/test_grist_cli.py     | 16 ++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py
index 7204a24..e51971d 100644
--- a/mascarade-eval/mascarade_eval/grist/cli.py
+++ b/mascarade-eval/mascarade_eval/grist/cli.py
@@ -43,6 +43,8 @@ def build_parser() -> argparse.ArgumentParser:
     p_pub.add_argument("--hf-dataset", required=True)
     p_pub.add_argument("--filename", required=True)
 
+    sub.add_parser("schema", help="add review columns to existing tables")
+
     return ap
 
 
@@ -99,6 +101,15 @@ def main(argv: list[str] | None = None) -> int:
         print(f"published {args.snapshot} -> {args.hf_dataset}")
         return 0
 
+    if args.command == "schema":
+        from . import REVIEW_TARGETS
+        from .schema import migrate_doc
+        for doc_id, tables in REVIEW_TARGETS.items():
+            doc_client = GristClient.from_env(doc_id)
+            report = migrate_doc(doc_client, tables)
+            print(f"schema {doc_id}: {report}")
+        return 0
+
     client = GristClient.from_env(resolve_doc(args.doc))
 
     if args.command == "ingest":
diff --git a/mascarade-eval/tests/test_grist_cli.py b/mascarade-eval/tests/test_grist_cli.py
index b131364..6d5213c 100644
--- a/mascarade-eval/tests/test_grist_cli.py
+++ b/mascarade-eval/tests/test_grist_cli.py
@@ -47,3 +47,19 @@ def test_ingest_jsonl_rows_exits_on_missing_file(tmp_path):
     from mascarade_eval.grist.cli import _ingest_jsonl_rows
     with pytest.raises(SystemExit):
         _ingest_jsonl_rows("kicad", str(tmp_path / "does-not-exist.jsonl"))
+
+
+def test_parser_accepts_schema_command():
+    ns = build_parser().parse_args(["schema"])
+    assert ns.command == "schema"
+
+
+def test_schema_command_runs_over_review_targets(monkeypatch, fake_client):
+    from mascarade_eval.grist import cli
+    made = fake_client(tables=["Heldout_Items"],
+                       columns={"Heldout_Items": ["item_key"]})
+    monkeypatch.setattr(cli.GristClient, "from_env",
+                        classmethod(lambda c, doc: made))
+    rc = cli.main(["schema"])
+    assert rc == 0
+    assert made.added_columns["Heldout_Items"]

From f942b61b12cbf94994d0583a1ea282e56cd42e1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:20:34 +0200
Subject: [PATCH 17/24] feat(grist): gate export on review_status

export_domain now ships only rows with review_status=validated;
--include-pending re-includes pending rows. Completes the exclure ->
review_status amendment across the round-trip test and the package
README, which the plan's affected-files list had missed.
---
 mascarade-eval/mascarade_eval/grist/README.md |  7 ++-
 mascarade-eval/mascarade_eval/grist/cli.py    |  5 +-
 mascarade-eval/mascarade_eval/grist/export.py | 20 +++++--
 mascarade-eval/tests/test_grist_cli.py        |  6 +++
 mascarade-eval/tests/test_grist_export.py     | 52 ++++++++++++-------
 mascarade-eval/tests/test_grist_roundtrip.py  |  5 +-
 6 files changed, 68 insertions(+), 27 deletions(-)

diff --git a/mascarade-eval/mascarade_eval/grist/README.md b/mascarade-eval/mascarade_eval/grist/README.md
index 20998e7..b4aa209 100644
--- a/mascarade-eval/mascarade_eval/grist/README.md
+++ b/mascarade-eval/mascarade_eval/grist/README.md
@@ -29,5 +29,8 @@ writing to Grist or disk.
 
 ## Human review
 
-Edit rows directly in the Grist UI. To drop an item from future exports,
-tick its `exclure` checkbox — `export` filters those rows out.
+Edit rows directly in the Grist UI. Each row carries a `review_status`
+(`pending` / `validated` / `rejected` / `needs_fix`); `export` ships only
+`validated` rows. Pass `--include-pending` to `export` to also include
+rows still awaiting review. See `docs/grist-native-views-recipe.md` and
+`docs/grist-widget-setup.md` for the review surfaces.
diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py
index e51971d..2fa5fe7 100644
--- a/mascarade-eval/mascarade_eval/grist/cli.py
+++ b/mascarade-eval/mascarade_eval/grist/cli.py
@@ -32,6 +32,8 @@ def build_parser() -> argparse.ArgumentParser:
     p_exp.add_argument("--doc")
     p_exp.add_argument("--domain", required=True)
     p_exp.add_argument("--dry-run", action="store_true")
+    p_exp.add_argument("--include-pending", action="store_true",
+                       help="also export rows still pending review")
 
     p_mig = sub.add_parser("migrate", help="backfill a domain from HF")
     p_mig.add_argument("--doc")
@@ -119,7 +121,8 @@ def main(argv: list[str] | None = None) -> int:
         print(f"ingest {args.domain}: {report}")
     elif args.command == "export":
         report = export_domain(client, args.domain, EXPORTS_DIR,
-                               dry_run=args.dry_run)
+                               dry_run=args.dry_run,
+                               include_pending=args.include_pending)
         print(f"export {args.domain}: {report}")
     elif args.command == "migrate":
         report = migrate_domain(client, args.domain, dry_run=args.dry_run)
diff --git a/mascarade-eval/mascarade_eval/grist/export.py b/mascarade-eval/mascarade_eval/grist/export.py
index ec28cc5..f1095c9 100644
--- a/mascarade-eval/mascarade_eval/grist/export.py
+++ b/mascarade-eval/mascarade_eval/grist/export.py
@@ -31,14 +31,28 @@ def _timestamp() -> str:
     return datetime.datetime.now(datetime.UTC).strftime("%Y%m%dT%H%M%SZ")
 
 
+def _is_exportable(row: dict, include_pending: bool) -> bool:
+    """A row ships only when validated (or pending, if explicitly allowed).
+
+    `rejected` and `needs_fix` rows are always excluded. A row with no
+    review_status is treated as `pending`.
+    """
+    status = row.get("review_status") or "pending"
+    if status == "validated":
+        return True
+    return include_pending and status == "pending"
+
+
 def export_domain(client, domain: str, out_dir: Path,
-                  dry_run: bool = False) -> dict:
-    """Export one domain's non-excluded training rows to a hashed snapshot.
+                  dry_run: bool = False,
+                  include_pending: bool = False) -> dict:
+    """Export one domain's human-validated training rows to a hashed snapshot.
 
     Returns a report dict matching the Exports row written to Grist.
     """
     rows = [r for r in client.fetch_records(TRAINING_TABLE)
-            if r.get("domain") == domain and not r.get("exclure")]
+            if r.get("domain") == domain
+            and _is_exportable(r, include_pending)]
     payload = canonical_jsonl(
         [(r.get("item_key", ""), rebuild_messages(r)) for r in rows])
     digest = content_hash(payload)
diff --git a/mascarade-eval/tests/test_grist_cli.py b/mascarade-eval/tests/test_grist_cli.py
index 6d5213c..c8a1824 100644
--- a/mascarade-eval/tests/test_grist_cli.py
+++ b/mascarade-eval/tests/test_grist_cli.py
@@ -63,3 +63,9 @@ def test_schema_command_runs_over_review_targets(monkeypatch, fake_client):
     rc = cli.main(["schema"])
     assert rc == 0
     assert made.added_columns["Heldout_Items"]
+
+
+def test_parser_export_accepts_include_pending():
+    ns = build_parser().parse_args(
+        ["export", "--doc", "D", "--domain", "kicad", "--include-pending"])
+    assert ns.include_pending is True
diff --git a/mascarade-eval/tests/test_grist_export.py b/mascarade-eval/tests/test_grist_export.py
index f9baea9..3629308 100644
--- a/mascarade-eval/tests/test_grist_export.py
+++ b/mascarade-eval/tests/test_grist_export.py
@@ -7,6 +7,13 @@
 )
 
 
+def _row(key, status, q="Q", a="A"):
+    return {"_id": key, "item_key": f"kicad-{key}", "domain": "kicad",
+            "user_msg": q, "assistant_msg": a, "system": "",
+            "extra_turns": "", "source": "", "notes": "",
+            "review_status": status}
+
+
 def test_canonical_jsonl_sorts_by_key():
     keyed = [("b", {"v": 2}), ("a", {"v": 1})]
     lines = canonical_jsonl(keyed).splitlines()
@@ -22,7 +29,7 @@ def test_canonical_jsonl_is_order_independent():
 
 def test_canonical_jsonl_omits_the_sort_key_from_output():
     text = canonical_jsonl([("x", {"v": 1})])
-    assert json.loads(text) == {"v": 1}  # no "x", no item_key
+    assert json.loads(text) == {"v": 1}
 
 
 def test_content_hash_stable():
@@ -31,38 +38,46 @@ def test_content_hash_stable():
     assert len(content_hash(text)) == 64
 
 
-def test_export_domain_filters_excluded_and_writes_file(fake_client, tmp_path):
+def test_export_domain_ships_only_validated_rows(fake_client, tmp_path):
     client = fake_client(
         tables=[TRAINING_TABLE],
         records={TRAINING_TABLE: [
-            {"_id": 1, "item_key": "kicad-1", "domain": "kicad",
-             "user_msg": "Q1", "assistant_msg": "A1", "system": "",
-             "extra_turns": "", "source": "", "exclure": False, "notes": ""},
-            {"_id": 2, "item_key": "kicad-2", "domain": "kicad",
-             "user_msg": "Q2", "assistant_msg": "A2", "system": "",
-             "extra_turns": "", "source": "", "exclure": True, "notes": ""},
+            _row(1, "validated", q="Q1", a="A1"),
+            _row(2, "rejected", q="Q2", a="A2"),
+            _row(3, "pending", q="Q3", a="A3"),
+            _row(4, "needs_fix", q="Q4", a="A4"),
         ]},
     )
     report = export_domain(client, "kicad", out_dir=tmp_path)
-    assert report["n_items"] == 1  # the excluded row is dropped
+    assert report["n_items"] == 1  # only the validated row
     out_file = tmp_path / report["output_file"]
-    assert out_file.exists()
     written = [json.loads(ln) for ln in out_file.read_text().splitlines()]
     assert written == [{"messages": [
         {"role": "user", "content": "Q1"},
         {"role": "assistant", "content": "A1"},
     ]}]
-    assert client.added[EXPORTS_TABLE][0]["domain"] == "kicad"
     assert client.added[EXPORTS_TABLE][0]["content_hash"] == report["content_hash"]
 
 
-def test_export_domain_dry_run_writes_nothing(fake_client, tmp_path):
+def test_export_domain_include_pending_adds_pending_only(fake_client, tmp_path):
     client = fake_client(
         tables=[TRAINING_TABLE],
         records={TRAINING_TABLE: [
-            {"_id": 1, "item_key": "kicad-1", "domain": "kicad",
-             "user_msg": "Q", "assistant_msg": "A", "system": "",
-             "extra_turns": "", "exclure": False}]},
+            _row(1, "validated"),
+            _row(2, "pending"),
+            _row(3, "rejected"),
+            _row(4, ""),  # missing status -> treated as pending
+        ]},
+    )
+    report = export_domain(client, "kicad", out_dir=tmp_path,
+                           include_pending=True)
+    assert report["n_items"] == 3  # validated + pending + empty, not rejected
+
+
+def test_export_domain_dry_run_writes_nothing(fake_client, tmp_path):
+    client = fake_client(
+        tables=[TRAINING_TABLE],
+        records={TRAINING_TABLE: [_row(1, "validated")]},
     )
     report = export_domain(client, "kicad", out_dir=tmp_path, dry_run=True)
     assert report["n_items"] == 1
@@ -74,10 +89,7 @@ def test_export_domain_removes_file_when_grist_logging_fails(
         fake_client, tmp_path):
     client = fake_client(
         tables=[TRAINING_TABLE],
-        records={TRAINING_TABLE: [
-            {"_id": 1, "item_key": "kicad-1", "domain": "kicad",
-             "user_msg": "Q", "assistant_msg": "A", "system": "",
-             "extra_turns": "", "exclure": False}]},
+        records={TRAINING_TABLE: [_row(1, "validated")]},
     )
 
     def boom(table, rows):
@@ -86,4 +98,4 @@ def boom(table, rows):
     client.add_records = boom
     with pytest.raises(RuntimeError, match="grist down"):
         export_domain(client, "kicad", out_dir=tmp_path)
-    assert list(tmp_path.iterdir()) == []  # no orphaned snapshot file
+    assert list(tmp_path.iterdir()) == []
diff --git a/mascarade-eval/tests/test_grist_roundtrip.py b/mascarade-eval/tests/test_grist_roundtrip.py
index 0521cf9..eefbaee 100644
--- a/mascarade-eval/tests/test_grist_roundtrip.py
+++ b/mascarade-eval/tests/test_grist_roundtrip.py
@@ -14,7 +14,10 @@ def test_migrate_then_export_round_trips(fake_client, tmp_path):
     ]
     client = fake_client(tables=[])
     migrate_domain(client, "kicad", records=source)
-    report = export_domain(client, "kicad", out_dir=tmp_path)
+    # Migrated rows start as review_status=pending; include them so the
+    # round-trip exercises message semantics independent of review state.
+    report = export_domain(client, "kicad", out_dir=tmp_path,
+                           include_pending=True)
 
     assert report["n_items"] == 2
     out_file = tmp_path / report["output_file"]

From 7546cee3116728b9a6e54c105ac41a34ec75a51c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:21:10 +0200
Subject: [PATCH 18/24] docs(grist): add native views and form recipe

---
 .../docs/grist-native-views-recipe.md         | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 mascarade-eval/docs/grist-native-views-recipe.md

diff --git a/mascarade-eval/docs/grist-native-views-recipe.md b/mascarade-eval/docs/grist-native-views-recipe.md
new file mode 100644
index 0000000..2f8857d
--- /dev/null
+++ b/mascarade-eval/docs/grist-native-views-recipe.md
@@ -0,0 +1,71 @@
+# Grist native review views — operator recipe
+
+Manual Grist UI steps for the parts of the human-review layer that are
+not API-scriptable. Run the schema migration first
+(`python -m mascarade_eval.grist.cli schema`) so the review columns
+exist.
+
+## 1. review_status choice colors
+
+For each table carrying `review_status` (`Heldout_Items`, `Datasets`
+in doc *ailiance-llm-workflow*; `Mascarade_Eval_Items`,
+`Bench_31_domains` in doc *mascarade-data*, plus `Mascarade_Training`):
+
+1. Open the table, click the `review_status` column header → **Column
+   options**.
+2. Under **CHOICES**, confirm the four values are present: `pending`,
+   `validated`, `rejected`, `needs_fix`.
+3. Set the chip color of each: pending = grey `#E8E8E8`,
+   validated = green `#C6E5B3`, rejected = red `#F2B5B5`,
+   needs_fix = amber `#F5D9A6`.
+
+## 2. Bench_31_domains review page (doc mascarade-data)
+
+1. **Add Page** → name it `Bench review`.
+2. Add a **Table** widget bound to `Bench_31_domains`.
+3. Add a filter on `review_status` and a second on `domain`; save the
+   view so the filters persist.
+4. Conditional formatting (column header → **Column options** →
+   **Add conditional style**):
+   - `judge_score`: red when `$judge_score < 50`, amber when
+     `50 <= $judge_score < 70`, green otherwise.
+   - `validator_score`: red when `$validator_score < 50`, green when
+     `$validator_score >= 70`.
+   - `ppl`: red when `$ppl > 20`, amber when `10 < $ppl <= 20`.
+5. Add a **Card List** widget on the same page bound to
+   `Bench_31_domains`, linked to the table widget, showing `model`,
+   `domain`, `judge_score`, `judge_rationale`, `validator_score`,
+   `review_status`, `reviewer`, `review_note` — this is the per-row
+   review surface.
+
+## 3. Datasets review view (doc ailiance-llm-workflow)
+
+1. **Add Page** → `Datasets review`.
+2. Add a **Table** widget bound to `Datasets`, filtered on
+   `review_status`.
+3. Show `domain`, `name`, `n_rows`, `license`, `hf_dataset_id`,
+   `review_status`, `reviewer`, `review_note`.
+
+## 4. Read-only scoreboards
+
+For `Bench_public`, `Bench_niches_ppl`, `Bench_gateway`,
+`Bench_lift_v1`, `Bench_lift_v2`: add one page `Scoreboards` with a
+Table widget per table. Apply conditional formatting on the score
+columns (green high / red low) as in section 2. No review columns —
+these tables are reference only.
+
+## 5. Bench entry form (doc mascarade-data)
+
+1. **Add Page** → `Bench entry`.
+2. Add a **Form** widget bound to `Bench_31_domains`.
+3. Keep only these fields on the form: `model`, `domain`, `ppl`,
+   `task_score`, `task_metric`, `judge_score`, `source`, `date`.
+   Remove pipeline-only fields (`validator_image_digest`, `run_id`,
+   `host`, `runtime_s`, `tokens_per_s`, …).
+4. Click **Publish** and copy the share URL — this is the manual
+   bench-result entry form. Automated runs keep writing via the API.
+
+## 6. Clean-up
+
+Delete the empty default `Table1` (columns A/B/C) in each of the three
+documents.

From 5f1ba2adab891f909a07e5edca3242c89a1d6b30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:22:02 +0200
Subject: [PATCH 19/24] feat(grist): add review console widget

---
 .../widgets/review-console/index.html         | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 mascarade-eval/widgets/review-console/index.html

diff --git a/mascarade-eval/widgets/review-console/index.html b/mascarade-eval/widgets/review-console/index.html
new file mode 100644
index 0000000..0032f4f
--- /dev/null
+++ b/mascarade-eval/widgets/review-console/index.html
@@ -0,0 +1,141 @@
+<!doctype html>
+<html lang="fr">
+<head>
+<meta charset="utf-8">
+<title>Review Console</title>
+<script src="https://grist.saillant.cc/grist-plugin-api.js"></script>
+<style>
+  body { font: 14px/1.5 system-ui, sans-serif; margin: 0; padding: 16px;
+         color: #1a1a1a; }
+  #progress { color: #666; font-size: 13px; margin-bottom: 8px; }
+  .field-label { font-weight: 600; font-size: 12px; text-transform: uppercase;
+                 color: #888; margin-top: 12px; }
+  .field-value { white-space: pre-wrap; word-break: break-word;
+                 background: #f6f6f6; border-radius: 6px; padding: 8px; }
+  #context { color: #555; font-size: 13px; margin-bottom: 4px; }
+  #note { width: 100%; box-sizing: border-box; margin-top: 12px; padding: 6px; }
+  #buttons { display: flex; gap: 8px; margin-top: 12px; }
+  button { flex: 1; padding: 10px; border: 0; border-radius: 6px;
+           font-size: 14px; cursor: pointer; }
+  .validate { background: #C6E5B3; }
+  .reject   { background: #F2B5B5; }
+  .needsfix { background: #F5D9A6; }
+  .skip     { background: #E8E8E8; }
+  #done { font-size: 16px; color: #2a7; padding: 24px 0; }
+  #empty { color: #888; padding: 24px 0; }
+</style>
+</head>
+<body>
+  <div id="progress"></div>
+  <div id="card" hidden>
+    <div id="context"></div>
+    <div class="field-label">Item</div>
+    <div class="field-value" id="primary"></div>
+    <div class="field-label" id="secondary-label">Référence</div>
+    <div class="field-value" id="secondary"></div>
+    <input id="note" placeholder="note de revue (optionnelle)">
+    <div id="buttons">
+      <button class="validate" id="b-validate">✓ Valider (V)</button>
+      <button class="reject"   id="b-reject">✗ Rejeter (R)</button>
+      <button class="needsfix" id="b-needsfix">~ À corriger (F)</button>
+      <button class="skip"     id="b-skip">→ Passer (S)</button>
+    </div>
+  </div>
+  <div id="done" hidden>Tous les items en attente sont revus ✓</div>
+  <div id="empty" hidden>Aucune ligne dans cette table.</div>
+<script>
+"use strict";
+const REVIEWER = "clems";   // adjust to the reviewer's Grist choice value
+
+let rows = [];      // [{id, status, primary, secondary, context}]
+let queue = [];     // ids still pending
+let cursor = 0;
+
+const $ = (id) => document.getElementById(id);
+
+function rebuild(records) {
+  rows = records.map((rec) => {
+    const m = grist.mapColumnNames(rec) || {};
+    let ctx = m.context;
+    if (Array.isArray(ctx)) ctx = ctx.filter(Boolean).join(" · ");
+    return {
+      id: rec.id,
+      status: rec.review_status || "pending",
+      primary: m.primary == null ? "" : String(m.primary),
+      secondary: m.secondary == null ? "" : String(m.secondary),
+      context: ctx == null ? "" : String(ctx),
+    };
+  });
+  queue = rows.filter((r) => r.status === "pending").map((r) => r.id);
+  if (cursor >= queue.length) cursor = 0;
+  render();
+}
+
+function render() {
+  const total = rows.length;
+  const reviewed = rows.filter((r) => r.status !== "pending").length;
+  $("progress").textContent = total === 0 ? ""
+    : `revus ${reviewed} / ${total} — en attente ${queue.length}`;
+  $("empty").hidden = total !== 0;
+  const item = queue.length
+    ? rows.find((r) => r.id === queue[cursor]) : null;
+  $("card").hidden = !item;
+  $("done").hidden = !(total > 0 && !item);
+  if (!item) return;
+  $("context").textContent = item.context;
+  $("primary").textContent = item.primary;
+  $("secondary").textContent = item.secondary;
+  $("secondary-label").hidden = !item.secondary;
+  $("secondary").hidden = !item.secondary;
+}
+
+async function decide(status) {
+  if (!queue.length) return;
+  const id = queue[cursor];
+  await grist.selectedTable.update({
+    id,
+    fields: {
+      review_status: status,
+      reviewer: REVIEWER,
+      reviewed_at: new Date().toISOString(),
+      review_note: $("note").value,
+    },
+  });
+  $("note").value = "";
+  // grist.onRecords refires after the update and rebuilds the queue.
+}
+
+function skip() {
+  if (!queue.length) return;
+  cursor = (cursor + 1) % queue.length;
+  render();
+}
+
+$("b-validate").onclick = () => decide("validated");
+$("b-reject").onclick = () => decide("rejected");
+$("b-needsfix").onclick = () => decide("needs_fix");
+$("b-skip").onclick = skip;
+
+document.addEventListener("keydown", (e) => {
+  if (e.target.tagName === "INPUT") return;
+  const k = e.key.toLowerCase();
+  if (k === "v") decide("validated");
+  else if (k === "r") decide("rejected");
+  else if (k === "f") decide("needs_fix");
+  else if (k === "s" || e.key === "ArrowRight") skip();
+});
+
+grist.ready({
+  requiredAccess: "full",
+  columns: [
+    { name: "primary", title: "Texte principal (prompt / user_msg)" },
+    { name: "secondary", title: "Référence (reference / assistant_msg)",
+      optional: true },
+    { name: "context", title: "Contexte (domain, source)",
+      optional: true, allowMultiple: true },
+  ],
+});
+grist.onRecords(rebuild);
+</script>
+</body>
+</html>

From 038ba88cdb6a347408e3cee2cdedb345d95c2b50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:22:34 +0200
Subject: [PATCH 20/24] docs(grist): add widget setup recipe

---
 mascarade-eval/docs/grist-widget-setup.md | 67 +++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 mascarade-eval/docs/grist-widget-setup.md

diff --git a/mascarade-eval/docs/grist-widget-setup.md b/mascarade-eval/docs/grist-widget-setup.md
new file mode 100644
index 0000000..607325b
--- /dev/null
+++ b/mascarade-eval/docs/grist-widget-setup.md
@@ -0,0 +1,67 @@
+# Review Console widget — hosting, wiring, smoke test
+
+The widget at `widgets/review-console/index.html` is a static file. It
+must be served over HTTPS and registered in Grist as a Custom URL
+widget.
+
+## 1. Host the static file
+
+Serve the file behind the existing electron-server cloudflared tunnel.
+
+```bash
+# from the repo, on the dev machine
+scp widgets/review-console/index.html \
+    electron-server:/srv/grist-widgets/review-console/index.html
+```
+
+On electron-server, expose `/srv/grist-widgets/` via the existing
+static file server / Caddy / nginx and add a cloudflared route so the
+file is reachable at:
+
+```
+https://grist-widgets.saillant.cc/review-console/index.html
+```
+
+Verify: `curl -sI https://grist-widgets.saillant.cc/review-console/index.html`
+should return `HTTP/2 200`.
+
+> Hosting touches shared infra (cloudflared, electron-server) — confirm
+> with the operator before applying the route.
+
+## 2. Add a review page in Grist
+
+In doc *ailiance-llm-workflow*:
+
+1. **Add Page** → `Heldout review`.
+2. Add a **Custom** widget. Select **Custom URL** and paste
+   `https://grist-widgets.saillant.cc/review-console/index.html`.
+3. Bind the widget to the `Heldout_Items` table.
+4. When prompted, grant the widget **Full document access** (it must
+   write the review columns).
+5. Open the widget's **Column mapping**:
+   - `primary` → `prompt`
+   - `secondary` → `reference`
+   - `context` → `domain`, `source`
+
+Repeat for the future `Mascarade_Training` table (map `primary` →
+`user_msg`, `secondary` → `assistant_msg`) and for
+`Mascarade_Eval_Items` in doc *mascarade-data* (map `primary` →
+`question`, `secondary` → `reference`).
+
+## 3. Smoke-test checklist
+
+On the `Heldout review` page:
+
+- [ ] The progress line shows `revus 0 / 400 — en attente 400`.
+- [ ] The first pending item's prompt and reference render in full.
+- [ ] Pressing `V` writes `review_status = validated`, `reviewer`,
+      `reviewed_at` (ISO-8601) and advances to the next item; the
+      progress counter increments.
+- [ ] Pressing `R` and `F` write `rejected` / `needs_fix`.
+- [ ] A value typed in the note field lands in `review_note` and the
+      field clears after the decision.
+- [ ] `S` / `→` skips without writing.
+- [ ] After every pending row is decided, the widget shows
+      "Tous les items en attente sont revus ✓".
+- [ ] Re-running `python -m mascarade_eval.grist.cli export --domain
+      <d>` ships only the rows marked `validated`.

From e680d678979cf5f8cfa815272c33296d75f001bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 13:29:03 +0200
Subject: [PATCH 21/24] docs(grist): point widget recipe at live URL

---
 mascarade-eval/docs/grist-widget-setup.md | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/mascarade-eval/docs/grist-widget-setup.md b/mascarade-eval/docs/grist-widget-setup.md
index 607325b..0f786d7 100644
--- a/mascarade-eval/docs/grist-widget-setup.md
+++ b/mascarade-eval/docs/grist-widget-setup.md
@@ -6,27 +6,26 @@ widget.
 
 ## 1. Host the static file
 
-Serve the file behind the existing electron-server cloudflared tunnel.
+The widget is served from the `zacus-static` nginx bind-mount on
+electron-server (no new traefik route or cloudflared hostname needed —
+Grist only needs any HTTPS URL).
 
 ```bash
 # from the repo, on the dev machine
+ssh electron-server 'mkdir -p \
+    /home/electron/saillant-sites/zacus-static/review-console'
 scp widgets/review-console/index.html \
-    electron-server:/srv/grist-widgets/review-console/index.html
+    electron-server:/home/electron/saillant-sites/zacus-static/review-console/index.html
 ```
 
-On electron-server, expose `/srv/grist-widgets/` via the existing
-static file server / Caddy / nginx and add a cloudflared route so the
-file is reachable at:
+Live URL (verified `HTTP 200`):
 
 ```
-https://grist-widgets.saillant.cc/review-console/index.html
+https://zacus.saillant.cc/review-console/index.html
 ```
 
-Verify: `curl -sI https://grist-widgets.saillant.cc/review-console/index.html`
-should return `HTTP/2 200`.
-
-> Hosting touches shared infra (cloudflared, electron-server) — confirm
-> with the operator before applying the route.
+To redeploy after editing the widget, re-run the `scp` above — nginx
+serves the mounted directory live, no container restart.
 
 ## 2. Add a review page in Grist
 
@@ -34,7 +33,7 @@ In doc *ailiance-llm-workflow*:
 
 1. **Add Page** → `Heldout review`.
 2. Add a **Custom** widget. Select **Custom URL** and paste
-   `https://grist-widgets.saillant.cc/review-console/index.html`.
+   `https://zacus.saillant.cc/review-console/index.html`.
 3. Bind the widget to the `Heldout_Items` table.
 4. When prompted, grant the widget **Full document access** (it must
    write the review columns).

From 6e4505802693a8f2e3343a9c739103d24a38f2ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 17:03:24 +0200
Subject: [PATCH 22/24] docs(grist): host widget on admin.ailiance.fr

---
 mascarade-eval/docs/grist-widget-setup.md | 47 +++++++++++++++--------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/mascarade-eval/docs/grist-widget-setup.md b/mascarade-eval/docs/grist-widget-setup.md
index 0f786d7..3ba91a0 100644
--- a/mascarade-eval/docs/grist-widget-setup.md
+++ b/mascarade-eval/docs/grist-widget-setup.md
@@ -4,28 +4,43 @@ The widget at `widgets/review-console/index.html` is a static file. It
 must be served over HTTPS and registered in Grist as a Custom URL
 widget.
 
-## 1. Host the static file
+## 1. Hosting
 
-The widget is served from the `zacus-static` nginx bind-mount on
-electron-server (no new traefik route or cloudflared hostname needed —
-Grist only needs any HTTPS URL).
+The widget is served by a dedicated `review-widget` nginx container in
+`/home/electron/saillant-sites/` on electron-server, exposed through
+traefik on the existing `admin.ailiance.fr` hostname under `/review`
+(a `Host && PathPrefix` router — no new cloudflared hostname needed).
 
-```bash
-# from the repo, on the dev machine
-ssh electron-server 'mkdir -p \
-    /home/electron/saillant-sites/zacus-static/review-console'
-scp widgets/review-console/index.html \
-    electron-server:/home/electron/saillant-sites/zacus-static/review-console/index.html
+Compose service (`saillant-sites/docker-compose.yml`):
+
+```yaml
+  review-widget:
+    image: nginx:alpine
+    container_name: review-widget
+    restart: unless-stopped
+    networks: [traefik]
+    labels:
+      - traefik.enable=true
+      - traefik.docker.network=traefik
+      - traefik.http.routers.review-admin.rule=Host(`admin.ailiance.fr`) && PathPrefix(`/review`)
+      - traefik.http.routers.review-admin.entrypoints=websecure
+      - traefik.http.routers.review-admin.tls.certresolver=letsencrypt
+      - traefik.http.routers.review-admin.service=review-widget
+      - traefik.http.services.review-widget.loadbalancer.server.port=80
+    volumes:
+      - ./train-static:/usr/share/nginx/html:ro
 ```
 
-Live URL (verified `HTTP 200`):
+The widget file lives at `saillant-sites/train-static/review/index.html`.
+Redeploy after editing the widget (nginx serves the mount live, no
+restart):
 
-```
-https://zacus.saillant.cc/review-console/index.html
+```bash
+scp widgets/review-console/index.html \
+    electron-server:/home/electron/saillant-sites/train-static/review/index.html
 ```
 
-To redeploy after editing the widget, re-run the `scp` above — nginx
-serves the mounted directory live, no container restart.
+Live URL (verified `HTTP 200`): `https://admin.ailiance.fr/review/`
 
 ## 2. Add a review page in Grist
 
@@ -33,7 +48,7 @@ In doc *ailiance-llm-workflow*:
 
 1. **Add Page** → `Heldout review`.
 2. Add a **Custom** widget. Select **Custom URL** and paste
-   `https://zacus.saillant.cc/review-console/index.html`.
+   `https://admin.ailiance.fr/review/`.
 3. Bind the widget to the `Heldout_Items` table.
 4. When prompted, grant the widget **Full document access** (it must
    write the review columns).

From 794e8498680e68c91b0fe1821ebc3a903477b575 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 17:16:12 +0200
Subject: [PATCH 23/24] fix(grist): widget reads all columns for queue

---
 mascarade-eval/widgets/review-console/index.html | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mascarade-eval/widgets/review-console/index.html b/mascarade-eval/widgets/review-console/index.html
index 0032f4f..434e22a 100644
--- a/mascarade-eval/widgets/review-console/index.html
+++ b/mascarade-eval/widgets/review-console/index.html
@@ -135,7 +135,9 @@
       optional: true, allowMultiple: true },
   ],
 });
-grist.onRecords(rebuild);
+// includeColumns:"all" — the queue logic reads review_status, which is
+// not a mapped column, so the default "shown" set would omit it.
+grist.onRecords(rebuild, { includeColumns: "all" });
 </script>
 </body>
 </html>

From 8a0a8cbc60115b09481bb47b657341d263d0ba16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=27=C3=A9lectron=20rare?=
 <108685187+electron-rare@users.noreply.github.com>
Date: Tue, 19 May 2026 17:21:56 +0200
Subject: [PATCH 24/24] fix(grist): widget reads full table via docApi

---
 .../widgets/review-console/index.html         | 41 +++++++++++++------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/mascarade-eval/widgets/review-console/index.html b/mascarade-eval/widgets/review-console/index.html
index 434e22a..f698881 100644
--- a/mascarade-eval/widgets/review-console/index.html
+++ b/mascarade-eval/widgets/review-console/index.html
@@ -47,23 +47,40 @@
 "use strict";
 const REVIEWER = "clems";   // adjust to the reviewer's Grist choice value
 
+let mapping = null; // column mapping from onRecords (display columns)
 let rows = [];      // [{id, status, primary, secondary, context}]
 let queue = [];     // ids still pending
 let cursor = 0;
 
 const $ = (id) => document.getElementById(id);
 
-function rebuild(records) {
-  rows = records.map((rec) => {
-    const m = grist.mapColumnNames(rec) || {};
-    let ctx = m.context;
-    if (Array.isArray(ctx)) ctx = ctx.filter(Boolean).join(" · ");
+// Grist restricts onRecords/fetchSelectedTable to mapped columns, so the
+// review_* columns are invisible there. fetchTable via docApi (full
+// access) returns every column — that is the only channel that sees
+// review_status. onRecords is kept only to supply the column mapping
+// and to fire on every data change.
+async function refresh() {
+  if (!mapping) return;
+  const tableId = await grist.getSelectedTableId();
+  const data = await grist.docApi.fetchTable(tableId);
+  const ids = data.id || [];
+  const ctxCols = Array.isArray(mapping.context)
+    ? mapping.context
+    : (mapping.context ? [mapping.context] : []);
+  const cell = (name, i) => (name && data[name]) ? data[name][i] : null;
+  rows = ids.map((id, i) => {
+    const ctx = ctxCols
+      .map((c) => (data[c] ? data[c][i] : null))
+      .filter((v) => v != null && v !== "")
+      .join(" · ");
+    const p = cell(mapping.primary, i);
+    const s = cell(mapping.secondary, i);
     return {
-      id: rec.id,
-      status: rec.review_status || "pending",
-      primary: m.primary == null ? "" : String(m.primary),
-      secondary: m.secondary == null ? "" : String(m.secondary),
-      context: ctx == null ? "" : String(ctx),
+      id,
+      status: (data.review_status && data.review_status[i]) || "pending",
+      primary: p == null ? "" : String(p),
+      secondary: s == null ? "" : String(s),
+      context: ctx,
     };
   });
   queue = rows.filter((r) => r.status === "pending").map((r) => r.id);
@@ -135,9 +152,7 @@
       optional: true, allowMultiple: true },
   ],
 });
-// includeColumns:"all" — the queue logic reads review_status, which is
-// not a mapped column, so the default "shown" set would omit it.
-grist.onRecords(rebuild, { includeColumns: "all" });
+grist.onRecords((records, m) => { mapping = m; void refresh(); });
 </script>
 </body>
 </html>