diff --git a/mascarade-eval/docs/grist-native-views-recipe.md b/mascarade-eval/docs/grist-native-views-recipe.md
new file mode 100644
index 0000000..2f8857d
--- /dev/null
+++ b/mascarade-eval/docs/grist-native-views-recipe.md
@@ -0,0 +1,71 @@
+# Grist native review views — operator recipe
+
+Manual Grist UI steps for the parts of the human-review layer that are
+not API-scriptable. Run the schema migration first
+(`python -m mascarade_eval.grist.cli schema`) so the review columns
+exist.
+
+## 1. review_status choice colors
+
+For each table carrying `review_status` (`Heldout_Items`, `Datasets`
+in doc *ailiance-llm-workflow*; `Mascarade_Eval_Items`, `Bench_31_domains`
+in doc *mascarade-data*; `Mascarade_Training` in doc *Mascarade Training*):
+
+1. Open the table, click the `review_status` column header → **Column
+   options**.
+2. Under **CHOICES**, confirm the four values are present: `pending`,
+   `validated`, `rejected`, `needs_fix`.
+3. Set the chip color of each: pending = grey `#E8E8E8`,
+   validated = green `#C6E5B3`, rejected = red `#F2B5B5`,
+   needs_fix = amber `#F5D9A6`.
+
+## 2. Bench_31_domains review page (doc mascarade-data)
+
+1. **Add Page** → name it `Bench review`.
+2. Add a **Table** widget bound to `Bench_31_domains`.
+3. Add a filter on `review_status` and a second on `domain`; save the
+   view so the filters persist.
+4. Conditional formatting (column header → **Column options** →
+   **Add conditional style**):
+   - `judge_score`: red when `$judge_score < 50`, amber when
+     `50 <= $judge_score < 70`, green when `$judge_score >= 70`.
+   - `validator_score`: red when `$validator_score < 50`, green when
+     `$validator_score >= 70`.
+   - `ppl`: red when `$ppl > 20`, amber when `10 < $ppl <= 20`.
+5. Add a **Card List** widget on the same page bound to
+   `Bench_31_domains`, linked to the table widget, showing `model`,
+   `domain`, `judge_score`, `judge_rationale`, `validator_score`,
+   `review_status`, `reviewer`, `review_note` — this is the per-row
+   review surface.
+
+## 3. Datasets review view (doc ailiance-llm-workflow)
+
+1. **Add Page** → `Datasets review`.
+2. Add a **Table** widget bound to `Datasets`, filtered on
+   `review_status`.
+3. Show `domain`, `name`, `n_rows`, `license`, `hf_dataset_id`,
+   `review_status`, `reviewer`, `review_note`.
+
+## 4. Read-only scoreboards
+
+For `Bench_public`, `Bench_niches_ppl`, `Bench_gateway`,
+`Bench_lift_v1`, `Bench_lift_v2`: add one page `Scoreboards` with a
+Table widget per table. Apply conditional formatting on the score
+columns (green high / red low) as in section 2. No review columns —
+these tables are reference only.
+
+## 5. Bench entry form (doc mascarade-data)
+
+1. **Add Page** → `Bench entry`.
+2. Add a **Form** widget bound to `Bench_31_domains`.
+3. Keep only these fields on the form: `model`, `domain`, `ppl`,
+   `task_score`, `task_metric`, `judge_score`, `source`, `date`.
+   Remove pipeline-only fields (`validator_image_digest`, `run_id`,
+   `host`, `runtime_s`, `tokens_per_s`, …).
+4. Click **Publish** and copy the share URL — this is the manual
+   bench-result entry form. Automated runs keep writing via the API.
+
+## 6. Clean-up
+
+Delete the empty default `Table1` (columns A/B/C) in each of the three
+documents (*ailiance-llm-workflow*, *mascarade-data*, *Mascarade Training*).
diff --git a/mascarade-eval/docs/grist-widget-setup.md b/mascarade-eval/docs/grist-widget-setup.md
new file mode 100644
index 0000000..3ba91a0
--- /dev/null
+++ b/mascarade-eval/docs/grist-widget-setup.md
@@ -0,0 +1,81 @@
+# Review Console widget — hosting, wiring, smoke test
+
+The widget at `widgets/review-console/index.html` is a static file. It
+must be served over HTTPS and registered in Grist as a Custom URL
+widget.
+
+## 1. Hosting
+
+The widget is served by a dedicated `review-widget` nginx container in
+`/home/electron/saillant-sites/` on electron-server, exposed through
+traefik on the existing `admin.ailiance.fr` hostname under `/review`
+(a `Host && PathPrefix` router — no new cloudflared hostname needed).
+
+Compose service (`saillant-sites/docker-compose.yml`):
+
+```yaml
+  review-widget:
+    image: nginx:alpine
+    container_name: review-widget
+    restart: unless-stopped
+    networks: [traefik]
+    labels:
+      - traefik.enable=true
+      - traefik.docker.network=traefik
+      - traefik.http.routers.review-admin.rule=Host(`admin.ailiance.fr`) && PathPrefix(`/review`)
+      - traefik.http.routers.review-admin.entrypoints=websecure
+      - traefik.http.routers.review-admin.tls.certresolver=letsencrypt
+      - traefik.http.routers.review-admin.service=review-widget
+      - traefik.http.services.review-widget.loadbalancer.server.port=80
+    volumes:
+      - ./train-static:/usr/share/nginx/html:ro
+```
+
+The widget file lives at `saillant-sites/train-static/review/index.html`.
+Redeploy after editing the widget (nginx serves the mount live, no
+restart):
+
+```bash
+scp widgets/review-console/index.html \
+    electron-server:/home/electron/saillant-sites/train-static/review/index.html
+```
+
+Live URL (verified `HTTP 200`): `https://admin.ailiance.fr/review/`
+
+## 2. Add a review page in Grist
+
+In doc *ailiance-llm-workflow*:
+
+1. **Add Page** → `Heldout review`.
+2. Add a **Custom** widget. Select **Custom URL** and paste
+   `https://admin.ailiance.fr/review/`.
+3. Bind the widget to the `Heldout_Items` table.
+4. When prompted, grant the widget **Full document access** (it must
+   write the review columns).
+5. Open the widget's **Column mapping**:
+   - `primary` → `prompt`
+   - `secondary` → `reference`
+   - `context` → `domain`, `source`
+
+Repeat for the future `Mascarade_Training` table (map `primary` →
+`user_msg`, `secondary` → `assistant_msg`) and for
+`Mascarade_Eval_Items` in doc *mascarade-data* (map `primary` →
+`question`, `secondary` → `reference`).
+
+## 3. Smoke-test checklist
+
+On the `Heldout review` page:
+
+- [ ] The progress line shows `revus 0 / 400 — en attente 400`.
+- [ ] The first pending item's prompt and reference render in full.
+- [ ] Pressing `V` writes `review_status = validated`, `reviewer`,
+      `reviewed_at` (ISO-8601) and advances to the next item; the
+      progress counter increments.
+- [ ] Pressing `R` and `F` write `rejected` / `needs_fix`.
+- [ ] A value typed in the note field lands in `review_note` and the
+      field clears after the decision.
+- [ ] `S` / `→` skips without writing.
+- [ ] After every pending row is decided, the widget shows
+      "Tous les items en attente sont revus ✓".
+- [ ] Re-running `python -m mascarade_eval.grist.cli export --domain
+      <d>` ships only the rows marked `validated`.
diff --git a/mascarade-eval/mascarade_eval/grist/README.md b/mascarade-eval/mascarade_eval/grist/README.md
new file mode 100644
index 0000000..b4aa209
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/README.md
@@ -0,0 +1,36 @@
+# mascarade_eval.grist — Grist-backed dataset management
+
+Grist is the canonical source of truth for the mascarade training corpus.
+Mining ingests in insert-only mode (edits made in Grist are never
+overwritten); training and HF publication consume a deterministic export.
+
+## One-time setup
+
+1. Create an empty Grist doc "Mascarade Training" at grist.saillant.cc.
+2. Add `GRIST_DOC_TRAINING=<doc-id>` to `~/.config/electron-rare/grist.env`
+   (the file already holds `GRIST_API_KEY`).
+
+## Commands
+
+Run with `uv run python -m mascarade_eval.grist.cli <subcommand>`.
+
+- `migrate --domain kicad` — backfill a domain's HF training data into
+  Grist (insert-only). Run once per domain to seed the doc.
+- `ingest --domain kicad --jsonl mine.jsonl` — insert-only ingest of a
+  new mining/curation file. Existing rows are never touched.
+- `export --domain kicad` — write a hashed `.jsonl` snapshot to
+  `exports/` and log a row in the `Exports` table.
+- `publish --snapshot exports/kicad.<ts>.jsonl --hf-dataset
+  Ailiance-fr/mascarade-kicad-dataset --filename kicad_chat.jsonl` —
+  upload a snapshot to its HF dataset repo.
+
+Add `--dry-run` to `ingest`, `export`, or `migrate` to preview without
+writing to Grist or disk.
+
+## Human review
+
+Edit rows directly in the Grist UI. Each row carries a `review_status`
+(`pending` / `validated` / `rejected` / `needs_fix`); `export` ships only
+`validated` rows. Pass `--include-pending` to `export` to also include
+rows still awaiting review. See `docs/grist-native-views-recipe.md` and
+`docs/grist-widget-setup.md` for the review surfaces.
diff --git a/mascarade-eval/mascarade_eval/grist/__init__.py b/mascarade-eval/mascarade_eval/grist/__init__.py
new file mode 100644
index 0000000..360dbec
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/__init__.py
@@ -0,0 +1,48 @@
+# mascarade_eval/grist/__init__.py
+"""Grist-backed dataset management for the mascarade training corpus.
+
+Grist is the canonical source of truth. Mining ingests in insert-only
+mode (human edits in Grist are never overwritten); training and HF
+publication consume a deterministic export of human-validated rows.
+"""
+from pathlib import Path
+
+GRIST_BASE = "https://grist.saillant.cc/api"
+
+# Known existing docs. The training doc ID is not hard-coded: it comes at
+# runtime from --doc or the GRIST_DOC_TRAINING env/file value.
+DOC_HELDOUT = "eGbbrpzN3TeLq3sUd2YFA2"      # ailiance-llm-workflow
+DOC_MASCARADE = "dhyrySCayizD1PNqCNhCPN"    # mascarade-data
+
+KEY_FILE = Path.home() / ".config" / "electron-rare" / "grist.env"
+
+TRAINING_TABLE = "Mascarade_Training"
+REGISTRY_TABLE = "Datasets_Registry"
+EXPORTS_TABLE = "Exports"
+
+# Human-review columns appended to every validation-target table.
+REVIEW_COLUMNS = ("review_status", "reviewer", "reviewed_at", "review_note")
+REVIEW_STATUSES = ("pending", "validated", "rejected", "needs_fix")
+REVIEWER_CHOICES = ("clems",)
+
+# Existing tables that receive the review columns, keyed by doc ID.
+REVIEW_TARGETS = {
+    DOC_HELDOUT: ("Heldout_Items", "Datasets"),
+    DOC_MASCARADE: ("Mascarade_Eval_Items", "Bench_31_domains"),
+}
+
+TRAINING_COLUMNS = (
+    "item_key", "domain", "system", "user_msg", "assistant_msg",
+    "extra_turns", "source", "notes",
+) + REVIEW_COLUMNS  # training rows carry the review columns as well
+REGISTRY_COLUMNS = (
+    "name", "family", "domain", "hf_dataset_id", "license",
+    "n_items", "notes",
+)
+EXPORTS_COLUMNS = (
+    "export_id", "domain", "created_at", "n_items", "content_hash",
+    "output_file", "hf_dataset_id",
+)
+
+_ROOT = Path(__file__).resolve().parent.parent.parent  # .../mascarade-eval
+EXPORTS_DIR = _ROOT / "exports"  # <_ROOT>/exports
diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py
new file mode 100644
index 0000000..2fa5fe7
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/cli.py
@@ -0,0 +1,134 @@
+# mascarade_eval/grist/cli.py
+"""CLI for Grist-backed dataset management: ingest / export / migrate / publish.
+
+Run: python -m mascarade_eval.grist.cli <subcommand> [options]
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+from . import EXPORTS_DIR, TRAINING_COLUMNS, TRAINING_TABLE
+from .client import GristClient, load_doc_id
+from .export import export_domain
+from .ingest import item_key, ingest_rows
+from .migrate import flatten_messages, migrate_domain
+from .publish import publish_snapshot
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Build the `grist-dataset` parser and its five subcommands."""
+    parser = argparse.ArgumentParser(prog="grist-dataset", description=__doc__)
+    cmds = parser.add_subparsers(dest="command", required=True)
+
+    ingest = cmds.add_parser("ingest", help="insert-only ingest a .jsonl")
+    ingest.add_argument("--doc")
+    ingest.add_argument("--jsonl", required=True)
+    ingest.add_argument("--domain", required=True)
+    ingest.add_argument("--dry-run", action="store_true")
+
+    export = cmds.add_parser("export", help="export a domain to a snapshot")
+    export.add_argument("--doc")
+    export.add_argument("--domain", required=True)
+    export.add_argument("--dry-run", action="store_true")
+    export.add_argument("--include-pending", action="store_true",
+                        help="also export rows still pending review")
+
+    migrate = cmds.add_parser("migrate", help="backfill a domain from HF")
+    migrate.add_argument("--doc")
+    migrate.add_argument("--domain", required=True)
+    migrate.add_argument("--dry-run", action="store_true")
+
+    publish = cmds.add_parser("publish", help="upload a snapshot to HF")
+    for flag in ("--snapshot", "--hf-dataset", "--filename"):
+        publish.add_argument(flag, required=True)
+
+    # schema takes no options of its own.
+    cmds.add_parser("schema", help="add review columns to existing tables")
+    return parser
+
+
+def resolve_doc(doc_arg: str | None) -> str:
+    """Pick the doc ID: --doc when given, else GRIST_DOC_TRAINING (env/file).
+
+    Exits the program (sys.exit) if neither source provides a doc ID.
+    """
+    # `or` short-circuits, so the env/file lookup only runs without --doc.
+    doc = doc_arg or load_doc_id("GRIST_DOC_TRAINING")
+    if doc:
+        return doc
+    # Neither source gave a usable (non-empty) ID.
+    sys.exit("no doc ID: pass --doc or set GRIST_DOC_TRAINING")
+
+
+def _ingest_jsonl_rows(domain: str, jsonl_path: str) -> list[dict]:
+    """Build pending training rows from a chat .jsonl file.
+
+    Exits on a missing or non-UTF-8 file; malformed or non-object lines
+    are skipped with a stderr warning."""
+    try:
+        text = Path(jsonl_path).read_text(encoding="utf-8")
+    except FileNotFoundError:
+        sys.exit(f"file not found: {jsonl_path}")
+    except UnicodeDecodeError as exc:
+        sys.exit(f"cannot decode {jsonl_path}: {exc}")
+    rows: list[dict] = []
+    for line in filter(None, map(str.strip, text.splitlines())):
+        try:
+            record = json.loads(line)
+        except json.JSONDecodeError as exc:
+            print(f"[warn] skipped malformed line: {exc}", file=sys.stderr)
+            continue
+        if not isinstance(record, dict):  # flatten_messages needs a mapping
+            print("[warn] skipped non-object line", file=sys.stderr)
+            continue
+        flat = flatten_messages(record)
+        rows.append({"item_key": item_key(domain, flat["user_msg"]),
+                     "domain": domain, "system": flat["system"],
+                     "user_msg": flat["user_msg"],
+                     "assistant_msg": flat["assistant_msg"],
+                     "extra_turns": flat["extra_turns"],
+                     "source": record.get("source", ""), "notes": "",
+                     "review_status": "pending"})
+    return rows
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Run one subcommand; returns 0. The training doc ID is resolved only
+    for ingest / export / migrate (publish and schema do not use it)."""
+    args = build_parser().parse_args(argv)
+    command = args.command
+    # publish only uploads a local snapshot file: no Grist client involved.
+    if command == "publish":
+        publish_snapshot(args.snapshot, args.hf_dataset, args.filename)
+        print(f"published {args.snapshot} -> {args.hf_dataset}")
+        return 0
+    # schema builds its own client for each review-target document.
+    if command == "schema":
+        from . import REVIEW_TARGETS
+        from .schema import migrate_doc
+        for doc_id, tables in REVIEW_TARGETS.items():
+            outcome = migrate_doc(GristClient.from_env(doc_id), tables)
+            print(f"schema {doc_id}: {outcome}")
+        return 0
+    client = GristClient.from_env(resolve_doc(args.doc))
+    dry = args.dry_run
+    if command == "ingest":
+        rows = _ingest_jsonl_rows(args.domain, args.jsonl)
+        summary = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, rows,
+                              dry_run=dry)
+        print(f"ingest {args.domain}: {summary}")
+    elif command == "export":
+        summary = export_domain(client, args.domain, EXPORTS_DIR, dry_run=dry,
+                                include_pending=args.include_pending)
+        print(f"export {args.domain}: {summary}")
+    elif command == "migrate":
+        summary = migrate_domain(client, args.domain, dry_run=dry)
+        print(f"migrate {args.domain}: {summary}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/mascarade-eval/mascarade_eval/grist/client.py b/mascarade-eval/mascarade_eval/grist/client.py
new file mode 100644
index 0000000..6340c9b
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/client.py
@@ -0,0 +1,120 @@
+# mascarade_eval/grist/client.py
+"""Thin Grist REST client. The HTTP transport is injectable for tests."""
+from __future__ import annotations
+
+import json
+import os
+import sys
+import urllib.error
+import urllib.request
+
+from . import GRIST_BASE, KEY_FILE, REVIEW_STATUSES, REVIEWER_CHOICES
+
+_INT_COLS = {"n_items", "n_rows"}
+_CHOICE_COLS = {
+    "review_status": REVIEW_STATUSES,
+    "reviewer": REVIEWER_CHOICES,
+}
+
+
+def _col_fields(name: str) -> dict:
+    """Build the Grist column `fields` payload (label, type, options) for `name`."""
+    if name in _CHOICE_COLS:
+        choices = json.dumps({"choices": list(_CHOICE_COLS[name])})
+        return {"label": name, "type": "Choice", "widgetOptions": choices}
+    # Everything that is not a choice column is an Int or plain Text column.
+    kind = "Int" if name in _INT_COLS else "Text"
+    return {"label": name, "type": kind}
+
+
+def load_grist_key() -> str:
+    """Look up GRIST_API_KEY: environment first, then the grist.env key file."""
+    from_env = os.environ.get("GRIST_API_KEY")
+    if from_env:
+        return from_env
+    entries = KEY_FILE.read_text().splitlines() if KEY_FILE.exists() else []
+    for entry in entries:
+        if entry.strip().startswith("GRIST_API_KEY="):
+            return entry.split("=", 1)[1].strip().strip('"')
+    sys.exit("GRIST_API_KEY not found (env or ~/.config/electron-rare/grist.env)")
+
+
+def load_doc_id(name: str) -> str | None:
+    """Doc ID from env var <name>, else the <name>= line of grist.env, else None."""
+    from_env = os.environ.get(name)
+    if from_env:
+        return from_env
+    prefix = f"{name}="
+    entries = KEY_FILE.read_text().splitlines() if KEY_FILE.exists() else []
+    matches = (e for e in entries if e.strip().startswith(prefix))
+    hit = next(matches, None)
+    return None if hit is None else hit.split("=", 1)[1].strip().strip('"')
+
+
+def _http_transport(method: str, url: str, key: str, body: dict | None) -> dict:
+    """One authenticated JSON request -> parsed body ({} when the body is empty)."""
+    data = json.dumps(body).encode() if body is not None else None
+    headers = {"Authorization": f"Bearer {key}",
+               "Content-Type": "application/json"}
+    req = urllib.request.Request(url, data=data, method=method, headers=headers)
+    try:
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            raw = resp.read().decode("utf-8", "replace")
+            return json.loads(raw) if raw else {}
+    except urllib.error.HTTPError as exc:
+        detail = exc.read().decode("utf-8", "replace")[:300]
+        message = f"Grist {method} {url} -> HTTP {exc.code}: {detail}"
+        raise RuntimeError(message) from exc  # keep the HTTPError as the cause
+
+
+class GristClient:
+    """Records- and column-level access to one Grist document."""
+
+    def __init__(self, doc_id: str, key: str, transport=_http_transport):
+        self.doc_id, self.key, self._transport = doc_id, key, transport
+
+    @classmethod
+    def from_env(cls, doc_id: str) -> "GristClient":
+        return cls(doc_id, load_grist_key())  # key resolved by load_grist_key
+
+    def _api(self, method: str, path: str, body: dict | None = None) -> dict:
+        return self._transport(method, f"{GRIST_BASE}{path}", self.key, body)
+
+    def list_tables(self) -> set[str]:
+        """Return the ids of the tables in the document."""
+        listing = self._api("GET", f"/docs/{self.doc_id}/tables")
+        return {entry["id"] for entry in listing.get("tables", [])}
+
+    def create_table(self, table: str, columns: tuple[str, ...]) -> None:
+        """Create `table` with one typed column per id in `columns`."""
+        typed = [{"id": c, "fields": _col_fields(c)} for c in columns]
+        spec = {"tables": [{"id": table, "columns": typed}]}
+        self._api("POST", f"/docs/{self.doc_id}/tables", spec)
+
+    def ensure_table(self, table: str, columns: tuple[str, ...]) -> None:
+        """Create `table` only when the document does not list it yet."""
+        if table not in self.list_tables():
+            self.create_table(table, columns)
+
+    def list_columns(self, table: str) -> set[str]:
+        path = f"/docs/{self.doc_id}/tables/{table}/columns"
+        return {col["id"] for col in self._api("GET", path).get("columns", [])}
+
+    def add_columns(self, table: str, columns: tuple[str, ...]) -> None:
+        """Add typed columns to `table`; an empty `columns` sends nothing."""
+        if columns:
+            typed = [{"id": c, "fields": _col_fields(c)} for c in columns]
+            path = f"/docs/{self.doc_id}/tables/{table}/columns"
+            self._api("POST", path, {"columns": typed})
+
+    def fetch_records(self, table: str) -> list[dict]:
+        """Return every record of `table` as its fields plus `_id`."""
+        data = self._api("GET", f"/docs/{self.doc_id}/tables/{table}/records")
+        return [{"_id": r["id"], **r["fields"]} for r in data.get("records", [])]
+
+    def add_records(self, table: str, rows: list[dict]) -> None:
+        """POST `rows` to `table` in chunks of 100; nothing is sent when empty."""
+        target = f"/docs/{self.doc_id}/tables/{table}/records"
+        for start in range(0, len(rows or ()), 100):
+            batch = [{"fields": r} for r in rows[start:start + 100]]
+            self._api("POST", target, {"records": batch})
diff --git a/mascarade-eval/mascarade_eval/grist/export.py b/mascarade-eval/mascarade_eval/grist/export.py
new file mode 100644
index 0000000..f1095c9
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/export.py
@@ -0,0 +1,83 @@
+# mascarade_eval/grist/export.py
+"""Deterministic Grist -> .jsonl snapshot export, journaled in Exports."""
+from __future__ import annotations
+
+import datetime
+import hashlib
+import json
+from pathlib import Path
+
+from . import EXPORTS_COLUMNS, EXPORTS_TABLE, TRAINING_TABLE
+from .migrate import rebuild_messages
+
+
+def canonical_jsonl(keyed_rows: list[tuple[str, dict]]) -> str:
+    """Serialize (sort_key, object) pairs to JSONL ordered by sort_key.
+
+    Same input set -> same bytes, regardless of input order: rows sharing a
+    sort_key are ordered by their serialized text. The key is not written.
+    """
+    lines = sorted((key, json.dumps(obj, ensure_ascii=False, sort_keys=True))
+                   for key, obj in keyed_rows)
+    return "\n".join(text for _, text in lines)
+
+
+def content_hash(text: str) -> str:
+    """Hex SHA-256 of the snapshot text, hashed as UTF-8 bytes."""
+    return hashlib.sha256(bytes(text, "utf-8")).hexdigest()
+
+
+def _timestamp() -> str:  # compact UTC stamp, e.g. 20250101T120000Z
+    return datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+
+
+def _is_exportable(row: dict, include_pending: bool) -> bool:
+    """Decide whether a row belongs in the export.
+
+    `validated` rows always ship; `pending` rows (including rows whose
+    review_status is missing or empty) ship only with include_pending.
+    Every other status is left out.
+    """
+    status = row.get("review_status") or "pending"
+    allowed = ("validated", "pending") if include_pending else ("validated",)
+    return status in allowed
+
+
+def export_domain(client, domain: str, out_dir: Path,
+                  dry_run: bool = False,
+                  include_pending: bool = False) -> dict:
+    """Export one domain's human-validated training rows to a hashed snapshot.
+
+    Returns the report dict; unless dry_run, it is also logged in Exports.
+    """
+    rows = [r for r in client.fetch_records(TRAINING_TABLE)
+            if r.get("domain") == domain
+            and _is_exportable(r, include_pending)]
+    payload = canonical_jsonl(
+        [(r.get("item_key", ""), rebuild_messages(r)) for r in rows])
+    # NOTE(review): the digest covers `payload` only, i.e. without the
+    digest = content_hash(payload)  # trailing newline written below - confirm
+    stamp = _timestamp()
+    filename = f"{domain}.{stamp}.jsonl"
+    report = {
+        "export_id": f"{domain}-{stamp}",
+        "domain": domain,
+        "created_at": stamp,
+        "n_items": len(rows),
+        "content_hash": digest,
+        "output_file": filename,
+        "hf_dataset_id": "",
+    }
+    if dry_run:  # preview: nothing is written to disk or to Grist
+        return report
+    out_dir = Path(out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    out_path = out_dir / filename
+    out_path.write_text(payload + ("\n" if payload else ""),
+                        encoding="utf-8")
+    try:
+        client.ensure_table(EXPORTS_TABLE, EXPORTS_COLUMNS)
+        client.add_records(EXPORTS_TABLE, [report])
+    except Exception:  # journaling failed: remove the snapshot, then re-raise
+        out_path.unlink(missing_ok=True)
+        raise
+    return report
diff --git a/mascarade-eval/mascarade_eval/grist/ingest.py b/mascarade-eval/mascarade_eval/grist/ingest.py
new file mode 100644
index 0000000..9c391f9
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/ingest.py
@@ -0,0 +1,42 @@
+# mascarade_eval/grist/ingest.py
+"""Insert-only ingestion into Grist.
+
+This module holds the source-of-truth invariant: an existing item row is
+NEVER updated, so human edits in Grist survive re-ingestion.
+"""
+from __future__ import annotations
+
+import hashlib
+
+
+def item_key(domain: str, text: str) -> str:
+    """Stable item key: `<domain>-` plus the first 10 hex chars of SHA1(text)."""
+    short = hashlib.sha1(text.encode("utf-8")).hexdigest()[:10]
+    return "{}-{}".format(domain, short)
+
+
+def compute_delta(existing_keys: set[str], incoming: list[dict],
+                  key_field: str = "item_key") -> list[dict]:
+    """Return only rows whose key is absent from Grist and unseen in batch."""
+    seen: set[str] = set(existing_keys)  # copy: the caller's set is not mutated
+    delta: list[dict] = []
+    for row in incoming:
+        key = row[key_field]
+        if key not in seen:
+            seen.add(key)
+            delta.append(row)
+    return delta
+
+
+def ingest_rows(client, table: str, columns: tuple[str, ...], rows: list[dict],
+                key_field: str = "item_key", dry_run: bool = False) -> dict:
+    """Insert-only ingestion. Returns {"inserted": n, "skipped": n}."""
+    if not dry_run:  # a dry run must not write, not even to create the table
+        client.ensure_table(table, columns)
+    readable = not dry_run or table in client.list_tables()
+    found = client.fetch_records(table) if readable else []
+    existing = {r[key_field] for r in found if key_field in r}
+    delta = compute_delta(existing, rows, key_field)
+    if not dry_run:
+        client.add_records(table, delta)
+    return {"inserted": len(delta), "skipped": len(rows) - len(delta)}
diff --git a/mascarade-eval/mascarade_eval/grist/migrate.py b/mascarade-eval/mascarade_eval/grist/migrate.py
new file mode 100644
index 0000000..a36aa19
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/migrate.py
@@ -0,0 +1,127 @@
+# mascarade_eval/grist/migrate.py
+"""Backfill the training corpus from HuggingFace into Grist.
+
+Pure transforms (flatten_messages / rebuild_messages) are unit-tested;
+migrate_domain wires them to HF download + insert-only ingestion.
+"""
+from __future__ import annotations
+
+import json
+
+from mascarade_eval import HF_ORG
+from . import REGISTRY_COLUMNS, REGISTRY_TABLE, TRAINING_COLUMNS, TRAINING_TABLE
+from .ingest import ingest_rows, item_key
+
+_ROLE_NORMAL = {"user": "user", "human": "user",
+                "assistant": "assistant", "gpt": "assistant",
+                "system": "system"}
+
+
+def _normalize(record: dict) -> list[dict]:
+    """Map an OpenAI (`messages`) or ShareGPT (`conversations`) record to
+    [{role, content}, ...]; non-dict entries and unknown roles are dropped."""
+    turns = record.get("messages") or record.get("conversations") or []
+    normalized: list[dict] = []
+    for turn in turns:
+        if isinstance(turn, dict):
+            speaker = turn.get("role") or turn.get("from") or ""
+            role = _ROLE_NORMAL.get(speaker)
+            if role is not None:
+                text = turn.get("content") or turn.get("value") or ""
+                normalized.append({"role": role, "content": text})
+    return normalized
+
+
+def flatten_messages(record: dict) -> dict:
+    """Collapse a chat record into editable columns.
+
+    The first system / user / assistant message fills system / user_msg /
+    assistant_msg ("" when absent). A single-turn record (at most one
+    system, exactly one user and one assistant message) leaves extra_turns
+    empty; any other shape also stores the full normalized list there as JSON.
+    """
+    msgs = _normalize(record)
+    texts = {role: [m["content"] for m in msgs if m["role"] == role]
+             for role in ("system", "user", "assistant")}
+    n_sys, n_user, n_asst = (len(texts[r]) for r in texts)
+    # Single turn: one user/assistant exchange, optionally led by one system.
+    single_turn = (n_sys <= 1 and n_user == 1 and n_asst == 1
+                   and len(msgs) == n_sys + 2)
+    extra = "" if single_turn else json.dumps(msgs, ensure_ascii=False)
+    return {
+        "system": texts["system"][0] if n_sys else "",
+        "user_msg": texts["user"][0] if n_user else "",
+        "assistant_msg": texts["assistant"][0] if n_asst else "",
+        "extra_turns": extra,
+    }
+
+
+def rebuild_messages(row: dict) -> dict:
+    """Inverse of flatten_messages: rebuild {"messages": [...]} from a row."""
+    packed = row.get("extra_turns") or ""
+    if packed:  # a non-empty extra_turns holds the whole message list as JSON
+        return {"messages": json.loads(packed)}
+    # Otherwise: the system message is emitted only when it is non-empty.
+    lead = ([{"role": "system", "content": row["system"]}]
+            if row.get("system") else [])
+    user = {"role": "user", "content": row.get("user_msg", "")}
+    reply = {"role": "assistant", "content": row.get("assistant_msg", "")}
+    return {"messages": [*lead, user, reply]}
+
+
+def _download_training_records(domain: str) -> list[dict]:
+    """Fetch <domain>_chat.jsonl from the domain's HF dataset repo.
+
+    Returns one parsed JSON value per non-blank line of the file.
+    """
+    from huggingface_hub import hf_hub_download
+    local_file = hf_hub_download(
+        repo_id=f"{HF_ORG}/mascarade-{domain}-dataset",
+        filename=f"{domain}_chat.jsonl",
+        repo_type="dataset",
+    )
+    with open(local_file, encoding="utf-8") as handle:
+        stripped = [raw.strip() for raw in handle]
+    # Blank lines carry no record.
+    return [json.loads(entry) for entry in stripped if entry]
+
+
+def _to_training_row(domain: str, record: dict) -> dict:
+    """Flatten one HF chat record into a training row for `domain`.
+
+    The row starts as `pending` and names the domain's HF dataset as source.
+    """
+    flat = flatten_messages(record)
+    row = {"item_key": item_key(domain, flat["user_msg"]), "domain": domain}
+    for column in ("system", "user_msg", "assistant_msg", "extra_turns"):
+        row[column] = flat[column]
+    row["source"] = f"{HF_ORG}/mascarade-{domain}-dataset"
+    row["notes"] = ""
+    row["review_status"] = "pending"
+    return row
+
+
+def migrate_domain(client, domain: str, records: list[dict] | None = None,
+                   dry_run: bool = False) -> dict:
+    """Backfill one domain's HF training rows into Grist (insert-only).
+
+    `records` replaces the HF download when given (used by tests). Unless
+    dry_run, a summary row is also appended to the registry table.
+    """
+    raw = _download_training_records(domain) if records is None else records
+    rows = [_to_training_row(domain, r) for r in raw]
+    report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, rows,
+                         dry_run=dry_run)
+    if dry_run:
+        return report
+    # Real run: log this backfill in the registry table.
+    client.ensure_table(REGISTRY_TABLE, REGISTRY_COLUMNS)
+    summary = (f"backfilled {report['inserted']} new, "
+               f"{report['skipped']} already present")
+    client.add_records(REGISTRY_TABLE, [{
+        "name": f"mascarade-{domain}-train", "family": "mascarade-training",
+        "domain": domain,
+        "hf_dataset_id": f"{HF_ORG}/mascarade-{domain}-dataset",
+        "license": "CC-BY-SA-4.0", "n_items": len(rows), "notes": summary,
+    }])
+    return report
diff --git a/mascarade-eval/mascarade_eval/grist/publish.py b/mascarade-eval/mascarade_eval/grist/publish.py
new file mode 100644
index 0000000..22ed14c
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/publish.py
@@ -0,0 +1,31 @@
+# mascarade_eval/grist/publish.py
+"""Publish an exported snapshot to its HuggingFace dataset repo."""
+from __future__ import annotations
+
+from pathlib import Path
+
+
+def _hf_upload(*, path_or_fileobj, path_in_repo, repo_id, repo_type,
+               commit_message):
+    """Default uploader: forward the keywords to huggingface_hub.upload_file."""
+    from huggingface_hub import upload_file
+    upload_file(repo_id=repo_id, repo_type=repo_type, path_in_repo=path_in_repo,
+                path_or_fileobj=path_or_fileobj, commit_message=commit_message)
+
+
+def publish_snapshot(snapshot_path: str, hf_dataset_id: str,
+                     filename: str, uploader=_hf_upload) -> None:
+    """Send one exported .jsonl snapshot to an HF dataset repo as `filename`.
+
+    Raises FileNotFoundError when the snapshot is missing. `uploader` is
+    injectable for tests; the default goes through huggingface_hub.
+    """
+    snapshot = Path(snapshot_path)
+    if not snapshot.exists():
+        raise FileNotFoundError(f"snapshot not found: {snapshot_path}")
+    request = {
+        "path_or_fileobj": str(snapshot), "path_in_repo": filename,
+        "repo_id": hf_dataset_id, "repo_type": "dataset",
+        "commit_message": f"dataset: refresh {filename} from Grist export",
+    }
+    uploader(**request)
diff --git a/mascarade-eval/mascarade_eval/grist/schema.py b/mascarade-eval/mascarade_eval/grist/schema.py
new file mode 100644
index 0000000..40a6895
--- /dev/null
+++ b/mascarade-eval/mascarade_eval/grist/schema.py
@@ -0,0 +1,34 @@
+# mascarade_eval/grist/schema.py
+"""Add the human-review columns to existing Grist tables (idempotent).
+
+A column already present on a table is never recreated, so re-running
+the migration is safe. New tables created by the pipeline already carry
+the review columns via TRAINING_COLUMNS.
+"""
+from __future__ import annotations
+
+from . import REVIEW_COLUMNS
+
+
+def ensure_review_columns(client, table: str) -> list[str]:
+    """Add any missing review column to one table. Returns columns added."""
+    existing = client.list_columns(table)
+    missing = [c for c in REVIEW_COLUMNS if c not in existing]
+    if missing:
+        client.add_columns(table, tuple(missing))
+    return missing
+
+
+def migrate_doc(client, tables: tuple[str, ...]) -> dict:
+    """Ensure review columns on each table that exists in the document.
+
+    A table absent from the document is reported as None (skipped).
+    """
+    present = client.list_tables()
+    report: dict = {}
+    for table in tables:
+        if table in present:
+            report[table] = ensure_review_columns(client, table)
+        else:
+            report[table] = None
+    return report
diff --git a/mascarade-eval/tests/conftest.py b/mascarade-eval/tests/conftest.py
new file mode 100644
index 0000000..2b1f464
--- /dev/null
+++ b/mascarade-eval/tests/conftest.py
@@ -0,0 +1,46 @@
+# tests/conftest.py
+import pytest
+
+
+class FakeClient:
+    """In-memory stand-in for GristClient. Records all writes."""
+
+    def __init__(self, tables=None, records=None, columns=None):
+        self.doc_id = "fake-doc"
+        self._tables = set(tables or [])
+        self._records = {t: list(rs) for t, rs in (records or {}).items()}
+        self._columns = {t: list(cs) for t, cs in (columns or {}).items()}
+        self.created = []
+        self.added = {}
+        self.added_columns = {}
+
+    def list_tables(self):
+        return set(self._tables)
+
+    def create_table(self, table, columns):
+        self._tables.add(table)
+        self._columns[table] = list(columns)
+        self.created.append((table, tuple(columns)))
+
+    def ensure_table(self, table, columns):
+        if table not in self._tables:
+            self.create_table(table, columns)
+
+    def list_columns(self, table):
+        return set(self._columns.get(table, []))
+
+    def add_columns(self, table, columns):
+        self._columns.setdefault(table, []).extend(columns)
+        self.added_columns.setdefault(table, []).extend(columns)
+
+    def fetch_records(self, table):
+        return [dict(r) for r in self._records.get(table, [])]
+
+    def add_records(self, table, rows):
+        self.added.setdefault(table, []).extend(rows)
+        self._records.setdefault(table, []).extend(rows)
+
+
+@pytest.fixture
+def fake_client():
+    return FakeClient  # the class itself; each test instantiates its own
diff --git a/mascarade-eval/tests/test_grist_cli.py b/mascarade-eval/tests/test_grist_cli.py
new file mode 100644
index 0000000..c8a1824
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_cli.py
@@ -0,0 +1,71 @@
+# tests/test_grist_cli.py
+import pytest
+from mascarade_eval.grist.cli import build_parser, resolve_doc
+
+
+def test_parser_ingest_requires_doc_and_jsonl():
+    ns = build_parser().parse_args(
+        ["ingest", "--doc", "D", "--jsonl", "mine.jsonl", "--domain", "kicad"])
+    assert ns.command == "ingest"
+    assert ns.doc == "D"
+    assert ns.jsonl == "mine.jsonl"
+    assert ns.domain == "kicad"
+
+
+def test_parser_export_accepts_dry_run():
+    ns = build_parser().parse_args(
+        ["export", "--doc", "D", "--domain", "kicad", "--dry-run"])
+    assert ns.command == "export"
+    assert ns.dry_run is True
+
+
+def test_parser_migrate_and_publish():
+    p = build_parser()
+    m = p.parse_args(["migrate", "--doc", "D", "--domain", "kicad"])
+    assert m.command == "migrate"
+    pub = p.parse_args(
+        ["publish", "--snapshot", "exports/kicad.x.jsonl",
+         "--hf-dataset", "Ailiance-fr/mascarade-kicad-dataset",
+         "--filename", "kicad_chat.jsonl"])
+    assert pub.command == "publish"
+    assert pub.hf_dataset == "Ailiance-fr/mascarade-kicad-dataset"
+
+
+def test_resolve_doc_prefers_explicit_arg():
+    assert resolve_doc("explicit-id") == "explicit-id"  # returned as given
+
+
+def test_resolve_doc_errors_when_unset(monkeypatch):
+    monkeypatch.delenv("GRIST_DOC_TRAINING", raising=False)
+    monkeypatch.setattr("mascarade_eval.grist.cli.load_doc_id",
+                        lambda name: None)
+    with pytest.raises(SystemExit):
+        resolve_doc(None)
+
+
+def test_ingest_jsonl_rows_exits_on_missing_file(tmp_path):
+    from mascarade_eval.grist.cli import _ingest_jsonl_rows
+    with pytest.raises(SystemExit):
+        _ingest_jsonl_rows("kicad", str(tmp_path / "does-not-exist.jsonl"))
+
+
+def test_parser_accepts_schema_command():
+    ns = build_parser().parse_args(["schema"])
+    assert ns.command == "schema"
+
+
+def test_schema_command_runs_over_review_targets(monkeypatch, fake_client):
+    from mascarade_eval.grist import cli
+    made = fake_client(tables=["Heldout_Items"],
+                       columns={"Heldout_Items": ["item_key"]})
+    monkeypatch.setattr(cli.GristClient, "from_env",
+                        classmethod(lambda c, doc: made))
+    rc = cli.main(["schema"])
+    assert rc == 0
+    assert made.added_columns["Heldout_Items"]
+
+
+def test_parser_export_accepts_include_pending():
+    ns = build_parser().parse_args(
+        ["export", "--doc", "D", "--domain", "kicad", "--include-pending"])
+    assert ns.include_pending is True
diff --git a/mascarade-eval/tests/test_grist_client.py b/mascarade-eval/tests/test_grist_client.py
new file mode 100644
index 0000000..9af0fd0
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_client.py
@@ -0,0 +1,102 @@
+# tests/test_grist_client.py
+import pytest
+from mascarade_eval.grist.client import GristClient, load_grist_key
+
+
+def _recording_transport(log):
+    def transport(method, url, key, body):
+        log.append((method, url, body))
+        if method == "GET" and url.endswith("/tables"):
+            return {"tables": [{"id": "Existing"}]}
+        if method == "GET" and url.endswith("/columns"):
+            return {"columns": [{"id": "item_key"}, {"id": "domain"}]}
+        if method == "GET" and "/records" in url:
+            return {"records": [
+                {"id": 1, "fields": {"item_key": "k1",
+                                     "review_status": "pending"}},
+                {"id": 2, "fields": {"item_key": "k2",
+                                     "review_status": "validated"}},
+            ]}
+        return {}
+    return transport
+
+
+def test_list_tables_returns_ids():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    assert c.list_tables() == {"Existing"}
+    assert log[0][0] == "GET"
+    assert log[0][1] == "https://grist.saillant.cc/api/docs/doc1/tables"
+
+
+def test_fetch_records_flattens_id_into_fields():
+    c = GristClient("doc1", "key1", transport=_recording_transport([]))
+    rows = c.fetch_records("Mascarade_Training")
+    assert rows == [
+        {"_id": 1, "item_key": "k1", "review_status": "pending"},
+        {"_id": 2, "item_key": "k2", "review_status": "validated"},
+    ]
+
+
+def test_add_records_posts_fields_wrapped():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    c.add_records("T", [{"a": "1"}, {"a": "2"}])
+    method, url, body = log[-1]
+    assert method == "POST"
+    assert url.endswith("/docs/doc1/tables/T/records")
+    assert body == {"records": [{"fields": {"a": "1"}},
+                                {"fields": {"a": "2"}}]}
+
+
+def test_add_records_noop_on_empty():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    c.add_records("T", [])
+    assert log == []
+
+
+def test_create_table_assigns_column_types():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    c.create_table("T", ("item_key", "n_items", "review_status"))
+    method, url, body = log[-1]
+    assert method == "POST"
+    cols = {col["id"]: col["fields"]["type"]
+            for col in body["tables"][0]["columns"]}
+    assert cols == {"item_key": "Text", "n_items": "Int",
+                    "review_status": "Choice"}
+
+
+def test_list_columns_returns_ids():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    assert c.list_columns("Heldout_Items") == {"item_key", "domain"}
+    method, url, _ = log[-1]
+    assert method == "GET"
+    assert url.endswith("/docs/doc1/tables/Heldout_Items/columns")
+
+
+def test_add_columns_posts_choice_with_widget_options():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    c.add_columns("Heldout_Items", ("review_status", "review_note"))
+    method, url, body = log[-1]
+    assert method == "POST"
+    assert url.endswith("/docs/doc1/tables/Heldout_Items/columns")
+    by_id = {col["id"]: col["fields"] for col in body["columns"]}
+    assert by_id["review_status"]["type"] == "Choice"
+    assert "pending" in by_id["review_status"]["widgetOptions"]
+    assert by_id["review_note"]["type"] == "Text"
+
+
+def test_add_columns_noop_on_empty():
+    log = []
+    c = GristClient("doc1", "key1", transport=_recording_transport(log))
+    c.add_columns("T", ())
+    assert log == []
+
+
+def test_load_grist_key_prefers_env(monkeypatch):
+    monkeypatch.setenv("GRIST_API_KEY", "env-key")
+    assert load_grist_key() == "env-key"  # the value set just above
diff --git a/mascarade-eval/tests/test_grist_constants.py b/mascarade-eval/tests/test_grist_constants.py
new file mode 100644
index 0000000..af5e5d5
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_constants.py
@@ -0,0 +1,40 @@
+# tests/test_grist_constants.py
+from mascarade_eval import grist
+
+
+def test_constants_present():
+    assert grist.GRIST_BASE == "https://grist.saillant.cc/api"
+    assert grist.DOC_HELDOUT == "eGbbrpzN3TeLq3sUd2YFA2"
+    assert grist.DOC_MASCARADE == "dhyrySCayizD1PNqCNhCPN"
+    assert grist.TRAINING_TABLE == "Mascarade_Training"
+    assert grist.REGISTRY_TABLE == "Datasets_Registry"
+    assert grist.EXPORTS_TABLE == "Exports"
+
+
+def test_review_constants():
+    assert grist.REVIEW_COLUMNS == (
+        "review_status", "reviewer", "reviewed_at", "review_note")
+    assert grist.REVIEW_STATUSES == (
+        "pending", "validated", "rejected", "needs_fix")
+    assert grist.REVIEWER_CHOICES == ("clems",)
+
+
+def test_review_targets_cover_both_docs():
+    assert grist.REVIEW_TARGETS == {
+        grist.DOC_HELDOUT: ("Heldout_Items", "Datasets"),
+        grist.DOC_MASCARADE: ("Mascarade_Eval_Items", "Bench_31_domains"),
+    }
+
+
+def test_training_columns_end_with_review_columns():
+    assert grist.TRAINING_COLUMNS == (
+        "item_key", "domain", "system", "user_msg", "assistant_msg",
+        "extra_turns", "source", "notes",
+        "review_status", "reviewer", "reviewed_at", "review_note",
+    )
+    assert "exclure" not in grist.TRAINING_COLUMNS
+    assert grist.TRAINING_COLUMNS[-4:] == grist.REVIEW_COLUMNS
+
+
+def test_exports_dir_under_repo_root():
+    assert grist.EXPORTS_DIR.name == "exports"  # last path component only
diff --git a/mascarade-eval/tests/test_grist_export.py b/mascarade-eval/tests/test_grist_export.py
new file mode 100644
index 0000000..3629308
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_export.py
@@ -0,0 +1,101 @@
+# tests/test_grist_export.py
+import json
+import pytest
+from mascarade_eval.grist import TRAINING_TABLE, EXPORTS_TABLE
+from mascarade_eval.grist.export import (
+    canonical_jsonl, content_hash, export_domain,
+)
+
+
+def _row(key, status, q="Q", a="A"):
+    return {"_id": key, "item_key": f"kicad-{key}", "domain": "kicad",
+            "user_msg": q, "assistant_msg": a, "system": "",
+            "extra_turns": "", "source": "", "notes": "",
+            "review_status": status}
+
+
+def test_canonical_jsonl_sorts_by_key():
+    keyed = [("b", {"v": 2}), ("a", {"v": 1})]
+    lines = canonical_jsonl(keyed).splitlines()
+    assert json.loads(lines[0]) == {"v": 1}
+    assert json.loads(lines[1]) == {"v": 2}
+
+
+def test_canonical_jsonl_is_order_independent():
+    a = [("x", {"v": 1}), ("y", {"v": 2})]
+    b = [("y", {"v": 2}), ("x", {"v": 1})]
+    assert canonical_jsonl(a) == canonical_jsonl(b)
+
+
+def test_canonical_jsonl_omits_the_sort_key_from_output():
+    text = canonical_jsonl([("x", {"v": 1})])
+    assert json.loads(text) == {"v": 1}
+
+
+def test_content_hash_stable():
+    text = canonical_jsonl([("x", {"v": 1})])
+    assert content_hash(text) == content_hash(text)
+    assert len(content_hash(text)) == 64
+
+
+def test_export_domain_ships_only_validated_rows(fake_client, tmp_path):
+    client = fake_client(
+        tables=[TRAINING_TABLE],
+        records={TRAINING_TABLE: [
+            _row(1, "validated", q="Q1", a="A1"),
+            _row(2, "rejected", q="Q2", a="A2"),
+            _row(3, "pending", q="Q3", a="A3"),
+            _row(4, "needs_fix", q="Q4", a="A4"),
+        ]},
+    )
+    report = export_domain(client, "kicad", out_dir=tmp_path)
+    assert report["n_items"] == 1  # only the validated row
+    out_file = tmp_path / report["output_file"]
+    written = [json.loads(ln) for ln in out_file.read_text().splitlines()]
+    assert written == [{"messages": [
+        {"role": "user", "content": "Q1"},
+        {"role": "assistant", "content": "A1"},
+    ]}]
+    assert client.added[EXPORTS_TABLE][0]["content_hash"] == report["content_hash"]
+
+
+def test_export_domain_include_pending_adds_pending_only(fake_client, tmp_path):
+    client = fake_client(
+        tables=[TRAINING_TABLE],
+        records={TRAINING_TABLE: [
+            _row(1, "validated"),
+            _row(2, "pending"),
+            _row(3, "rejected"),
+            _row(4, ""),  # missing status -> treated as pending
+        ]},
+    )
+    report = export_domain(client, "kicad", out_dir=tmp_path,
+                           include_pending=True)
+    assert report["n_items"] == 3  # validated + pending + empty, not rejected
+
+
+def test_export_domain_dry_run_writes_nothing(fake_client, tmp_path):
+    client = fake_client(
+        tables=[TRAINING_TABLE],
+        records={TRAINING_TABLE: [_row(1, "validated")]},
+    )
+    report = export_domain(client, "kicad", out_dir=tmp_path, dry_run=True)
+    assert report["n_items"] == 1
+    assert list(tmp_path.iterdir()) == []
+    assert client.added == {}
+
+
+def test_export_domain_removes_file_when_grist_logging_fails(
+        fake_client, tmp_path):
+    client = fake_client(
+        tables=[TRAINING_TABLE],
+        records={TRAINING_TABLE: [_row(1, "validated")]},
+    )
+
+    def boom(table, rows):
+        raise RuntimeError("grist down")
+
+    client.add_records = boom
+    with pytest.raises(RuntimeError, match="grist down"):
+        export_domain(client, "kicad", out_dir=tmp_path)
+    assert list(tmp_path.iterdir()) == []
diff --git a/mascarade-eval/tests/test_grist_ingest.py b/mascarade-eval/tests/test_grist_ingest.py
new file mode 100644
index 0000000..ed32b2d
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_ingest.py
@@ -0,0 +1,64 @@
+# tests/test_grist_ingest.py
+from mascarade_eval.grist import TRAINING_TABLE, TRAINING_COLUMNS
+from mascarade_eval.grist.ingest import item_key, compute_delta, ingest_rows
+
+
+def test_item_key_is_deterministic_and_domain_prefixed():
+    k1 = item_key("kicad", "How do I add a net class?")
+    k2 = item_key("kicad", "How do I add a net class?")
+    assert k1 == k2
+    assert k1.startswith("kicad-")
+
+
+def test_item_key_differs_by_text():
+    assert item_key("kicad", "A") != item_key("kicad", "B")  # same domain
+
+
+def test_compute_delta_skips_existing_keys():
+    existing = {"kicad-aaaaaaaaaa"}
+    incoming = [
+        {"item_key": "kicad-aaaaaaaaaa", "user_msg": "old"},
+        {"item_key": "kicad-bbbbbbbbbb", "user_msg": "new"},
+    ]
+    delta = compute_delta(existing, incoming)
+    assert [r["item_key"] for r in delta] == ["kicad-bbbbbbbbbb"]
+
+
+def test_compute_delta_dedupes_within_batch():
+    incoming = [
+        {"item_key": "k1", "user_msg": "x"},
+        {"item_key": "k1", "user_msg": "x-dup"},
+    ]
+    delta = compute_delta(set(), incoming)
+    assert len(delta) == 1
+    assert delta[0]["user_msg"] == "x"
+
+
+def test_ingest_rows_inserts_only_new(fake_client):
+    client = fake_client(
+        tables=[TRAINING_TABLE],
+        records={TRAINING_TABLE: [{"item_key": "k1", "user_msg": "kept"}]},
+    )
+    rows = [
+        {"item_key": "k1", "user_msg": "WOULD OVERWRITE"},
+        {"item_key": "k2", "user_msg": "fresh"},
+    ]
+    report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, rows)
+    assert report == {"inserted": 1, "skipped": 1}
+    assert client.added[TRAINING_TABLE] == [{"item_key": "k2",
+                                             "user_msg": "fresh"}]
+
+
+def test_ingest_rows_creates_table_when_absent(fake_client):
+    client = fake_client(tables=[])
+    ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS,
+                [{"item_key": "k1"}])
+    assert client.created == [(TRAINING_TABLE, TRAINING_COLUMNS)]
+
+
+def test_ingest_rows_dry_run_writes_nothing(fake_client):
+    client = fake_client(tables=[TRAINING_TABLE])
+    report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS,
+                         [{"item_key": "k1"}], dry_run=True)
+    assert report == {"inserted": 1, "skipped": 0}
+    assert client.added == {}
diff --git a/mascarade-eval/tests/test_grist_migrate_domain.py b/mascarade-eval/tests/test_grist_migrate_domain.py
new file mode 100644
index 0000000..400c8f4
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_migrate_domain.py
@@ -0,0 +1,40 @@
+# tests/test_grist_migrate_domain.py
+from mascarade_eval.grist import TRAINING_TABLE, REGISTRY_TABLE
+from mascarade_eval.grist.migrate import migrate_domain
+
+
+def test_migrate_domain_ingests_flattened_rows(fake_client):
+    client = fake_client(tables=[])
+    records = [
+        {"messages": [{"role": "user", "content": "Q1"},
+                      {"role": "assistant", "content": "A1"}]},
+        {"messages": [{"role": "user", "content": "Q2"},
+                      {"role": "assistant", "content": "A2"}]},
+    ]
+    report = migrate_domain(client, "kicad", records=records)
+    assert report["inserted"] == 2
+    added = client.added[TRAINING_TABLE]
+    assert {r["user_msg"] for r in added} == {"Q1", "Q2"}
+    assert all(r["domain"] == "kicad" for r in added)
+    assert all(r["item_key"].startswith("kicad-") for r in added)
+    assert all(r["review_status"] == "pending" for r in added)
+
+
+def test_migrate_domain_is_idempotent(fake_client):
+    client = fake_client(tables=[])
+    records = [{"messages": [{"role": "user", "content": "Q"},
+                             {"role": "assistant", "content": "A"}]}]
+    migrate_domain(client, "kicad", records=records)
+    report2 = migrate_domain(client, "kicad", records=records)
+    assert report2 == {"inserted": 0, "skipped": 1}
+
+
+def test_migrate_domain_writes_registry_row(fake_client):
+    client = fake_client(tables=[])
+    records = [{"messages": [{"role": "user", "content": "Q"},
+                             {"role": "assistant", "content": "A"}]}]
+    migrate_domain(client, "kicad", records=records)
+    reg = client.added[REGISTRY_TABLE]
+    assert reg[0]["name"] == "mascarade-kicad-train"
+    assert reg[0]["family"] == "mascarade-training"
+    assert reg[0]["hf_dataset_id"] == "Ailiance-fr/mascarade-kicad-dataset"
diff --git a/mascarade-eval/tests/test_grist_migrate_transforms.py b/mascarade-eval/tests/test_grist_migrate_transforms.py
new file mode 100644
index 0000000..d81e547
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_migrate_transforms.py
@@ -0,0 +1,73 @@
+# tests/test_grist_migrate_transforms.py
+import json
+from mascarade_eval.grist.migrate import flatten_messages, rebuild_messages
+
+
+def test_flatten_single_turn_openai():
+    rec = {"messages": [
+        {"role": "system", "content": "S"},
+        {"role": "user", "content": "Q"},
+        {"role": "assistant", "content": "A"},
+    ]}
+    flat = flatten_messages(rec)
+    assert flat == {"system": "S", "user_msg": "Q",
+                    "assistant_msg": "A", "extra_turns": ""}
+
+
+def test_flatten_single_turn_sharegpt():
+    rec = {"conversations": [
+        {"from": "human", "value": "Q"},
+        {"from": "gpt", "value": "A"},
+    ]}
+    flat = flatten_messages(rec)
+    assert flat == {"system": "", "user_msg": "Q",
+                    "assistant_msg": "A", "extra_turns": ""}
+
+
+def test_flatten_multi_turn_keeps_extra_turns():
+    rec = {"messages": [
+        {"role": "user", "content": "Q1"},
+        {"role": "assistant", "content": "A1"},
+        {"role": "user", "content": "Q2"},
+        {"role": "assistant", "content": "A2"},
+    ]}
+    flat = flatten_messages(rec)
+    assert flat["user_msg"] == "Q1"
+    assert flat["assistant_msg"] == "A1"
+    parsed = json.loads(flat["extra_turns"])
+    assert parsed == [
+        {"role": "user", "content": "Q1"},
+        {"role": "assistant", "content": "A1"},
+        {"role": "user", "content": "Q2"},
+        {"role": "assistant", "content": "A2"},
+    ]
+
+
+def test_rebuild_single_turn_round_trip():
+    rec = {"messages": [
+        {"role": "system", "content": "S"},
+        {"role": "user", "content": "Q"},
+        {"role": "assistant", "content": "A"},
+    ]}
+    flat = flatten_messages(rec)
+    assert rebuild_messages(flat) == rec
+
+
+def test_rebuild_single_turn_no_system():
+    flat = {"system": "", "user_msg": "Q",
+            "assistant_msg": "A", "extra_turns": ""}
+    assert rebuild_messages(flat) == {"messages": [
+        {"role": "user", "content": "Q"},
+        {"role": "assistant", "content": "A"},
+    ]}
+
+
+def test_rebuild_multi_turn_uses_extra_turns():
+    rec = {"messages": [
+        {"role": "user", "content": "Q1"},
+        {"role": "assistant", "content": "A1"},
+        {"role": "user", "content": "Q2"},
+        {"role": "assistant", "content": "A2"},
+    ]}
+    flat = flatten_messages(rec)
+    assert rebuild_messages(flat) == rec
diff --git a/mascarade-eval/tests/test_grist_publish.py b/mascarade-eval/tests/test_grist_publish.py
new file mode 100644
index 0000000..422ef04
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_publish.py
@@ -0,0 +1,34 @@
+# tests/test_grist_publish.py
+import pytest
+from mascarade_eval.grist.publish import publish_snapshot
+
+
+def test_publish_snapshot_uploads_with_expected_args(tmp_path):
+    snap = tmp_path / "kicad.20260519T120000Z.jsonl"
+    snap.write_text('{"messages": []}\n')
+    calls = []
+
+    def fake_upload(*, path_or_fileobj, path_in_repo, repo_id, repo_type,
+                    commit_message):
+        calls.append({
+            "path_or_fileobj": path_or_fileobj,
+            "path_in_repo": path_in_repo,
+            "repo_id": repo_id,
+            "repo_type": repo_type,
+            "commit_message": commit_message,
+        })
+
+    publish_snapshot(str(snap), "Ailiance-fr/mascarade-kicad-dataset",
+                     "kicad_chat.jsonl", uploader=fake_upload)
+    assert len(calls) == 1
+    assert calls[0]["repo_id"] == "Ailiance-fr/mascarade-kicad-dataset"
+    assert calls[0]["repo_type"] == "dataset"
+    assert calls[0]["path_in_repo"] == "kicad_chat.jsonl"
+    assert calls[0]["path_or_fileobj"] == str(snap)
+
+
+def test_publish_snapshot_rejects_missing_file(tmp_path):
+    with pytest.raises(FileNotFoundError):
+        publish_snapshot(str(tmp_path / "nope.jsonl"),
+                         "Ailiance-fr/mascarade-kicad-dataset",
+                         "kicad_chat.jsonl", uploader=lambda **k: None)
diff --git a/mascarade-eval/tests/test_grist_roundtrip.py b/mascarade-eval/tests/test_grist_roundtrip.py
new file mode 100644
index 0000000..eefbaee
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_roundtrip.py
@@ -0,0 +1,40 @@
+# tests/test_grist_roundtrip.py
+import json
+from mascarade_eval.grist.migrate import migrate_domain
+from mascarade_eval.grist.export import export_domain
+
+
+def test_migrate_then_export_round_trips(fake_client, tmp_path):
+    source = [
+        {"messages": [{"role": "user", "content": "Q1"},
+                      {"role": "assistant", "content": "A1"}]},
+        {"messages": [{"role": "system", "content": "S"},
+                      {"role": "user", "content": "Q2"},
+                      {"role": "assistant", "content": "A2"}]},
+    ]
+    client = fake_client(tables=[])
+    migrate_domain(client, "kicad", records=source)
+    # Migrated rows start as review_status=pending; include them so the
+    # round-trip exercises message semantics independent of review state.
+    report = export_domain(client, "kicad", out_dir=tmp_path,
+                           include_pending=True)
+
+    assert report["n_items"] == 2
+    out_file = tmp_path / report["output_file"]
+    exported = [json.loads(ln) for ln in out_file.read_text().splitlines()]
+
+    def norm(msgs):
+        return sorted(json.dumps(m, sort_keys=True) for m in msgs)
+
+    source_sets = {tuple(norm(r["messages"])) for r in source}
+    export_sets = {tuple(norm(r["messages"])) for r in exported}
+    assert source_sets == export_sets
+
+
+def test_double_ingest_inserts_zero_the_second_time(fake_client):
+    source = [{"messages": [{"role": "user", "content": "Q"},
+                            {"role": "assistant", "content": "A"}]}]
+    client = fake_client(tables=[])
+    migrate_domain(client, "kicad", records=source)
+    second = migrate_domain(client, "kicad", records=source)
+    assert second["inserted"] == 0
diff --git a/mascarade-eval/tests/test_grist_schema.py b/mascarade-eval/tests/test_grist_schema.py
new file mode 100644
index 0000000..14ba110
--- /dev/null
+++ b/mascarade-eval/tests/test_grist_schema.py
@@ -0,0 +1,36 @@
+# tests/test_grist_schema.py
+from mascarade_eval.grist import REVIEW_COLUMNS
+from mascarade_eval.grist.schema import ensure_review_columns, migrate_doc
+
+
+def test_ensure_review_columns_adds_all_when_absent(fake_client):
+    client = fake_client(tables=["Heldout_Items"],
+                         columns={"Heldout_Items": ["item_key", "prompt"]})
+    added = ensure_review_columns(client, "Heldout_Items")
+    assert added == list(REVIEW_COLUMNS)
+    assert client.added_columns["Heldout_Items"] == list(REVIEW_COLUMNS)
+
+
+def test_ensure_review_columns_is_idempotent(fake_client):
+    cols = ["item_key", *REVIEW_COLUMNS]
+    client = fake_client(tables=["Heldout_Items"],
+                         columns={"Heldout_Items": cols})
+    added = ensure_review_columns(client, "Heldout_Items")
+    assert added == []
+    assert "Heldout_Items" not in client.added_columns
+
+
+def test_ensure_review_columns_adds_only_missing(fake_client):
+    client = fake_client(
+        tables=["Datasets"],
+        columns={"Datasets": ["domain", "review_status", "reviewer"]})
+    added = ensure_review_columns(client, "Datasets")
+    assert added == ["reviewed_at", "review_note"]
+
+
+def test_migrate_doc_skips_absent_tables(fake_client):
+    client = fake_client(tables=["Heldout_Items"],
+                         columns={"Heldout_Items": ["item_key"]})
+    report = migrate_doc(client, ("Heldout_Items", "Mascarade_Training"))
+    assert report["Heldout_Items"] == list(REVIEW_COLUMNS)
+    assert report["Mascarade_Training"] is None
diff --git a/mascarade-eval/widgets/review-console/index.html b/mascarade-eval/widgets/review-console/index.html
new file mode 100644
index 0000000..f698881
--- /dev/null
+++ b/mascarade-eval/widgets/review-console/index.html
@@ -0,0 +1,158 @@
+<!doctype html>
+<html lang="fr">
+<head>
+<meta charset="utf-8">
+<title>Review Console</title>
+<script src="https://grist.saillant.cc/grist-plugin-api.js"></script>
+<style>
+  body { font: 14px/1.5 system-ui, sans-serif; margin: 0; padding: 16px;
+         color: #1a1a1a; }
+  #progress { color: #666; font-size: 13px; margin-bottom: 8px; }
+  .field-label { font-weight: 600; font-size: 12px; text-transform: uppercase;
+                 color: #888; margin-top: 12px; }
+  .field-value { white-space: pre-wrap; word-break: break-word;
+                 background: #f6f6f6; border-radius: 6px; padding: 8px; }
+  #context { color: #555; font-size: 13px; margin-bottom: 4px; }
+  #note { width: 100%; box-sizing: border-box; margin-top: 12px; padding: 6px; }
+  #buttons { display: flex; gap: 8px; margin-top: 12px; }
+  button { flex: 1; padding: 10px; border: 0; border-radius: 6px;
+           font-size: 14px; cursor: pointer; }
+  .validate { background: #C6E5B3; }
+  .reject   { background: #F2B5B5; }
+  .needsfix { background: #F5D9A6; }
+  .skip     { background: #E8E8E8; }
+  #done { font-size: 16px; color: #2a7; padding: 24px 0; }
+  #empty { color: #888; padding: 24px 0; }
+</style>
+</head>
+<body>
+  <div id="progress"></div>
+  <div id="card" hidden>
+    <div id="context"></div>
+    <div class="field-label">Item</div>
+    <div class="field-value" id="primary"></div>
+    <div class="field-label" id="secondary-label">Référence</div>
+    <div class="field-value" id="secondary"></div>
+    <input id="note" placeholder="note de revue (optionnelle)">
+    <div id="buttons">
+      <button class="validate" id="b-validate">✓ Valider (V)</button>
+      <button class="reject"   id="b-reject">✗ Rejeter (R)</button>
+      <button class="needsfix" id="b-needsfix">~ À corriger (F)</button>
+      <button class="skip"     id="b-skip">→ Passer (S)</button>
+    </div>
+  </div>
+  <div id="done" hidden>Tous les items en attente sont revus ✓</div>
+  <div id="empty" hidden>Aucune ligne dans cette table.</div>
+<script>
+"use strict";
+const REVIEWER = "clems";   // adjust to the reviewer's Grist choice value; decide() writes it to `reviewer`
+// Widget state: `mapping` is set by the onRecords callback; refresh() rebuilds `rows` and `queue`.
+let mapping = null; // column mapping from onRecords (display columns)
+let rows = [];      // [{id, status, primary, secondary, context}]
+let queue = [];     // ids still pending
+let cursor = 0;     // index into `queue` of the item render() shows
+// Shorthand for document.getElementById.
+const $ = (id) => document.getElementById(id);
+
+// Grist restricts onRecords/fetchSelectedTable to mapped columns, so the
+// review_* columns are invisible there. fetchTable via docApi (full
+// access) returns every column — that is the only channel that sees
+// review_status. onRecords is kept only to supply the column mapping
+// and to fire on every data change.
+async function refresh() {
+  if (!mapping) return;
+  const tableId = await grist.getSelectedTableId();
+  const data = await grist.docApi.fetchTable(tableId);
+  const ids = data.id || [];
+  const ctxCols = Array.isArray(mapping.context)
+    ? mapping.context
+    : (mapping.context ? [mapping.context] : []);
+  const cell = (name, i) => (name && data[name]) ? data[name][i] : null;
+  rows = ids.map((id, i) => {
+    const ctx = ctxCols
+      .map((c) => (data[c] ? data[c][i] : null))
+      .filter((v) => v != null && v !== "")
+      .join(" · ");
+    const p = cell(mapping.primary, i);
+    const s = cell(mapping.secondary, i);
+    return {
+      id,
+      status: (data.review_status && data.review_status[i]) || "pending",
+      primary: p == null ? "" : String(p),
+      secondary: s == null ? "" : String(s),
+      context: ctx,
+    };
+  });
+  queue = rows.filter((r) => r.status === "pending").map((r) => r.id);
+  if (cursor >= queue.length) cursor = 0;
+  render();
+}
+
+function render() {
+  const total = rows.length;
+  const reviewed = rows.filter((r) => r.status !== "pending").length;
+  $("progress").textContent = total === 0 ? ""
+    : `revus ${reviewed} / ${total} — en attente ${queue.length}`;
+  $("empty").hidden = total !== 0;
+  const item = queue.length
+    ? rows.find((r) => r.id === queue[cursor]) : null;
+  $("card").hidden = !item;
+  $("done").hidden = !(total > 0 && !item);
+  if (!item) return;
+  $("context").textContent = item.context;
+  $("primary").textContent = item.primary;
+  $("secondary").textContent = item.secondary;
+  $("secondary-label").hidden = !item.secondary;
+  $("secondary").hidden = !item.secondary;
+}
+
+async function decide(status) {
+  if (!queue.length) return;
+  const id = queue[cursor];
+  await grist.selectedTable.update({
+    id,
+    fields: {
+      review_status: status,
+      reviewer: REVIEWER,
+      reviewed_at: new Date().toISOString(),
+      review_note: $("note").value,
+    },
+  });
+  $("note").value = "";
+  // grist.onRecords refires after the update and rebuilds the queue.
+}
+
+function skip() {
+  if (!queue.length) return;
+  cursor = (cursor + 1) % queue.length;
+  render();
+}
+
+$("b-validate").onclick = () => decide("validated"); // verdict buttons call decide()
+$("b-reject").onclick = () => decide("rejected");
+$("b-needsfix").onclick = () => decide("needs_fix");
+$("b-skip").onclick = skip; // no write: skip() only moves the cursor and re-renders
+
+document.addEventListener("keydown", (e) => {
+  if (e.target.tagName === "INPUT") return;
+  const k = e.key.toLowerCase();
+  if (k === "v") decide("validated");
+  else if (k === "r") decide("rejected");
+  else if (k === "f") decide("needs_fix");
+  else if (k === "s" || e.key === "ArrowRight") skip();
+});
+
+grist.ready({
+  requiredAccess: "full", // refresh() reads through docApi.fetchTable
+  columns: [ // each `name` is a key refresh() reads from the mapping
+    { name: "primary", title: "Texte principal (prompt / user_msg)" },
+    { name: "secondary", title: "Référence (reference / assistant_msg)",
+      optional: true },
+    { name: "context", title: "Contexte (domain, source)",
+      optional: true, allowMultiple: true }, // refresh() accepts one name or a list
+  ],
+});
+grist.onRecords((records, m) => { mapping = m; void refresh(); }); // `records` unused: refresh() refetches
+</script>
+</body>
+</html>