From 8554b0b91865f1b9a7ab389d2dfe532f4998843e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 11:40:55 +0200 Subject: [PATCH 01/24] feat(grist): add constants and init scaffold --- .../mascarade_eval/grist/__init__.py | 36 +++++++++++++++++++ mascarade-eval/tests/test_grist_constants.py | 23 ++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 mascarade-eval/mascarade_eval/grist/__init__.py create mode 100644 mascarade-eval/tests/test_grist_constants.py diff --git a/mascarade-eval/mascarade_eval/grist/__init__.py b/mascarade-eval/mascarade_eval/grist/__init__.py new file mode 100644 index 0000000..ac27099 --- /dev/null +++ b/mascarade-eval/mascarade_eval/grist/__init__.py @@ -0,0 +1,36 @@ +# mascarade_eval/grist/__init__.py +"""Grist-backed dataset management for the mascarade training corpus. + +Grist is the canonical source of truth. Mining ingests in insert-only +mode (human edits in Grist are never overwritten); training and HF +publication consume a deterministic export. +""" +from pathlib import Path + +GRIST_BASE = "https://grist.saillant.cc/api" + +# Known existing doc (held-out eval). The training doc ID is provided at +# runtime via --doc or the GRIST_DOC_TRAINING env/file value. 
+DOC_HELDOUT = "eGbbrpzN3TeLq3sUd2YFA2" + +KEY_FILE = Path.home() / ".config" / "electron-rare" / "grist.env" + +TRAINING_TABLE = "Mascarade_Training" +REGISTRY_TABLE = "Datasets_Registry" +EXPORTS_TABLE = "Exports" + +TRAINING_COLUMNS = ( + "item_key", "domain", "system", "user_msg", "assistant_msg", + "extra_turns", "source", "exclure", "notes", +) +REGISTRY_COLUMNS = ( + "name", "family", "domain", "hf_dataset_id", "license", + "n_items", "notes", +) +EXPORTS_COLUMNS = ( + "export_id", "domain", "created_at", "n_items", "content_hash", + "output_file", "hf_dataset_id", +) + +_ROOT = Path(__file__).resolve().parent.parent.parent # .../mascarade-eval +EXPORTS_DIR = _ROOT / "exports" diff --git a/mascarade-eval/tests/test_grist_constants.py b/mascarade-eval/tests/test_grist_constants.py new file mode 100644 index 0000000..0625521 --- /dev/null +++ b/mascarade-eval/tests/test_grist_constants.py @@ -0,0 +1,23 @@ +# tests/test_grist_constants.py +from mascarade_eval import grist + + +def test_constants_present(): + assert grist.GRIST_BASE == "https://grist.saillant.cc/api" + assert grist.DOC_HELDOUT == "eGbbrpzN3TeLq3sUd2YFA2" + assert grist.TRAINING_TABLE == "Mascarade_Training" + assert grist.REGISTRY_TABLE == "Datasets_Registry" + assert grist.EXPORTS_TABLE == "Exports" + + +def test_training_columns_shape(): + assert grist.TRAINING_COLUMNS == ( + "item_key", "domain", "system", "user_msg", "assistant_msg", + "extra_turns", "source", "exclure", "notes", + ) + assert "exclure" in grist.TRAINING_COLUMNS + + +def test_exports_dir_under_repo_root(): + # EXPORTS_DIR sits next to the heldout/ dir at the repo root. 
+ assert grist.EXPORTS_DIR.name == "exports" From 42e7af7f8e332cf9b5e943b7886b58ea5ef72bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 12:05:24 +0200 Subject: [PATCH 02/24] feat(grist): add Grist REST client --- mascarade-eval/mascarade_eval/grist/client.py | 104 ++++++++++++++++++ mascarade-eval/tests/test_grist_client.py | 67 +++++++++++ 2 files changed, 171 insertions(+) create mode 100644 mascarade-eval/mascarade_eval/grist/client.py create mode 100644 mascarade-eval/tests/test_grist_client.py diff --git a/mascarade-eval/mascarade_eval/grist/client.py b/mascarade-eval/mascarade_eval/grist/client.py new file mode 100644 index 0000000..00d55b3 --- /dev/null +++ b/mascarade-eval/mascarade_eval/grist/client.py @@ -0,0 +1,104 @@ +# mascarade_eval/grist/client.py +"""Thin Grist REST client. The HTTP transport is injectable for tests.""" +from __future__ import annotations + +import json +import os +import sys +import urllib.error +import urllib.request + +from . 
import GRIST_BASE, KEY_FILE + +_BOOL_COLS = {"exclure"} +_INT_COLS = {"n_items", "n_rows"} + + +def _col_type(name: str) -> str: + if name in _BOOL_COLS: + return "Bool" + if name in _INT_COLS: + return "Int" + return "Text" + + +def load_grist_key() -> str: + """Return the Grist API key from env or ~/.config/electron-rare/grist.env.""" + key = os.environ.get("GRIST_API_KEY") + if key: + return key + if KEY_FILE.exists(): + for line in KEY_FILE.read_text().splitlines(): + if line.strip().startswith("GRIST_API_KEY="): + return line.split("=", 1)[1].strip().strip('"') + sys.exit("GRIST_API_KEY not found (env or ~/.config/electron-rare/grist.env)") + + +def load_doc_id(name: str) -> str | None: + """Return a doc ID stored as NAME=value in the grist.env file, or None.""" + env = os.environ.get(name) + if env: + return env + if KEY_FILE.exists(): + for line in KEY_FILE.read_text().splitlines(): + if line.strip().startswith(f"{name}="): + return line.split("=", 1)[1].strip().strip('"') + return None + + +def _http_transport(method: str, url: str, key: str, body: dict | None) -> dict: + data = json.dumps(body).encode() if body is not None else None + req = urllib.request.Request( + url, data=data, method=method, + headers={"Authorization": f"Bearer {key}", + "Content-Type": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=60) as resp: + raw = resp.read().decode("utf-8", "replace") + return json.loads(raw) if raw else {} + except urllib.error.HTTPError as exc: + detail = exc.read().decode("utf-8", "replace")[:300] + raise RuntimeError(f"Grist {method} {url} -> HTTP {exc.code}: {detail}") + + +class GristClient: + """Records-level access to one Grist document.""" + + def __init__(self, doc_id: str, key: str, transport=_http_transport): + self.doc_id = doc_id + self.key = key + self._transport = transport + + @classmethod + def from_env(cls, doc_id: str) -> "GristClient": + return cls(doc_id, load_grist_key()) + + def _api(self, method: str, path: str, body: 
dict | None = None) -> dict: + return self._transport(method, f"{GRIST_BASE}{path}", self.key, body) + + def list_tables(self) -> set[str]: + resp = self._api("GET", f"/docs/{self.doc_id}/tables") + return {t["id"] for t in resp.get("tables", [])} + + def create_table(self, table: str, columns: tuple[str, ...]) -> None: + cols = [{"id": c, "fields": {"label": c, "type": _col_type(c)}} + for c in columns] + self._api("POST", f"/docs/{self.doc_id}/tables", + {"tables": [{"id": table, "columns": cols}]}) + + def ensure_table(self, table: str, columns: tuple[str, ...]) -> None: + if table not in self.list_tables(): + self.create_table(table, columns) + + def fetch_records(self, table: str) -> list[dict]: + resp = self._api("GET", f"/docs/{self.doc_id}/tables/{table}/records") + return [{"_id": r["id"], **r["fields"]} for r in resp.get("records", [])] + + def add_records(self, table: str, rows: list[dict]) -> None: + if not rows: + return + for start in range(0, len(rows), 100): + chunk = rows[start:start + 100] + self._api("POST", f"/docs/{self.doc_id}/tables/{table}/records", + {"records": [{"fields": r} for r in chunk]}) diff --git a/mascarade-eval/tests/test_grist_client.py b/mascarade-eval/tests/test_grist_client.py new file mode 100644 index 0000000..0bbe9a1 --- /dev/null +++ b/mascarade-eval/tests/test_grist_client.py @@ -0,0 +1,67 @@ +# tests/test_grist_client.py +import pytest +from mascarade_eval.grist.client import GristClient, load_grist_key + + +def _recording_transport(log): + def transport(method, url, key, body): + log.append((method, url, body)) + if method == "GET" and url.endswith("/tables"): + return {"tables": [{"id": "Existing"}]} + if method == "GET" and "/records" in url: + return {"records": [ + {"id": 1, "fields": {"item_key": "k1", "exclure": False}}, + {"id": 2, "fields": {"item_key": "k2", "exclure": True}}, + ]} + return {} + return transport + + +def test_list_tables_returns_ids(): + log = [] + c = GristClient("doc1", "key1", 
transport=_recording_transport(log)) + assert c.list_tables() == {"Existing"} + assert log[0][0] == "GET" + assert log[0][1] == "https://grist.saillant.cc/api/docs/doc1/tables" + + +def test_fetch_records_flattens_id_into_fields(): + c = GristClient("doc1", "key1", transport=_recording_transport([])) + rows = c.fetch_records("Mascarade_Training") + assert rows == [ + {"_id": 1, "item_key": "k1", "exclure": False}, + {"_id": 2, "item_key": "k2", "exclure": True}, + ] + + +def test_add_records_posts_fields_wrapped(): + log = [] + c = GristClient("doc1", "key1", transport=_recording_transport(log)) + c.add_records("T", [{"a": "1"}, {"a": "2"}]) + method, url, body = log[-1] + assert method == "POST" + assert url.endswith("/docs/doc1/tables/T/records") + assert body == {"records": [{"fields": {"a": "1"}}, + {"fields": {"a": "2"}}]} + + +def test_add_records_noop_on_empty(): + log = [] + c = GristClient("doc1", "key1", transport=_recording_transport(log)) + c.add_records("T", []) + assert log == [] + + +def test_create_table_types_exclure_as_bool(): + log = [] + c = GristClient("doc1", "key1", transport=_recording_transport(log)) + c.create_table("T", ("item_key", "exclure", "n_items")) + method, url, body = log[-1] + assert method == "POST" + cols = {col["id"]: col["fields"]["type"] for col in body["tables"][0]["columns"]} + assert cols == {"item_key": "Text", "exclure": "Bool", "n_items": "Int"} + + +def test_load_grist_key_prefers_env(monkeypatch): + monkeypatch.setenv("GRIST_API_KEY", "env-key") + assert load_grist_key() == "env-key" From 14dae57282897ad94a7fbb20775c80eb3751a5d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 12:13:34 +0200 Subject: [PATCH 03/24] feat(grist): message flatten/rebuild transforms --- .../mascarade_eval/grist/migrate.py | 65 +++++++++++++++++ .../tests/test_grist_migrate_transforms.py | 73 +++++++++++++++++++ 2 files changed, 138 insertions(+) 
create mode 100644 mascarade-eval/mascarade_eval/grist/migrate.py create mode 100644 mascarade-eval/tests/test_grist_migrate_transforms.py diff --git a/mascarade-eval/mascarade_eval/grist/migrate.py b/mascarade-eval/mascarade_eval/grist/migrate.py new file mode 100644 index 0000000..7f434df --- /dev/null +++ b/mascarade-eval/mascarade_eval/grist/migrate.py @@ -0,0 +1,65 @@ +# mascarade_eval/grist/migrate.py +"""Backfill the training corpus from HuggingFace into Grist. + +Pure transforms (flatten_messages / rebuild_messages) are unit-tested; +migrate_domain wires them to HF download + insert-only ingestion. +""" +from __future__ import annotations + +import json + +_ROLE_NORMAL = {"user": "user", "human": "user", + "assistant": "assistant", "gpt": "assistant", + "system": "system"} + + +def _normalize(record: dict) -> list[dict]: + """Return [{role, content}, ...] from an OpenAI or ShareGPT record.""" + raw = record.get("messages") or record.get("conversations") or [] + out: list[dict] = [] + for m in raw: + if not isinstance(m, dict): + continue + role = _ROLE_NORMAL.get(m.get("role") or m.get("from") or "") + if role is None: + continue + content = m.get("content") or m.get("value") or "" + out.append({"role": role, "content": content}) + return out + + +def flatten_messages(record: dict) -> dict: + """Collapse a chat record into editable columns. + + Single-turn (<=1 system, exactly 1 user, exactly 1 assistant) maps to + system/user_msg/assistant_msg with empty extra_turns. Anything else + keeps the full normalized message list as JSON in extra_turns. 
+ """ + msgs = _normalize(record) + systems = [m for m in msgs if m["role"] == "system"] + users = [m for m in msgs if m["role"] == "user"] + assistants = [m for m in msgs if m["role"] == "assistant"] + single_turn = (len(systems) <= 1 and len(users) == 1 + and len(assistants) == 1 and len(msgs) == len(systems) + 2) + flat = { + "system": systems[0]["content"] if systems else "", + "user_msg": users[0]["content"] if users else "", + "assistant_msg": assistants[0]["content"] if assistants else "", + "extra_turns": "", + } + if not single_turn: + flat["extra_turns"] = json.dumps(msgs, ensure_ascii=False) + return flat + + +def rebuild_messages(row: dict) -> dict: + """Inverse of flatten_messages: return {"messages": [...]}.""" + extra = row.get("extra_turns") or "" + if extra: + return {"messages": json.loads(extra)} + msgs: list[dict] = [] + if row.get("system"): + msgs.append({"role": "system", "content": row["system"]}) + msgs.append({"role": "user", "content": row.get("user_msg", "")}) + msgs.append({"role": "assistant", "content": row.get("assistant_msg", "")}) + return {"messages": msgs} diff --git a/mascarade-eval/tests/test_grist_migrate_transforms.py b/mascarade-eval/tests/test_grist_migrate_transforms.py new file mode 100644 index 0000000..d81e547 --- /dev/null +++ b/mascarade-eval/tests/test_grist_migrate_transforms.py @@ -0,0 +1,73 @@ +# tests/test_grist_migrate_transforms.py +import json +from mascarade_eval.grist.migrate import flatten_messages, rebuild_messages + + +def test_flatten_single_turn_openai(): + rec = {"messages": [ + {"role": "system", "content": "S"}, + {"role": "user", "content": "Q"}, + {"role": "assistant", "content": "A"}, + ]} + flat = flatten_messages(rec) + assert flat == {"system": "S", "user_msg": "Q", + "assistant_msg": "A", "extra_turns": ""} + + +def test_flatten_single_turn_sharegpt(): + rec = {"conversations": [ + {"from": "human", "value": "Q"}, + {"from": "gpt", "value": "A"}, + ]} + flat = flatten_messages(rec) + assert 
flat == {"system": "", "user_msg": "Q", + "assistant_msg": "A", "extra_turns": ""} + + +def test_flatten_multi_turn_keeps_extra_turns(): + rec = {"messages": [ + {"role": "user", "content": "Q1"}, + {"role": "assistant", "content": "A1"}, + {"role": "user", "content": "Q2"}, + {"role": "assistant", "content": "A2"}, + ]} + flat = flatten_messages(rec) + assert flat["user_msg"] == "Q1" + assert flat["assistant_msg"] == "A1" + parsed = json.loads(flat["extra_turns"]) + assert parsed == [ + {"role": "user", "content": "Q1"}, + {"role": "assistant", "content": "A1"}, + {"role": "user", "content": "Q2"}, + {"role": "assistant", "content": "A2"}, + ] + + +def test_rebuild_single_turn_round_trip(): + rec = {"messages": [ + {"role": "system", "content": "S"}, + {"role": "user", "content": "Q"}, + {"role": "assistant", "content": "A"}, + ]} + flat = flatten_messages(rec) + assert rebuild_messages(flat) == rec + + +def test_rebuild_single_turn_no_system(): + flat = {"system": "", "user_msg": "Q", + "assistant_msg": "A", "extra_turns": ""} + assert rebuild_messages(flat) == {"messages": [ + {"role": "user", "content": "Q"}, + {"role": "assistant", "content": "A"}, + ]} + + +def test_rebuild_multi_turn_uses_extra_turns(): + rec = {"messages": [ + {"role": "user", "content": "Q1"}, + {"role": "assistant", "content": "A1"}, + {"role": "user", "content": "Q2"}, + {"role": "assistant", "content": "A2"}, + ]} + flat = flatten_messages(rec) + assert rebuild_messages(flat) == rec From ae73ef73ffd34724daf17082fc218bbb41ebf001 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 12:21:23 +0200 Subject: [PATCH 04/24] feat(grist): add insert-only ingestion core Implement item_key (domain-prefixed SHA1), compute_delta (skips existing keys + dedupes within batch), and ingest_rows (ensure-table, fetch existing keys, insert delta only). dry_run=True computes without writing. 
Add FakeClient fixture in conftest.py for reuse in tasks 5/6/9. --- mascarade-eval/mascarade_eval/grist/ingest.py | 42 ++++++++++++ mascarade-eval/tests/conftest.py | 36 +++++++++++ mascarade-eval/tests/test_grist_ingest.py | 64 +++++++++++++++++++ 3 files changed, 142 insertions(+) create mode 100644 mascarade-eval/mascarade_eval/grist/ingest.py create mode 100644 mascarade-eval/tests/conftest.py create mode 100644 mascarade-eval/tests/test_grist_ingest.py diff --git a/mascarade-eval/mascarade_eval/grist/ingest.py b/mascarade-eval/mascarade_eval/grist/ingest.py new file mode 100644 index 0000000..9c391f9 --- /dev/null +++ b/mascarade-eval/mascarade_eval/grist/ingest.py @@ -0,0 +1,42 @@ +# mascarade_eval/grist/ingest.py +"""Insert-only ingestion into Grist. + +This module holds the source-of-truth invariant: an existing item row is +NEVER updated, so human edits in Grist survive re-ingestion. +""" +from __future__ import annotations + +import hashlib + + +def item_key(domain: str, text: str) -> str: + """Stable key for an item: domain prefix + SHA1 of its text.""" + digest = hashlib.sha1(text.encode("utf-8")).hexdigest()[:10] + return f"{domain}-{digest}" + + +def compute_delta(existing_keys: set[str], incoming: list[dict], + key_field: str = "item_key") -> list[dict]: + """Return only rows whose key is absent from Grist and unseen in batch.""" + seen: set[str] = set(existing_keys) + delta: list[dict] = [] + for row in incoming: + key = row[key_field] + if key in seen: + continue + seen.add(key) + delta.append(row) + return delta + + +def ingest_rows(client, table: str, columns: tuple[str, ...], + rows: list[dict], key_field: str = "item_key", + dry_run: bool = False) -> dict: + """Insert-only ingestion. 
Returns {"inserted": n, "skipped": n}.""" + client.ensure_table(table, columns) + existing = {r[key_field] for r in client.fetch_records(table) + if key_field in r} + delta = compute_delta(existing, rows, key_field) + if not dry_run: + client.add_records(table, delta) + return {"inserted": len(delta), "skipped": len(rows) - len(delta)} diff --git a/mascarade-eval/tests/conftest.py b/mascarade-eval/tests/conftest.py new file mode 100644 index 0000000..ff7c203 --- /dev/null +++ b/mascarade-eval/tests/conftest.py @@ -0,0 +1,36 @@ +# tests/conftest.py +import pytest + + +class FakeClient: + """In-memory stand-in for GristClient. Records all writes.""" + + def __init__(self, tables=None, records=None): + self.doc_id = "fake-doc" + self._tables = set(tables or []) + self._records = {t: list(rs) for t, rs in (records or {}).items()} + self.created = [] + self.added = {} + + def list_tables(self): + return set(self._tables) + + def create_table(self, table, columns): + self._tables.add(table) + self.created.append((table, tuple(columns))) + + def ensure_table(self, table, columns): + if table not in self._tables: + self.create_table(table, columns) + + def fetch_records(self, table): + return [dict(r) for r in self._records.get(table, [])] + + def add_records(self, table, rows): + self.added.setdefault(table, []).extend(rows) + self._records.setdefault(table, []).extend(rows) + + +@pytest.fixture +def fake_client(): + return FakeClient diff --git a/mascarade-eval/tests/test_grist_ingest.py b/mascarade-eval/tests/test_grist_ingest.py new file mode 100644 index 0000000..ed32b2d --- /dev/null +++ b/mascarade-eval/tests/test_grist_ingest.py @@ -0,0 +1,64 @@ +# tests/test_grist_ingest.py +from mascarade_eval.grist import TRAINING_TABLE, TRAINING_COLUMNS +from mascarade_eval.grist.ingest import item_key, compute_delta, ingest_rows + + +def test_item_key_is_deterministic_and_domain_prefixed(): + k1 = item_key("kicad", "How do I add a net class?") + k2 = item_key("kicad", "How do 
I add a net class?") + assert k1 == k2 + assert k1.startswith("kicad-") + + +def test_item_key_differs_by_text(): + assert item_key("kicad", "A") != item_key("kicad", "B") + + +def test_compute_delta_skips_existing_keys(): + existing = {"kicad-aaaaaaaaaa"} + incoming = [ + {"item_key": "kicad-aaaaaaaaaa", "user_msg": "old"}, + {"item_key": "kicad-bbbbbbbbbb", "user_msg": "new"}, + ] + delta = compute_delta(existing, incoming) + assert [r["item_key"] for r in delta] == ["kicad-bbbbbbbbbb"] + + +def test_compute_delta_dedupes_within_batch(): + incoming = [ + {"item_key": "k1", "user_msg": "x"}, + {"item_key": "k1", "user_msg": "x-dup"}, + ] + delta = compute_delta(set(), incoming) + assert len(delta) == 1 + assert delta[0]["user_msg"] == "x" + + +def test_ingest_rows_inserts_only_new(fake_client): + client = fake_client( + tables=[TRAINING_TABLE], + records={TRAINING_TABLE: [{"item_key": "k1", "user_msg": "kept"}]}, + ) + rows = [ + {"item_key": "k1", "user_msg": "WOULD OVERWRITE"}, + {"item_key": "k2", "user_msg": "fresh"}, + ] + report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, rows) + assert report == {"inserted": 1, "skipped": 1} + assert client.added[TRAINING_TABLE] == [{"item_key": "k2", + "user_msg": "fresh"}] + + +def test_ingest_rows_creates_table_when_absent(fake_client): + client = fake_client(tables=[]) + ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, + [{"item_key": "k1"}]) + assert client.created == [(TRAINING_TABLE, TRAINING_COLUMNS)] + + +def test_ingest_rows_dry_run_writes_nothing(fake_client): + client = fake_client(tables=[TRAINING_TABLE]) + report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, + [{"item_key": "k1"}], dry_run=True) + assert report == {"inserted": 1, "skipped": 0} + assert client.added == {} From a168b17a3b81beda9ac079ffb516b5f27c21aa62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 12:27:14 +0200 Subject: 
[PATCH 05/24] feat(grist): add deterministic export canonical_jsonl sorts by item_key and uses sort_keys=True so the same Grist state always produces the same SHA256 digest. export_domain filters exclure rows, writes a hashed .jsonl snapshot, and journals one row to the Exports table. dry_run=True computes the report without any I/O. --- mascarade-eval/mascarade_eval/grist/export.py | 64 +++++++++++++++++ mascarade-eval/tests/test_grist_export.py | 69 +++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 mascarade-eval/mascarade_eval/grist/export.py create mode 100644 mascarade-eval/tests/test_grist_export.py diff --git a/mascarade-eval/mascarade_eval/grist/export.py b/mascarade-eval/mascarade_eval/grist/export.py new file mode 100644 index 0000000..61b943e --- /dev/null +++ b/mascarade-eval/mascarade_eval/grist/export.py @@ -0,0 +1,64 @@ +# mascarade_eval/grist/export.py +"""Deterministic Grist -> .jsonl snapshot export, journaled in Exports.""" +from __future__ import annotations + +import datetime +import hashlib +import json +from pathlib import Path + +from . import EXPORTS_COLUMNS, EXPORTS_TABLE, TRAINING_TABLE +from .migrate import rebuild_messages + + +def canonical_jsonl(keyed_rows: list[tuple[str, dict]]) -> str: + """Serialize (sort_key, object) pairs to JSONL ordered by sort_key. + + Same input set -> same bytes, regardless of input order. The sort key + itself is not written; only the object is. 
+ """ + ordered = sorted(keyed_rows, key=lambda kv: kv[0]) + return "\n".join(json.dumps(obj, ensure_ascii=False, sort_keys=True) + for _, obj in ordered) + + +def content_hash(text: str) -> str: + """SHA256 hex digest of the canonical snapshot text.""" + return hashlib.sha256(text.encode("utf-8")).hexdigest() + + +def _timestamp() -> str: + return datetime.datetime.now(datetime.UTC).strftime("%Y%m%dT%H%M%SZ") + + +def export_domain(client, domain: str, out_dir: Path, + dry_run: bool = False) -> dict: + """Export one domain's non-excluded training rows to a hashed snapshot. + + Returns a report dict matching the Exports row written to Grist. + """ + rows = [r for r in client.fetch_records(TRAINING_TABLE) + if r.get("domain") == domain and not r.get("exclure")] + payload = canonical_jsonl( + [(r.get("item_key", ""), rebuild_messages(r)) for r in rows]) + digest = content_hash(payload) + stamp = _timestamp() + filename = f"{domain}.{stamp}.jsonl" + report = { + "export_id": f"{domain}-{stamp}", + "domain": domain, + "created_at": stamp, + "n_items": len(rows), + "content_hash": digest, + "output_file": filename, + "hf_dataset_id": "", + } + if dry_run: + return report + out_dir = Path(out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + (out_dir / filename).write_text(payload + ("\n" if payload else ""), + encoding="utf-8") + client.ensure_table(EXPORTS_TABLE, EXPORTS_COLUMNS) + client.add_records(EXPORTS_TABLE, [report]) + return report diff --git a/mascarade-eval/tests/test_grist_export.py b/mascarade-eval/tests/test_grist_export.py new file mode 100644 index 0000000..3c8321e --- /dev/null +++ b/mascarade-eval/tests/test_grist_export.py @@ -0,0 +1,69 @@ +# tests/test_grist_export.py +import json +from mascarade_eval.grist import TRAINING_TABLE, EXPORTS_TABLE +from mascarade_eval.grist.export import ( + canonical_jsonl, content_hash, export_domain, +) + + +def test_canonical_jsonl_sorts_by_key(): + keyed = [("b", {"v": 2}), ("a", {"v": 1})] + lines = 
canonical_jsonl(keyed).splitlines() + assert json.loads(lines[0]) == {"v": 1} + assert json.loads(lines[1]) == {"v": 2} + + +def test_canonical_jsonl_is_order_independent(): + a = [("x", {"v": 1}), ("y", {"v": 2})] + b = [("y", {"v": 2}), ("x", {"v": 1})] + assert canonical_jsonl(a) == canonical_jsonl(b) + + +def test_canonical_jsonl_omits_the_sort_key_from_output(): + text = canonical_jsonl([("x", {"v": 1})]) + assert json.loads(text) == {"v": 1} # no "x", no item_key + + +def test_content_hash_stable(): + text = canonical_jsonl([("x", {"v": 1})]) + assert content_hash(text) == content_hash(text) + assert len(content_hash(text)) == 64 + + +def test_export_domain_filters_excluded_and_writes_file(fake_client, tmp_path): + client = fake_client( + tables=[TRAINING_TABLE], + records={TRAINING_TABLE: [ + {"_id": 1, "item_key": "kicad-1", "domain": "kicad", + "user_msg": "Q1", "assistant_msg": "A1", "system": "", + "extra_turns": "", "source": "", "exclure": False, "notes": ""}, + {"_id": 2, "item_key": "kicad-2", "domain": "kicad", + "user_msg": "Q2", "assistant_msg": "A2", "system": "", + "extra_turns": "", "source": "", "exclure": True, "notes": ""}, + ]}, + ) + report = export_domain(client, "kicad", out_dir=tmp_path) + assert report["n_items"] == 1 # the excluded row is dropped + out_file = tmp_path / report["output_file"] + assert out_file.exists() + written = [json.loads(ln) for ln in out_file.read_text().splitlines()] + assert written == [{"messages": [ + {"role": "user", "content": "Q1"}, + {"role": "assistant", "content": "A1"}, + ]}] + assert client.added[EXPORTS_TABLE][0]["domain"] == "kicad" + assert client.added[EXPORTS_TABLE][0]["content_hash"] == report["content_hash"] + + +def test_export_domain_dry_run_writes_nothing(fake_client, tmp_path): + client = fake_client( + tables=[TRAINING_TABLE], + records={TRAINING_TABLE: [ + {"_id": 1, "item_key": "kicad-1", "domain": "kicad", + "user_msg": "Q", "assistant_msg": "A", "system": "", + "extra_turns": "", 
"exclure": False}]}, + ) + report = export_domain(client, "kicad", out_dir=tmp_path, dry_run=True) + assert report["n_items"] == 1 + assert list(tmp_path.iterdir()) == [] + assert client.added == {} From a34041e865383447ad517d3bec5ad8fdd54dc7de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 12:33:10 +0200 Subject: [PATCH 06/24] fix(grist): drop orphan snapshot on export failure --- mascarade-eval/mascarade_eval/grist/export.py | 13 ++++++++---- mascarade-eval/tests/test_grist_export.py | 20 +++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/mascarade-eval/mascarade_eval/grist/export.py b/mascarade-eval/mascarade_eval/grist/export.py index 61b943e..ec28cc5 100644 --- a/mascarade-eval/mascarade_eval/grist/export.py +++ b/mascarade-eval/mascarade_eval/grist/export.py @@ -57,8 +57,13 @@ def export_domain(client, domain: str, out_dir: Path, return report out_dir = Path(out_dir) out_dir.mkdir(parents=True, exist_ok=True) - (out_dir / filename).write_text(payload + ("\n" if payload else ""), - encoding="utf-8") - client.ensure_table(EXPORTS_TABLE, EXPORTS_COLUMNS) - client.add_records(EXPORTS_TABLE, [report]) + out_path = out_dir / filename + out_path.write_text(payload + ("\n" if payload else ""), + encoding="utf-8") + try: + client.ensure_table(EXPORTS_TABLE, EXPORTS_COLUMNS) + client.add_records(EXPORTS_TABLE, [report]) + except Exception: + out_path.unlink(missing_ok=True) + raise return report diff --git a/mascarade-eval/tests/test_grist_export.py b/mascarade-eval/tests/test_grist_export.py index 3c8321e..f9baea9 100644 --- a/mascarade-eval/tests/test_grist_export.py +++ b/mascarade-eval/tests/test_grist_export.py @@ -1,5 +1,6 @@ # tests/test_grist_export.py import json +import pytest from mascarade_eval.grist import TRAINING_TABLE, EXPORTS_TABLE from mascarade_eval.grist.export import ( canonical_jsonl, content_hash, export_domain, @@ -67,3 
+68,22 @@ def test_export_domain_dry_run_writes_nothing(fake_client, tmp_path): assert report["n_items"] == 1 assert list(tmp_path.iterdir()) == [] assert client.added == {} + + +def test_export_domain_removes_file_when_grist_logging_fails( + fake_client, tmp_path): + client = fake_client( + tables=[TRAINING_TABLE], + records={TRAINING_TABLE: [ + {"_id": 1, "item_key": "kicad-1", "domain": "kicad", + "user_msg": "Q", "assistant_msg": "A", "system": "", + "extra_turns": "", "exclure": False}]}, + ) + + def boom(table, rows): + raise RuntimeError("grist down") + + client.add_records = boom + with pytest.raises(RuntimeError, match="grist down"): + export_domain(client, "kicad", out_dir=tmp_path) + assert list(tmp_path.iterdir()) == [] # no orphaned snapshot file From 2d572ef72d4dc5e45ee6d01fbe2990faf6940e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 12:39:22 +0200 Subject: [PATCH 07/24] feat(grist): wire HF backfill into ingestion --- .../mascarade_eval/grist/migrate.py | 62 +++++++++++++++++++ .../tests/test_grist_migrate_domain.py | 40 ++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 mascarade-eval/tests/test_grist_migrate_domain.py diff --git a/mascarade-eval/mascarade_eval/grist/migrate.py b/mascarade-eval/mascarade_eval/grist/migrate.py index 7f434df..c804030 100644 --- a/mascarade-eval/mascarade_eval/grist/migrate.py +++ b/mascarade-eval/mascarade_eval/grist/migrate.py @@ -8,6 +8,10 @@ import json +from mascarade_eval import HF_ORG +from . 
import REGISTRY_COLUMNS, REGISTRY_TABLE, TRAINING_COLUMNS, TRAINING_TABLE +from .ingest import ingest_rows, item_key + _ROLE_NORMAL = {"user": "user", "human": "user", "assistant": "assistant", "gpt": "assistant", "system": "system"} @@ -63,3 +67,61 @@ def rebuild_messages(row: dict) -> dict: msgs.append({"role": "user", "content": row.get("user_msg", "")}) msgs.append({"role": "assistant", "content": row.get("assistant_msg", "")}) return {"messages": msgs} + + +def _download_training_records(domain: str) -> list[dict]: + """Download _chat.jsonl from HF and parse it into records.""" + from huggingface_hub import hf_hub_download + path = hf_hub_download( + repo_id=f"{HF_ORG}/mascarade-{domain}-dataset", + filename=f"{domain}_chat.jsonl", + repo_type="dataset", + ) + records: list[dict] = [] + with open(path, encoding="utf-8") as fh: + for line in fh: + line = line.strip() + if line: + records.append(json.loads(line)) + return records + + +def _to_training_row(domain: str, record: dict) -> dict: + flat = flatten_messages(record) + return { + "item_key": item_key(domain, flat["user_msg"]), + "domain": domain, + "system": flat["system"], + "user_msg": flat["user_msg"], + "assistant_msg": flat["assistant_msg"], + "extra_turns": flat["extra_turns"], + "source": f"{HF_ORG}/mascarade-{domain}-dataset", + "exclure": False, + "notes": "", + } + + +def migrate_domain(client, domain: str, records: list[dict] | None = None, + dry_run: bool = False) -> dict: + """Backfill one domain's HF training data into Grist (insert-only). + + Pass `records` to skip the HF download (used by tests). 
+ """ + if records is None: + records = _download_training_records(domain) + rows = [_to_training_row(domain, r) for r in records] + report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, rows, + dry_run=dry_run) + if not dry_run: + client.ensure_table(REGISTRY_TABLE, REGISTRY_COLUMNS) + client.add_records(REGISTRY_TABLE, [{ + "name": f"mascarade-{domain}-train", + "family": "mascarade-training", + "domain": domain, + "hf_dataset_id": f"{HF_ORG}/mascarade-{domain}-dataset", + "license": "CC-BY-SA-4.0", + "n_items": len(rows), + "notes": f"backfilled {report['inserted']} new, " + f"{report['skipped']} already present", + }]) + return report diff --git a/mascarade-eval/tests/test_grist_migrate_domain.py b/mascarade-eval/tests/test_grist_migrate_domain.py new file mode 100644 index 0000000..64a365e --- /dev/null +++ b/mascarade-eval/tests/test_grist_migrate_domain.py @@ -0,0 +1,40 @@ +# tests/test_grist_migrate_domain.py +from mascarade_eval.grist import TRAINING_TABLE, REGISTRY_TABLE +from mascarade_eval.grist.migrate import migrate_domain + + +def test_migrate_domain_ingests_flattened_rows(fake_client): + client = fake_client(tables=[]) + records = [ + {"messages": [{"role": "user", "content": "Q1"}, + {"role": "assistant", "content": "A1"}]}, + {"messages": [{"role": "user", "content": "Q2"}, + {"role": "assistant", "content": "A2"}]}, + ] + report = migrate_domain(client, "kicad", records=records) + assert report["inserted"] == 2 + added = client.added[TRAINING_TABLE] + assert {r["user_msg"] for r in added} == {"Q1", "Q2"} + assert all(r["domain"] == "kicad" for r in added) + assert all(r["item_key"].startswith("kicad-") for r in added) + assert all(r["exclure"] is False for r in added) + + +def test_migrate_domain_is_idempotent(fake_client): + client = fake_client(tables=[]) + records = [{"messages": [{"role": "user", "content": "Q"}, + {"role": "assistant", "content": "A"}]}] + migrate_domain(client, "kicad", records=records) + report2 = 
migrate_domain(client, "kicad", records=records) + assert report2 == {"inserted": 0, "skipped": 1} + + +def test_migrate_domain_writes_registry_row(fake_client): + client = fake_client(tables=[]) + records = [{"messages": [{"role": "user", "content": "Q"}, + {"role": "assistant", "content": "A"}]}] + migrate_domain(client, "kicad", records=records) + reg = client.added[REGISTRY_TABLE] + assert reg[0]["name"] == "mascarade-kicad-train" + assert reg[0]["family"] == "mascarade-training" + assert reg[0]["hf_dataset_id"] == "Ailiance-fr/mascarade-kicad-dataset" From 0d86e6766def8454afbab3c8611a120e28de8a22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 12:46:24 +0200 Subject: [PATCH 08/24] feat(grist): add HuggingFace snapshot publisher --- .../mascarade_eval/grist/publish.py | 31 +++++++++++++++++ mascarade-eval/tests/test_grist_publish.py | 34 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 mascarade-eval/mascarade_eval/grist/publish.py create mode 100644 mascarade-eval/tests/test_grist_publish.py diff --git a/mascarade-eval/mascarade_eval/grist/publish.py b/mascarade-eval/mascarade_eval/grist/publish.py new file mode 100644 index 0000000..22ed14c --- /dev/null +++ b/mascarade-eval/mascarade_eval/grist/publish.py @@ -0,0 +1,31 @@ +# mascarade_eval/grist/publish.py +"""Publish an exported snapshot to its HuggingFace dataset repo.""" +from __future__ import annotations + +from pathlib import Path + + +def _hf_upload(*, path_or_fileobj, path_in_repo, repo_id, repo_type, + commit_message): + from huggingface_hub import upload_file + upload_file(path_or_fileobj=path_or_fileobj, path_in_repo=path_in_repo, + repo_id=repo_id, repo_type=repo_type, + commit_message=commit_message) + + +def publish_snapshot(snapshot_path: str, hf_dataset_id: str, + filename: str, uploader=_hf_upload) -> None: + """Upload one exported .jsonl snapshot to its HF dataset repo. 
+ + `uploader` is injected for testing; production uses huggingface_hub. + """ + path = Path(snapshot_path) + if not path.exists(): + raise FileNotFoundError(f"snapshot not found: {snapshot_path}") + uploader( + path_or_fileobj=str(path), + path_in_repo=filename, + repo_id=hf_dataset_id, + repo_type="dataset", + commit_message=f"dataset: refresh {filename} from Grist export", + ) diff --git a/mascarade-eval/tests/test_grist_publish.py b/mascarade-eval/tests/test_grist_publish.py new file mode 100644 index 0000000..422ef04 --- /dev/null +++ b/mascarade-eval/tests/test_grist_publish.py @@ -0,0 +1,34 @@ +# tests/test_grist_publish.py +import pytest +from mascarade_eval.grist.publish import publish_snapshot + + +def test_publish_snapshot_uploads_with_expected_args(tmp_path): + snap = tmp_path / "kicad.20260519T120000Z.jsonl" + snap.write_text('{"messages": []}\n') + calls = [] + + def fake_upload(*, path_or_fileobj, path_in_repo, repo_id, repo_type, + commit_message): + calls.append({ + "path_or_fileobj": path_or_fileobj, + "path_in_repo": path_in_repo, + "repo_id": repo_id, + "repo_type": repo_type, + "commit_message": commit_message, + }) + + publish_snapshot(str(snap), "Ailiance-fr/mascarade-kicad-dataset", + "kicad_chat.jsonl", uploader=fake_upload) + assert len(calls) == 1 + assert calls[0]["repo_id"] == "Ailiance-fr/mascarade-kicad-dataset" + assert calls[0]["repo_type"] == "dataset" + assert calls[0]["path_in_repo"] == "kicad_chat.jsonl" + assert calls[0]["path_or_fileobj"] == str(snap) + + +def test_publish_snapshot_rejects_missing_file(tmp_path): + with pytest.raises(FileNotFoundError): + publish_snapshot(str(tmp_path / "nope.jsonl"), + "Ailiance-fr/mascarade-kicad-dataset", + "kicad_chat.jsonl", uploader=lambda **k: None) From 2ab86ba8c46c885d17fe370c71c1b2863170a9f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 12:51:55 +0200 Subject: [PATCH 09/24] feat(grist): 
add dataset management CLI --- mascarade-eval/mascarade_eval/grist/cli.py | 111 +++++++++++++++++++++ mascarade-eval/tests/test_grist_cli.py | 43 ++++++++ 2 files changed, 154 insertions(+) create mode 100644 mascarade-eval/mascarade_eval/grist/cli.py create mode 100644 mascarade-eval/tests/test_grist_cli.py diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py new file mode 100644 index 0000000..aab2795 --- /dev/null +++ b/mascarade-eval/mascarade_eval/grist/cli.py @@ -0,0 +1,111 @@ +# mascarade_eval/grist/cli.py +"""CLI for Grist-backed dataset management: ingest / export / migrate / publish. + +Run: python -m mascarade_eval.grist.cli [options] +""" +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + +from . import EXPORTS_DIR, TRAINING_COLUMNS, TRAINING_TABLE +from .client import GristClient, load_doc_id +from .export import export_domain +from .ingest import item_key, ingest_rows +from .migrate import flatten_messages, migrate_domain +from .publish import publish_snapshot + + +def build_parser() -> argparse.ArgumentParser: + ap = argparse.ArgumentParser(prog="grist-dataset", description=__doc__) + sub = ap.add_subparsers(dest="command", required=True) + + p_ing = sub.add_parser("ingest", help="insert-only ingest a .jsonl") + p_ing.add_argument("--doc") + p_ing.add_argument("--jsonl", required=True) + p_ing.add_argument("--domain", required=True) + p_ing.add_argument("--dry-run", action="store_true") + + p_exp = sub.add_parser("export", help="export a domain to a snapshot") + p_exp.add_argument("--doc") + p_exp.add_argument("--domain", required=True) + p_exp.add_argument("--dry-run", action="store_true") + + p_mig = sub.add_parser("migrate", help="backfill a domain from HF") + p_mig.add_argument("--doc") + p_mig.add_argument("--domain", required=True) + p_mig.add_argument("--dry-run", action="store_true") + + p_pub = sub.add_parser("publish", help="upload a 
snapshot to HF") + p_pub.add_argument("--snapshot", required=True) + p_pub.add_argument("--hf-dataset", required=True) + p_pub.add_argument("--filename", required=True) + + return ap + + +def resolve_doc(doc_arg: str | None) -> str: + """Return the doc ID from --doc or the GRIST_DOC_TRAINING env/file value.""" + if doc_arg: + return doc_arg + doc = load_doc_id("GRIST_DOC_TRAINING") + if not doc: + sys.exit("no doc ID: pass --doc or set GRIST_DOC_TRAINING") + return doc + + +def _ingest_jsonl_rows(domain: str, jsonl_path: str) -> list[dict]: + rows: list[dict] = [] + for line in Path(jsonl_path).read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line: + continue + try: + record = json.loads(line) + except json.JSONDecodeError as exc: + print(f"[warn] skipped malformed line: {exc}", file=sys.stderr) + continue + flat = flatten_messages(record) + rows.append({ + "item_key": item_key(domain, flat["user_msg"]), + "domain": domain, + "system": flat["system"], + "user_msg": flat["user_msg"], + "assistant_msg": flat["assistant_msg"], + "extra_turns": flat["extra_turns"], + "source": record.get("source", ""), + "exclure": False, + "notes": "", + }) + return rows + + +def main(argv: list[str] | None = None) -> int: + args = build_parser().parse_args(argv) + + if args.command == "publish": + publish_snapshot(args.snapshot, args.hf_dataset, args.filename) + print(f"published {args.snapshot} -> {args.hf_dataset}") + return 0 + + client = GristClient.from_env(resolve_doc(args.doc)) + + if args.command == "ingest": + rows = _ingest_jsonl_rows(args.domain, args.jsonl) + report = ingest_rows(client, TRAINING_TABLE, TRAINING_COLUMNS, rows, + dry_run=args.dry_run) + print(f"ingest {args.domain}: {report}") + elif args.command == "export": + report = export_domain(client, args.domain, EXPORTS_DIR, + dry_run=args.dry_run) + print(f"export {args.domain}: {report}") + elif args.command == "migrate": + report = migrate_domain(client, args.domain, 
dry_run=args.dry_run) + print(f"migrate {args.domain}: {report}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/mascarade-eval/tests/test_grist_cli.py b/mascarade-eval/tests/test_grist_cli.py new file mode 100644 index 0000000..0886b21 --- /dev/null +++ b/mascarade-eval/tests/test_grist_cli.py @@ -0,0 +1,43 @@ +# tests/test_grist_cli.py +import pytest +from mascarade_eval.grist.cli import build_parser, resolve_doc + + +def test_parser_ingest_requires_doc_and_jsonl(): + ns = build_parser().parse_args( + ["ingest", "--doc", "D", "--jsonl", "mine.jsonl", "--domain", "kicad"]) + assert ns.command == "ingest" + assert ns.doc == "D" + assert ns.jsonl == "mine.jsonl" + assert ns.domain == "kicad" + + +def test_parser_export_accepts_dry_run(): + ns = build_parser().parse_args( + ["export", "--doc", "D", "--domain", "kicad", "--dry-run"]) + assert ns.command == "export" + assert ns.dry_run is True + + +def test_parser_migrate_and_publish(): + p = build_parser() + m = p.parse_args(["migrate", "--doc", "D", "--domain", "kicad"]) + assert m.command == "migrate" + pub = p.parse_args( + ["publish", "--snapshot", "exports/kicad.x.jsonl", + "--hf-dataset", "Ailiance-fr/mascarade-kicad-dataset", + "--filename", "kicad_chat.jsonl"]) + assert pub.command == "publish" + assert pub.hf_dataset == "Ailiance-fr/mascarade-kicad-dataset" + + +def test_resolve_doc_prefers_explicit_arg(): + assert resolve_doc("explicit-id") == "explicit-id" + + +def test_resolve_doc_errors_when_unset(monkeypatch): + monkeypatch.delenv("GRIST_DOC_TRAINING", raising=False) + monkeypatch.setattr("mascarade_eval.grist.cli.load_doc_id", + lambda name: None) + with pytest.raises(SystemExit): + resolve_doc(None) From 50c3c0edd0a8652eb84bf4eb71cfff37bc58d244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 12:56:25 +0200 Subject: [PATCH 10/24] fix(grist): clean exit on missing ingest file 
--- mascarade-eval/mascarade_eval/grist/cli.py | 13 +++++++++++-- mascarade-eval/tests/test_grist_cli.py | 6 ++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py index aab2795..d92b0d5 100644 --- a/mascarade-eval/mascarade_eval/grist/cli.py +++ b/mascarade-eval/mascarade_eval/grist/cli.py @@ -47,7 +47,10 @@ def build_parser() -> argparse.ArgumentParser: def resolve_doc(doc_arg: str | None) -> str: - """Return the doc ID from --doc or the GRIST_DOC_TRAINING env/file value.""" + """Return the doc ID from --doc or the GRIST_DOC_TRAINING env/file value. + + Exits the program (sys.exit) if neither source provides a doc ID. + """ if doc_arg: return doc_arg doc = load_doc_id("GRIST_DOC_TRAINING") @@ -57,8 +60,14 @@ def resolve_doc(doc_arg: str | None) -> str: def _ingest_jsonl_rows(domain: str, jsonl_path: str) -> list[dict]: + try: + text = Path(jsonl_path).read_text(encoding="utf-8") + except FileNotFoundError: + sys.exit(f"file not found: {jsonl_path}") + except UnicodeDecodeError as exc: + sys.exit(f"cannot decode {jsonl_path}: {exc}") rows: list[dict] = [] - for line in Path(jsonl_path).read_text(encoding="utf-8").splitlines(): + for line in text.splitlines(): line = line.strip() if not line: continue diff --git a/mascarade-eval/tests/test_grist_cli.py b/mascarade-eval/tests/test_grist_cli.py index 0886b21..b131364 100644 --- a/mascarade-eval/tests/test_grist_cli.py +++ b/mascarade-eval/tests/test_grist_cli.py @@ -41,3 +41,9 @@ def test_resolve_doc_errors_when_unset(monkeypatch): lambda name: None) with pytest.raises(SystemExit): resolve_doc(None) + + +def test_ingest_jsonl_rows_exits_on_missing_file(tmp_path): + from mascarade_eval.grist.cli import _ingest_jsonl_rows + with pytest.raises(SystemExit): + _ingest_jsonl_rows("kicad", str(tmp_path / "does-not-exist.jsonl")) From 3423aaaaf9ac84c3ab3d7641475ff5415fcb68a4 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:04:21 +0200 Subject: [PATCH 11/24] test(grist): add round-trip check and docs --- mascarade-eval/mascarade_eval/grist/README.md | 33 +++++++++++++++++ mascarade-eval/tests/test_grist_roundtrip.py | 37 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 mascarade-eval/mascarade_eval/grist/README.md create mode 100644 mascarade-eval/tests/test_grist_roundtrip.py diff --git a/mascarade-eval/mascarade_eval/grist/README.md b/mascarade-eval/mascarade_eval/grist/README.md new file mode 100644 index 0000000..20998e7 --- /dev/null +++ b/mascarade-eval/mascarade_eval/grist/README.md @@ -0,0 +1,33 @@ +# mascarade_eval.grist — Grist-backed dataset management + +Grist is the canonical source of truth for the mascarade training corpus. +Mining ingests in insert-only mode (edits made in Grist are never +overwritten); training and HF publication consume a deterministic export. + +## One-time setup + +1. Create an empty Grist doc "Mascarade Training" at grist.saillant.cc. +2. Add `GRIST_DOC_TRAINING=<doc-id>` to `~/.config/electron-rare/grist.env` + (the file already holds `GRIST_API_KEY`). + +## Commands + +Run with `uv run python -m mascarade_eval.grist.cli <command>`. + +- `migrate --domain kicad` — backfill a domain's HF training data into + Grist (insert-only). Run once per domain to seed the doc. +- `ingest --domain kicad --jsonl mine.jsonl` — insert-only ingest of a + new mining/curation file. Existing rows are never touched. +- `export --domain kicad` — write a hashed `.jsonl` snapshot to + `exports/` and log a row in the `Exports` table. +- `publish --snapshot exports/kicad.<timestamp>.jsonl --hf-dataset + Ailiance-fr/mascarade-kicad-dataset --filename kicad_chat.jsonl` — + upload a snapshot to its HF dataset repo. + +Add `--dry-run` to `ingest`, `export`, or `migrate` to preview without +writing to Grist or disk. 
+ +## Human review + +Edit rows directly in the Grist UI. To drop an item from future exports, +tick its `exclure` checkbox — `export` filters those rows out. diff --git a/mascarade-eval/tests/test_grist_roundtrip.py b/mascarade-eval/tests/test_grist_roundtrip.py new file mode 100644 index 0000000..0521cf9 --- /dev/null +++ b/mascarade-eval/tests/test_grist_roundtrip.py @@ -0,0 +1,37 @@ +# tests/test_grist_roundtrip.py +import json +from mascarade_eval.grist.migrate import migrate_domain +from mascarade_eval.grist.export import export_domain + + +def test_migrate_then_export_round_trips(fake_client, tmp_path): + source = [ + {"messages": [{"role": "user", "content": "Q1"}, + {"role": "assistant", "content": "A1"}]}, + {"messages": [{"role": "system", "content": "S"}, + {"role": "user", "content": "Q2"}, + {"role": "assistant", "content": "A2"}]}, + ] + client = fake_client(tables=[]) + migrate_domain(client, "kicad", records=source) + report = export_domain(client, "kicad", out_dir=tmp_path) + + assert report["n_items"] == 2 + out_file = tmp_path / report["output_file"] + exported = [json.loads(ln) for ln in out_file.read_text().splitlines()] + + def norm(msgs): + return sorted(json.dumps(m, sort_keys=True) for m in msgs) + + source_sets = {tuple(norm(r["messages"])) for r in source} + export_sets = {tuple(norm(r["messages"])) for r in exported} + assert source_sets == export_sets + + +def test_double_ingest_inserts_zero_the_second_time(fake_client): + source = [{"messages": [{"role": "user", "content": "Q"}, + {"role": "assistant", "content": "A"}]}] + client = fake_client(tables=[]) + migrate_domain(client, "kicad", records=source) + second = migrate_domain(client, "kicad", records=source) + assert second["inserted"] == 0 From af8a305f05add0be4460b406f2ba0ae957d009fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:14:41 +0200 Subject: [PATCH 12/24] feat(grist): add 
review-status constants --- .../mascarade_eval/grist/__init__.py | 24 +++++++++++++----- mascarade-eval/tests/test_grist_constants.py | 25 ++++++++++++++++--- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/mascarade-eval/mascarade_eval/grist/__init__.py b/mascarade-eval/mascarade_eval/grist/__init__.py index ac27099..360dbec 100644 --- a/mascarade-eval/mascarade_eval/grist/__init__.py +++ b/mascarade-eval/mascarade_eval/grist/__init__.py @@ -3,15 +3,16 @@ Grist is the canonical source of truth. Mining ingests in insert-only mode (human edits in Grist are never overwritten); training and HF -publication consume a deterministic export. +publication consume a deterministic export of human-validated rows. """ from pathlib import Path GRIST_BASE = "https://grist.saillant.cc/api" -# Known existing doc (held-out eval). The training doc ID is provided at -# runtime via --doc or the GRIST_DOC_TRAINING env/file value. -DOC_HELDOUT = "eGbbrpzN3TeLq3sUd2YFA2" +# Known existing docs. The training doc ID is provided at runtime via +# --doc or the GRIST_DOC_TRAINING env/file value. +DOC_HELDOUT = "eGbbrpzN3TeLq3sUd2YFA2" # ailiance-llm-workflow +DOC_MASCARADE = "dhyrySCayizD1PNqCNhCPN" # mascarade-data KEY_FILE = Path.home() / ".config" / "electron-rare" / "grist.env" @@ -19,10 +20,21 @@ REGISTRY_TABLE = "Datasets_Registry" EXPORTS_TABLE = "Exports" +# Human-review columns appended to every validation-target table. +REVIEW_COLUMNS = ("review_status", "reviewer", "reviewed_at", "review_note") +REVIEW_STATUSES = ("pending", "validated", "rejected", "needs_fix") +REVIEWER_CHOICES = ("clems",) + +# Existing tables that receive the review columns, keyed by doc ID. 
+REVIEW_TARGETS = { + DOC_HELDOUT: ("Heldout_Items", "Datasets"), + DOC_MASCARADE: ("Mascarade_Eval_Items", "Bench_31_domains"), +} + TRAINING_COLUMNS = ( "item_key", "domain", "system", "user_msg", "assistant_msg", - "extra_turns", "source", "exclure", "notes", -) + "extra_turns", "source", "notes", +) + REVIEW_COLUMNS REGISTRY_COLUMNS = ( "name", "family", "domain", "hf_dataset_id", "license", "n_items", "notes", diff --git a/mascarade-eval/tests/test_grist_constants.py b/mascarade-eval/tests/test_grist_constants.py index 0625521..af5e5d5 100644 --- a/mascarade-eval/tests/test_grist_constants.py +++ b/mascarade-eval/tests/test_grist_constants.py @@ -5,19 +5,36 @@ def test_constants_present(): assert grist.GRIST_BASE == "https://grist.saillant.cc/api" assert grist.DOC_HELDOUT == "eGbbrpzN3TeLq3sUd2YFA2" + assert grist.DOC_MASCARADE == "dhyrySCayizD1PNqCNhCPN" assert grist.TRAINING_TABLE == "Mascarade_Training" assert grist.REGISTRY_TABLE == "Datasets_Registry" assert grist.EXPORTS_TABLE == "Exports" -def test_training_columns_shape(): +def test_review_constants(): + assert grist.REVIEW_COLUMNS == ( + "review_status", "reviewer", "reviewed_at", "review_note") + assert grist.REVIEW_STATUSES == ( + "pending", "validated", "rejected", "needs_fix") + assert grist.REVIEWER_CHOICES == ("clems",) + + +def test_review_targets_cover_both_docs(): + assert grist.REVIEW_TARGETS == { + grist.DOC_HELDOUT: ("Heldout_Items", "Datasets"), + grist.DOC_MASCARADE: ("Mascarade_Eval_Items", "Bench_31_domains"), + } + + +def test_training_columns_end_with_review_columns(): assert grist.TRAINING_COLUMNS == ( "item_key", "domain", "system", "user_msg", "assistant_msg", - "extra_turns", "source", "exclure", "notes", + "extra_turns", "source", "notes", + "review_status", "reviewer", "reviewed_at", "review_note", ) - assert "exclure" in grist.TRAINING_COLUMNS + assert "exclure" not in grist.TRAINING_COLUMNS + assert grist.TRAINING_COLUMNS[-4:] == grist.REVIEW_COLUMNS def 
test_exports_dir_under_repo_root(): - # EXPORTS_DIR sits next to the heldout/ dir at the repo root. assert grist.EXPORTS_DIR.name == "exports" From 40bd1b293b8dc9aa855fb58e2cda65bf00f35043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:15:23 +0200 Subject: [PATCH 13/24] refactor(grist): producers write review_status --- mascarade-eval/mascarade_eval/grist/cli.py | 2 +- mascarade-eval/mascarade_eval/grist/migrate.py | 2 +- mascarade-eval/tests/test_grist_migrate_domain.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py index d92b0d5..7204a24 100644 --- a/mascarade-eval/mascarade_eval/grist/cli.py +++ b/mascarade-eval/mascarade_eval/grist/cli.py @@ -85,8 +85,8 @@ def _ingest_jsonl_rows(domain: str, jsonl_path: str) -> list[dict]: "assistant_msg": flat["assistant_msg"], "extra_turns": flat["extra_turns"], "source": record.get("source", ""), - "exclure": False, "notes": "", + "review_status": "pending", }) return rows diff --git a/mascarade-eval/mascarade_eval/grist/migrate.py b/mascarade-eval/mascarade_eval/grist/migrate.py index c804030..a36aa19 100644 --- a/mascarade-eval/mascarade_eval/grist/migrate.py +++ b/mascarade-eval/mascarade_eval/grist/migrate.py @@ -96,8 +96,8 @@ def _to_training_row(domain: str, record: dict) -> dict: "assistant_msg": flat["assistant_msg"], "extra_turns": flat["extra_turns"], "source": f"{HF_ORG}/mascarade-{domain}-dataset", - "exclure": False, "notes": "", + "review_status": "pending", } diff --git a/mascarade-eval/tests/test_grist_migrate_domain.py b/mascarade-eval/tests/test_grist_migrate_domain.py index 64a365e..400c8f4 100644 --- a/mascarade-eval/tests/test_grist_migrate_domain.py +++ b/mascarade-eval/tests/test_grist_migrate_domain.py @@ -17,7 +17,7 @@ def test_migrate_domain_ingests_flattened_rows(fake_client): assert 
{r["user_msg"] for r in added} == {"Q1", "Q2"} assert all(r["domain"] == "kicad" for r in added) assert all(r["item_key"].startswith("kicad-") for r in added) - assert all(r["exclure"] is False for r in added) + assert all(r["review_status"] == "pending" for r in added) def test_migrate_domain_is_idempotent(fake_client): From f4b6158cf923348103ad512fd6c27f0f03e2d2c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:16:19 +0200 Subject: [PATCH 14/24] feat(grist): add column DDL to client --- mascarade-eval/mascarade_eval/grist/client.py | 36 +++++++++---- mascarade-eval/tests/test_grist_client.py | 51 ++++++++++++++++--- 2 files changed, 69 insertions(+), 18 deletions(-) diff --git a/mascarade-eval/mascarade_eval/grist/client.py b/mascarade-eval/mascarade_eval/grist/client.py index 00d55b3..6340c9b 100644 --- a/mascarade-eval/mascarade_eval/grist/client.py +++ b/mascarade-eval/mascarade_eval/grist/client.py @@ -8,18 +8,23 @@ import urllib.error import urllib.request -from . import GRIST_BASE, KEY_FILE +from . 
import GRIST_BASE, KEY_FILE, REVIEW_STATUSES, REVIEWER_CHOICES -_BOOL_COLS = {"exclure"} _INT_COLS = {"n_items", "n_rows"} +_CHOICE_COLS = { + "review_status": REVIEW_STATUSES, + "reviewer": REVIEWER_CHOICES, +} -def _col_type(name: str) -> str: - if name in _BOOL_COLS: - return "Bool" +def _col_fields(name: str) -> dict: + """Grist column `fields` payload for a column id (label/type/options).""" + if name in _CHOICE_COLS: + opts = json.dumps({"choices": list(_CHOICE_COLS[name])}) + return {"label": name, "type": "Choice", "widgetOptions": opts} if name in _INT_COLS: - return "Int" - return "Text" + return {"label": name, "type": "Int"} + return {"label": name, "type": "Text"} def load_grist_key() -> str: @@ -63,7 +68,7 @@ def _http_transport(method: str, url: str, key: str, body: dict | None) -> dict: class GristClient: - """Records-level access to one Grist document.""" + """Records- and column-level access to one Grist document.""" def __init__(self, doc_id: str, key: str, transport=_http_transport): self.doc_id = doc_id @@ -82,8 +87,7 @@ def list_tables(self) -> set[str]: return {t["id"] for t in resp.get("tables", [])} def create_table(self, table: str, columns: tuple[str, ...]) -> None: - cols = [{"id": c, "fields": {"label": c, "type": _col_type(c)}} - for c in columns] + cols = [{"id": c, "fields": _col_fields(c)} for c in columns] self._api("POST", f"/docs/{self.doc_id}/tables", {"tables": [{"id": table, "columns": cols}]}) @@ -91,6 +95,18 @@ def ensure_table(self, table: str, columns: tuple[str, ...]) -> None: if table not in self.list_tables(): self.create_table(table, columns) + def list_columns(self, table: str) -> set[str]: + resp = self._api( + "GET", f"/docs/{self.doc_id}/tables/{table}/columns") + return {c["id"] for c in resp.get("columns", [])} + + def add_columns(self, table: str, columns: tuple[str, ...]) -> None: + if not columns: + return + cols = [{"id": c, "fields": _col_fields(c)} for c in columns] + self._api("POST", 
f"/docs/{self.doc_id}/tables/{table}/columns", + {"columns": cols}) + def fetch_records(self, table: str) -> list[dict]: resp = self._api("GET", f"/docs/{self.doc_id}/tables/{table}/records") return [{"_id": r["id"], **r["fields"]} for r in resp.get("records", [])] diff --git a/mascarade-eval/tests/test_grist_client.py b/mascarade-eval/tests/test_grist_client.py index 0bbe9a1..9af0fd0 100644 --- a/mascarade-eval/tests/test_grist_client.py +++ b/mascarade-eval/tests/test_grist_client.py @@ -8,10 +8,14 @@ def transport(method, url, key, body): log.append((method, url, body)) if method == "GET" and url.endswith("/tables"): return {"tables": [{"id": "Existing"}]} + if method == "GET" and url.endswith("/columns"): + return {"columns": [{"id": "item_key"}, {"id": "domain"}]} if method == "GET" and "/records" in url: return {"records": [ - {"id": 1, "fields": {"item_key": "k1", "exclure": False}}, - {"id": 2, "fields": {"item_key": "k2", "exclure": True}}, + {"id": 1, "fields": {"item_key": "k1", + "review_status": "pending"}}, + {"id": 2, "fields": {"item_key": "k2", + "review_status": "validated"}}, ]} return {} return transport @@ -29,8 +33,8 @@ def test_fetch_records_flattens_id_into_fields(): c = GristClient("doc1", "key1", transport=_recording_transport([])) rows = c.fetch_records("Mascarade_Training") assert rows == [ - {"_id": 1, "item_key": "k1", "exclure": False}, - {"_id": 2, "item_key": "k2", "exclure": True}, + {"_id": 1, "item_key": "k1", "review_status": "pending"}, + {"_id": 2, "item_key": "k2", "review_status": "validated"}, ] @@ -52,14 +56,45 @@ def test_add_records_noop_on_empty(): assert log == [] -def test_create_table_types_exclure_as_bool(): +def test_create_table_assigns_column_types(): log = [] c = GristClient("doc1", "key1", transport=_recording_transport(log)) - c.create_table("T", ("item_key", "exclure", "n_items")) + c.create_table("T", ("item_key", "n_items", "review_status")) method, url, body = log[-1] assert method == "POST" - cols = 
{col["id"]: col["fields"]["type"] for col in body["tables"][0]["columns"]} - assert cols == {"item_key": "Text", "exclure": "Bool", "n_items": "Int"} + cols = {col["id"]: col["fields"]["type"] + for col in body["tables"][0]["columns"]} + assert cols == {"item_key": "Text", "n_items": "Int", + "review_status": "Choice"} + + +def test_list_columns_returns_ids(): + log = [] + c = GristClient("doc1", "key1", transport=_recording_transport(log)) + assert c.list_columns("Heldout_Items") == {"item_key", "domain"} + method, url, _ = log[-1] + assert method == "GET" + assert url.endswith("/docs/doc1/tables/Heldout_Items/columns") + + +def test_add_columns_posts_choice_with_widget_options(): + log = [] + c = GristClient("doc1", "key1", transport=_recording_transport(log)) + c.add_columns("Heldout_Items", ("review_status", "review_note")) + method, url, body = log[-1] + assert method == "POST" + assert url.endswith("/docs/doc1/tables/Heldout_Items/columns") + by_id = {col["id"]: col["fields"] for col in body["columns"]} + assert by_id["review_status"]["type"] == "Choice" + assert "pending" in by_id["review_status"]["widgetOptions"] + assert by_id["review_note"]["type"] == "Text" + + +def test_add_columns_noop_on_empty(): + log = [] + c = GristClient("doc1", "key1", transport=_recording_transport(log)) + c.add_columns("T", ()) + assert log == [] def test_load_grist_key_prefers_env(monkeypatch): From 4a3a071c6e93db3fde6daa0d107ffbe5582173bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:17:09 +0200 Subject: [PATCH 15/24] feat(grist): add review-column schema migration --- mascarade-eval/mascarade_eval/grist/schema.py | 34 ++++++++++++++++++ mascarade-eval/tests/conftest.py | 12 ++++++- mascarade-eval/tests/test_grist_schema.py | 36 +++++++++++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 mascarade-eval/mascarade_eval/grist/schema.py create mode 100644 
mascarade-eval/tests/test_grist_schema.py diff --git a/mascarade-eval/mascarade_eval/grist/schema.py b/mascarade-eval/mascarade_eval/grist/schema.py new file mode 100644 index 0000000..40a6895 --- /dev/null +++ b/mascarade-eval/mascarade_eval/grist/schema.py @@ -0,0 +1,34 @@ +# mascarade_eval/grist/schema.py +"""Add the human-review columns to existing Grist tables (idempotent). + +A column already present on a table is never recreated, so re-running +the migration is safe. New tables created by the pipeline already carry +the review columns via TRAINING_COLUMNS. +""" +from __future__ import annotations + +from . import REVIEW_COLUMNS + + +def ensure_review_columns(client, table: str) -> list[str]: + """Add any missing review column to one table. Returns columns added.""" + existing = client.list_columns(table) + missing = [c for c in REVIEW_COLUMNS if c not in existing] + if missing: + client.add_columns(table, tuple(missing)) + return missing + + +def migrate_doc(client, tables: tuple[str, ...]) -> dict: + """Ensure review columns on each table that exists in the document. + + A table absent from the document is reported as None (skipped). + """ + present = client.list_tables() + report: dict = {} + for table in tables: + if table in present: + report[table] = ensure_review_columns(client, table) + else: + report[table] = None + return report diff --git a/mascarade-eval/tests/conftest.py b/mascarade-eval/tests/conftest.py index ff7c203..2b1f464 100644 --- a/mascarade-eval/tests/conftest.py +++ b/mascarade-eval/tests/conftest.py @@ -5,24 +5,34 @@ class FakeClient: """In-memory stand-in for GristClient. 
Records all writes.""" - def __init__(self, tables=None, records=None): + def __init__(self, tables=None, records=None, columns=None): self.doc_id = "fake-doc" self._tables = set(tables or []) self._records = {t: list(rs) for t, rs in (records or {}).items()} + self._columns = {t: list(cs) for t, cs in (columns or {}).items()} self.created = [] self.added = {} + self.added_columns = {} def list_tables(self): return set(self._tables) def create_table(self, table, columns): self._tables.add(table) + self._columns[table] = list(columns) self.created.append((table, tuple(columns))) def ensure_table(self, table, columns): if table not in self._tables: self.create_table(table, columns) + def list_columns(self, table): + return set(self._columns.get(table, [])) + + def add_columns(self, table, columns): + self._columns.setdefault(table, []).extend(columns) + self.added_columns.setdefault(table, []).extend(columns) + def fetch_records(self, table): return [dict(r) for r in self._records.get(table, [])] diff --git a/mascarade-eval/tests/test_grist_schema.py b/mascarade-eval/tests/test_grist_schema.py new file mode 100644 index 0000000..14ba110 --- /dev/null +++ b/mascarade-eval/tests/test_grist_schema.py @@ -0,0 +1,36 @@ +# tests/test_grist_schema.py +from mascarade_eval.grist import REVIEW_COLUMNS +from mascarade_eval.grist.schema import ensure_review_columns, migrate_doc + + +def test_ensure_review_columns_adds_all_when_absent(fake_client): + client = fake_client(tables=["Heldout_Items"], + columns={"Heldout_Items": ["item_key", "prompt"]}) + added = ensure_review_columns(client, "Heldout_Items") + assert added == list(REVIEW_COLUMNS) + assert client.added_columns["Heldout_Items"] == list(REVIEW_COLUMNS) + + +def test_ensure_review_columns_is_idempotent(fake_client): + cols = ["item_key", *REVIEW_COLUMNS] + client = fake_client(tables=["Heldout_Items"], + columns={"Heldout_Items": cols}) + added = ensure_review_columns(client, "Heldout_Items") + assert added == [] + 
assert "Heldout_Items" not in client.added_columns + + +def test_ensure_review_columns_adds_only_missing(fake_client): + client = fake_client( + tables=["Datasets"], + columns={"Datasets": ["domain", "review_status", "reviewer"]}) + added = ensure_review_columns(client, "Datasets") + assert added == ["reviewed_at", "review_note"] + + +def test_migrate_doc_skips_absent_tables(fake_client): + client = fake_client(tables=["Heldout_Items"], + columns={"Heldout_Items": ["item_key"]}) + report = migrate_doc(client, ("Heldout_Items", "Mascarade_Training")) + assert report["Heldout_Items"] == list(REVIEW_COLUMNS) + assert report["Mascarade_Training"] is None From 303bc13b42a4c4472be3e1a980582d90d7dbcbce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:17:57 +0200 Subject: [PATCH 16/24] feat(grist): add schema CLI subcommand --- mascarade-eval/mascarade_eval/grist/cli.py | 11 +++++++++++ mascarade-eval/tests/test_grist_cli.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py index 7204a24..e51971d 100644 --- a/mascarade-eval/mascarade_eval/grist/cli.py +++ b/mascarade-eval/mascarade_eval/grist/cli.py @@ -43,6 +43,8 @@ def build_parser() -> argparse.ArgumentParser: p_pub.add_argument("--hf-dataset", required=True) p_pub.add_argument("--filename", required=True) + sub.add_parser("schema", help="add review columns to existing tables") + return ap @@ -99,6 +101,15 @@ def main(argv: list[str] | None = None) -> int: print(f"published {args.snapshot} -> {args.hf_dataset}") return 0 + if args.command == "schema": + from . 
import REVIEW_TARGETS + from .schema import migrate_doc + for doc_id, tables in REVIEW_TARGETS.items(): + doc_client = GristClient.from_env(doc_id) + report = migrate_doc(doc_client, tables) + print(f"schema {doc_id}: {report}") + return 0 + client = GristClient.from_env(resolve_doc(args.doc)) if args.command == "ingest": diff --git a/mascarade-eval/tests/test_grist_cli.py b/mascarade-eval/tests/test_grist_cli.py index b131364..6d5213c 100644 --- a/mascarade-eval/tests/test_grist_cli.py +++ b/mascarade-eval/tests/test_grist_cli.py @@ -47,3 +47,19 @@ def test_ingest_jsonl_rows_exits_on_missing_file(tmp_path): from mascarade_eval.grist.cli import _ingest_jsonl_rows with pytest.raises(SystemExit): _ingest_jsonl_rows("kicad", str(tmp_path / "does-not-exist.jsonl")) + + +def test_parser_accepts_schema_command(): + ns = build_parser().parse_args(["schema"]) + assert ns.command == "schema" + + +def test_schema_command_runs_over_review_targets(monkeypatch, fake_client): + from mascarade_eval.grist import cli + made = fake_client(tables=["Heldout_Items"], + columns={"Heldout_Items": ["item_key"]}) + monkeypatch.setattr(cli.GristClient, "from_env", + classmethod(lambda c, doc: made)) + rc = cli.main(["schema"]) + assert rc == 0 + assert made.added_columns["Heldout_Items"] From f942b61b12cbf94994d0583a1ea282e56cd42e1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:20:34 +0200 Subject: [PATCH 17/24] feat(grist): gate export on review_status export_domain now ships only rows with review_status=validated; --include-pending re-includes pending rows. Completes the exclure -> review_status amendment across the round-trip test and the package README, which the plan's affected-files list had missed. 
--- mascarade-eval/mascarade_eval/grist/README.md | 7 ++- mascarade-eval/mascarade_eval/grist/cli.py | 5 +- mascarade-eval/mascarade_eval/grist/export.py | 20 +++++-- mascarade-eval/tests/test_grist_cli.py | 6 +++ mascarade-eval/tests/test_grist_export.py | 52 ++++++++++++------- mascarade-eval/tests/test_grist_roundtrip.py | 5 +- 6 files changed, 68 insertions(+), 27 deletions(-) diff --git a/mascarade-eval/mascarade_eval/grist/README.md b/mascarade-eval/mascarade_eval/grist/README.md index 20998e7..b4aa209 100644 --- a/mascarade-eval/mascarade_eval/grist/README.md +++ b/mascarade-eval/mascarade_eval/grist/README.md @@ -29,5 +29,8 @@ writing to Grist or disk. ## Human review -Edit rows directly in the Grist UI. To drop an item from future exports, -tick its `exclure` checkbox — `export` filters those rows out. +Edit rows directly in the Grist UI. Each row carries a `review_status` +(`pending` / `validated` / `rejected` / `needs_fix`); `export` ships only +`validated` rows. Pass `--include-pending` to `export` to also include +rows still awaiting review. See `docs/grist-native-views-recipe.md` and +`docs/grist-widget-setup.md` for the review surfaces. 
diff --git a/mascarade-eval/mascarade_eval/grist/cli.py b/mascarade-eval/mascarade_eval/grist/cli.py index e51971d..2fa5fe7 100644 --- a/mascarade-eval/mascarade_eval/grist/cli.py +++ b/mascarade-eval/mascarade_eval/grist/cli.py @@ -32,6 +32,8 @@ def build_parser() -> argparse.ArgumentParser: p_exp.add_argument("--doc") p_exp.add_argument("--domain", required=True) p_exp.add_argument("--dry-run", action="store_true") + p_exp.add_argument("--include-pending", action="store_true", + help="also export rows still pending review") p_mig = sub.add_parser("migrate", help="backfill a domain from HF") p_mig.add_argument("--doc") @@ -119,7 +121,8 @@ def main(argv: list[str] | None = None) -> int: print(f"ingest {args.domain}: {report}") elif args.command == "export": report = export_domain(client, args.domain, EXPORTS_DIR, - dry_run=args.dry_run) + dry_run=args.dry_run, + include_pending=args.include_pending) print(f"export {args.domain}: {report}") elif args.command == "migrate": report = migrate_domain(client, args.domain, dry_run=args.dry_run) diff --git a/mascarade-eval/mascarade_eval/grist/export.py b/mascarade-eval/mascarade_eval/grist/export.py index ec28cc5..f1095c9 100644 --- a/mascarade-eval/mascarade_eval/grist/export.py +++ b/mascarade-eval/mascarade_eval/grist/export.py @@ -31,14 +31,28 @@ def _timestamp() -> str: return datetime.datetime.now(datetime.UTC).strftime("%Y%m%dT%H%M%SZ") +def _is_exportable(row: dict, include_pending: bool) -> bool: + """A row ships only when validated (or pending, if explicitly allowed). + + `rejected` and `needs_fix` rows are always excluded. A row with no + review_status is treated as `pending`. + """ + status = row.get("review_status") or "pending" + if status == "validated": + return True + return include_pending and status == "pending" + + def export_domain(client, domain: str, out_dir: Path, - dry_run: bool = False) -> dict: - """Export one domain's non-excluded training rows to a hashed snapshot. 
+ dry_run: bool = False, + include_pending: bool = False) -> dict: + """Export one domain's human-validated training rows to a hashed snapshot. Returns a report dict matching the Exports row written to Grist. """ rows = [r for r in client.fetch_records(TRAINING_TABLE) - if r.get("domain") == domain and not r.get("exclure")] + if r.get("domain") == domain + and _is_exportable(r, include_pending)] payload = canonical_jsonl( [(r.get("item_key", ""), rebuild_messages(r)) for r in rows]) digest = content_hash(payload) diff --git a/mascarade-eval/tests/test_grist_cli.py b/mascarade-eval/tests/test_grist_cli.py index 6d5213c..c8a1824 100644 --- a/mascarade-eval/tests/test_grist_cli.py +++ b/mascarade-eval/tests/test_grist_cli.py @@ -63,3 +63,9 @@ def test_schema_command_runs_over_review_targets(monkeypatch, fake_client): rc = cli.main(["schema"]) assert rc == 0 assert made.added_columns["Heldout_Items"] + + +def test_parser_export_accepts_include_pending(): + ns = build_parser().parse_args( + ["export", "--doc", "D", "--domain", "kicad", "--include-pending"]) + assert ns.include_pending is True diff --git a/mascarade-eval/tests/test_grist_export.py b/mascarade-eval/tests/test_grist_export.py index f9baea9..3629308 100644 --- a/mascarade-eval/tests/test_grist_export.py +++ b/mascarade-eval/tests/test_grist_export.py @@ -7,6 +7,13 @@ ) +def _row(key, status, q="Q", a="A"): + return {"_id": key, "item_key": f"kicad-{key}", "domain": "kicad", + "user_msg": q, "assistant_msg": a, "system": "", + "extra_turns": "", "source": "", "notes": "", + "review_status": status} + + def test_canonical_jsonl_sorts_by_key(): keyed = [("b", {"v": 2}), ("a", {"v": 1})] lines = canonical_jsonl(keyed).splitlines() @@ -22,7 +29,7 @@ def test_canonical_jsonl_is_order_independent(): def test_canonical_jsonl_omits_the_sort_key_from_output(): text = canonical_jsonl([("x", {"v": 1})]) - assert json.loads(text) == {"v": 1} # no "x", no item_key + assert json.loads(text) == {"v": 1} def 
test_content_hash_stable(): @@ -31,38 +38,46 @@ def test_content_hash_stable(): assert len(content_hash(text)) == 64 -def test_export_domain_filters_excluded_and_writes_file(fake_client, tmp_path): +def test_export_domain_ships_only_validated_rows(fake_client, tmp_path): client = fake_client( tables=[TRAINING_TABLE], records={TRAINING_TABLE: [ - {"_id": 1, "item_key": "kicad-1", "domain": "kicad", - "user_msg": "Q1", "assistant_msg": "A1", "system": "", - "extra_turns": "", "source": "", "exclure": False, "notes": ""}, - {"_id": 2, "item_key": "kicad-2", "domain": "kicad", - "user_msg": "Q2", "assistant_msg": "A2", "system": "", - "extra_turns": "", "source": "", "exclure": True, "notes": ""}, + _row(1, "validated", q="Q1", a="A1"), + _row(2, "rejected", q="Q2", a="A2"), + _row(3, "pending", q="Q3", a="A3"), + _row(4, "needs_fix", q="Q4", a="A4"), ]}, ) report = export_domain(client, "kicad", out_dir=tmp_path) - assert report["n_items"] == 1 # the excluded row is dropped + assert report["n_items"] == 1 # only the validated row out_file = tmp_path / report["output_file"] - assert out_file.exists() written = [json.loads(ln) for ln in out_file.read_text().splitlines()] assert written == [{"messages": [ {"role": "user", "content": "Q1"}, {"role": "assistant", "content": "A1"}, ]}] - assert client.added[EXPORTS_TABLE][0]["domain"] == "kicad" assert client.added[EXPORTS_TABLE][0]["content_hash"] == report["content_hash"] -def test_export_domain_dry_run_writes_nothing(fake_client, tmp_path): +def test_export_domain_include_pending_adds_pending_only(fake_client, tmp_path): client = fake_client( tables=[TRAINING_TABLE], records={TRAINING_TABLE: [ - {"_id": 1, "item_key": "kicad-1", "domain": "kicad", - "user_msg": "Q", "assistant_msg": "A", "system": "", - "extra_turns": "", "exclure": False}]}, + _row(1, "validated"), + _row(2, "pending"), + _row(3, "rejected"), + _row(4, ""), # missing status -> treated as pending + ]}, + ) + report = export_domain(client, "kicad", 
out_dir=tmp_path, + include_pending=True) + assert report["n_items"] == 3 # validated + pending + empty, not rejected + + +def test_export_domain_dry_run_writes_nothing(fake_client, tmp_path): + client = fake_client( + tables=[TRAINING_TABLE], + records={TRAINING_TABLE: [_row(1, "validated")]}, ) report = export_domain(client, "kicad", out_dir=tmp_path, dry_run=True) assert report["n_items"] == 1 @@ -74,10 +89,7 @@ def test_export_domain_removes_file_when_grist_logging_fails( fake_client, tmp_path): client = fake_client( tables=[TRAINING_TABLE], - records={TRAINING_TABLE: [ - {"_id": 1, "item_key": "kicad-1", "domain": "kicad", - "user_msg": "Q", "assistant_msg": "A", "system": "", - "extra_turns": "", "exclure": False}]}, + records={TRAINING_TABLE: [_row(1, "validated")]}, ) def boom(table, rows): @@ -86,4 +98,4 @@ def boom(table, rows): client.add_records = boom with pytest.raises(RuntimeError, match="grist down"): export_domain(client, "kicad", out_dir=tmp_path) - assert list(tmp_path.iterdir()) == [] # no orphaned snapshot file + assert list(tmp_path.iterdir()) == [] diff --git a/mascarade-eval/tests/test_grist_roundtrip.py b/mascarade-eval/tests/test_grist_roundtrip.py index 0521cf9..eefbaee 100644 --- a/mascarade-eval/tests/test_grist_roundtrip.py +++ b/mascarade-eval/tests/test_grist_roundtrip.py @@ -14,7 +14,10 @@ def test_migrate_then_export_round_trips(fake_client, tmp_path): ] client = fake_client(tables=[]) migrate_domain(client, "kicad", records=source) - report = export_domain(client, "kicad", out_dir=tmp_path) + # Migrated rows start as review_status=pending; include them so the + # round-trip exercises message semantics independent of review state. 
+ report = export_domain(client, "kicad", out_dir=tmp_path, + include_pending=True) assert report["n_items"] == 2 out_file = tmp_path / report["output_file"] From 7546cee3116728b9a6e54c105ac41a34ec75a51c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:21:10 +0200 Subject: [PATCH 18/24] docs(grist): add native views and form recipe --- .../docs/grist-native-views-recipe.md | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 mascarade-eval/docs/grist-native-views-recipe.md diff --git a/mascarade-eval/docs/grist-native-views-recipe.md b/mascarade-eval/docs/grist-native-views-recipe.md new file mode 100644 index 0000000..2f8857d --- /dev/null +++ b/mascarade-eval/docs/grist-native-views-recipe.md @@ -0,0 +1,71 @@ +# Grist native review views — operator recipe + +Manual Grist UI steps for the parts of the human-review layer that are +not API-scriptable. Run the schema migration first +(`python -m mascarade_eval.grist.cli schema`) so the review columns +exist. + +## 1. review_status choice colors + +For each table carrying `review_status` (`Heldout_Items`, `Datasets` +in doc *ailiance-llm-workflow*; `Mascarade_Eval_Items`, +`Bench_31_domains` in doc *mascarade-data*, plus `Mascarade_Training`): + +1. Open the table, click the `review_status` column header → **Column + options**. +2. Under **CHOICES**, confirm the four values are present: `pending`, + `validated`, `rejected`, `needs_fix`. +3. Set the chip color of each: pending = grey `#E8E8E8`, + validated = green `#C6E5B3`, rejected = red `#F2B5B5`, + needs_fix = amber `#F5D9A6`. + +## 2. Bench_31_domains review page (doc mascarade-data) + +1. **Add Page** → name it `Bench review`. +2. Add a **Table** widget bound to `Bench_31_domains`. +3. Add a filter on `review_status` and a second on `domain`; save the + view so the filters persist. +4. 
Conditional formatting (column header → **Column options** → + **Add conditional style**): + - `judge_score`: red when `$judge_score < 50`, amber when + `$judge_score < 70`, green otherwise. + - `validator_score`: red when `$validator_score < 50`, green when + `$validator_score >= 70`. + - `ppl`: red when `$ppl > 20`, amber when `$ppl > 10`. +5. Add a **Card List** widget on the same page bound to + `Bench_31_domains`, linked to the table widget, showing `model`, + `domain`, `judge_score`, `judge_rationale`, `validator_score`, + `review_status`, `reviewer`, `review_note` — this is the per-row + review surface. + +## 3. Datasets review view (doc ailiance-llm-workflow) + +1. **Add Page** → `Datasets review`. +2. Add a **Table** widget bound to `Datasets`, filtered on + `review_status`. +3. Show `domain`, `name`, `n_rows`, `license`, `hf_dataset_id`, + `review_status`, `reviewer`, `review_note`. + +## 4. Read-only scoreboards + +For `Bench_public`, `Bench_niches_ppl`, `Bench_gateway`, +`Bench_lift_v1`, `Bench_lift_v2`: add one page `Scoreboards` with a +Table widget per table. Apply conditional formatting on the score +columns (green high / red low) as in section 2. No review columns — +these tables are reference only. + +## 5. Bench entry form (doc mascarade-data) + +1. **Add Page** → `Bench entry`. +2. Add a **Form** widget bound to `Bench_31_domains`. +3. Keep only these fields on the form: `model`, `domain`, `ppl`, + `task_score`, `task_metric`, `judge_score`, `source`, `date`. + Remove pipeline-only fields (`validator_image_digest`, `run_id`, + `host`, `runtime_s`, `tokens_per_s`, …). +4. Click **Publish** and copy the share URL — this is the manual + bench-result entry form. Automated runs keep writing via the API. + +## 6. Clean-up + +Delete the empty default `Table1` (columns A/B/C) in each of the three +documents. 
From 5f1ba2adab891f909a07e5edca3242c89a1d6b30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:22:02 +0200 Subject: [PATCH 19/24] feat(grist): add review console widget --- .../widgets/review-console/index.html | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 mascarade-eval/widgets/review-console/index.html diff --git a/mascarade-eval/widgets/review-console/index.html b/mascarade-eval/widgets/review-console/index.html new file mode 100644 index 0000000..0032f4f --- /dev/null +++ b/mascarade-eval/widgets/review-console/index.html @@ -0,0 +1,141 @@ + + + + +Review Console + + + + +
+ + + + + + From 038ba88cdb6a347408e3cee2cdedb345d95c2b50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:22:34 +0200 Subject: [PATCH 20/24] docs(grist): add widget setup recipe --- mascarade-eval/docs/grist-widget-setup.md | 67 +++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 mascarade-eval/docs/grist-widget-setup.md diff --git a/mascarade-eval/docs/grist-widget-setup.md b/mascarade-eval/docs/grist-widget-setup.md new file mode 100644 index 0000000..607325b --- /dev/null +++ b/mascarade-eval/docs/grist-widget-setup.md @@ -0,0 +1,67 @@ +# Review Console widget — hosting, wiring, smoke test + +The widget at `widgets/review-console/index.html` is a static file. It +must be served over HTTPS and registered in Grist as a Custom URL +widget. + +## 1. Host the static file + +Serve the file behind the existing electron-server cloudflared tunnel. + +```bash +# from the repo, on the dev machine +scp widgets/review-console/index.html \ + electron-server:/srv/grist-widgets/review-console/index.html +``` + +On electron-server, expose `/srv/grist-widgets/` via the existing +static file server / Caddy / nginx and add a cloudflared route so the +file is reachable at: + +``` +https://grist-widgets.saillant.cc/review-console/index.html +``` + +Verify: `curl -sI https://grist-widgets.saillant.cc/review-console/index.html` +should return `HTTP/2 200`. + +> Hosting touches shared infra (cloudflared, electron-server) — confirm +> with the operator before applying the route. + +## 2. Add a review page in Grist + +In doc *ailiance-llm-workflow*: + +1. **Add Page** → `Heldout review`. +2. Add a **Custom** widget. Select **Custom URL** and paste + `https://grist-widgets.saillant.cc/review-console/index.html`. +3. Bind the widget to the `Heldout_Items` table. +4. When prompted, grant the widget **Full document access** (it must + write the review columns). +5. 
Open the widget's **Column mapping**: + - `primary` → `prompt` + - `secondary` → `reference` + - `context` → `domain`, `source` + +Repeat for the future `Mascarade_Training` table (map `primary` → +`user_msg`, `secondary` → `assistant_msg`) and for +`Mascarade_Eval_Items` in doc *mascarade-data* (map `primary` → +`question`, `secondary` → `reference`). + +## 3. Smoke-test checklist + +On the `Heldout review` page: + +- [ ] The progress line shows `revus 0 / 400 — en attente 400`. +- [ ] The first pending item's prompt and reference render in full. +- [ ] Pressing `V` writes `review_status = validated`, `reviewer`, + `reviewed_at` (ISO-8601) and advances to the next item; the + progress counter increments. +- [ ] Pressing `R` and `F` write `rejected` / `needs_fix`. +- [ ] A value typed in the note field lands in `review_note` and the + field clears after the decision. +- [ ] `S` / `→` skips without writing. +- [ ] After every pending row is decided, the widget shows + "Tous les items en attente sont revus ✓". +- [ ] Re-running `python -m mascarade_eval.grist.cli export --domain + DOMAIN` ships only the rows marked `validated`. From e680d678979cf5f8cfa815272c33296d75f001bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 13:29:03 +0200 Subject: [PATCH 21/24] docs(grist): point widget recipe at live URL --- mascarade-eval/docs/grist-widget-setup.md | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/mascarade-eval/docs/grist-widget-setup.md b/mascarade-eval/docs/grist-widget-setup.md index 607325b..0f786d7 100644 --- a/mascarade-eval/docs/grist-widget-setup.md +++ b/mascarade-eval/docs/grist-widget-setup.md @@ -6,27 +6,26 @@ widget. ## 1. Host the static file -Serve the file behind the existing electron-server cloudflared tunnel. 
+The widget is served from the `zacus-static` nginx bind-mount on +electron-server (no new traefik route or cloudflared hostname needed — +Grist only needs any HTTPS URL). ```bash # from the repo, on the dev machine +ssh electron-server 'mkdir -p \ + /home/electron/saillant-sites/zacus-static/review-console' scp widgets/review-console/index.html \ - electron-server:/srv/grist-widgets/review-console/index.html + electron-server:/home/electron/saillant-sites/zacus-static/review-console/index.html ``` -On electron-server, expose `/srv/grist-widgets/` via the existing -static file server / Caddy / nginx and add a cloudflared route so the -file is reachable at: +Live URL (verified `HTTP 200`): ``` -https://grist-widgets.saillant.cc/review-console/index.html +https://zacus.saillant.cc/review-console/index.html ``` -Verify: `curl -sI https://grist-widgets.saillant.cc/review-console/index.html` -should return `HTTP/2 200`. - -> Hosting touches shared infra (cloudflared, electron-server) — confirm -> with the operator before applying the route. +To redeploy after editing the widget, re-run the `scp` above — nginx +serves the mounted directory live, no container restart. ## 2. Add a review page in Grist @@ -34,7 +33,7 @@ In doc *ailiance-llm-workflow*: 1. **Add Page** → `Heldout review`. 2. Add a **Custom** widget. Select **Custom URL** and paste - `https://grist-widgets.saillant.cc/review-console/index.html`. + `https://zacus.saillant.cc/review-console/index.html`. 3. Bind the widget to the `Heldout_Items` table. 4. When prompted, grant the widget **Full document access** (it must write the review columns). 
From 6e4505802693a8f2e3343a9c739103d24a38f2ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 17:03:24 +0200 Subject: [PATCH 22/24] docs(grist): host widget on admin.ailiance.fr --- mascarade-eval/docs/grist-widget-setup.md | 47 +++++++++++++++-------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/mascarade-eval/docs/grist-widget-setup.md b/mascarade-eval/docs/grist-widget-setup.md index 0f786d7..3ba91a0 100644 --- a/mascarade-eval/docs/grist-widget-setup.md +++ b/mascarade-eval/docs/grist-widget-setup.md @@ -4,28 +4,43 @@ The widget at `widgets/review-console/index.html` is a static file. It must be served over HTTPS and registered in Grist as a Custom URL widget. -## 1. Host the static file +## 1. Hosting -The widget is served from the `zacus-static` nginx bind-mount on -electron-server (no new traefik route or cloudflared hostname needed — -Grist only needs any HTTPS URL). +The widget is served by a dedicated `review-widget` nginx container in +`/home/electron/saillant-sites/` on electron-server, exposed through +traefik on the existing `admin.ailiance.fr` hostname under `/review` +(a `Host && PathPrefix` router — no new cloudflared hostname needed). 
-```bash -# from the repo, on the dev machine -ssh electron-server 'mkdir -p \ - /home/electron/saillant-sites/zacus-static/review-console' -scp widgets/review-console/index.html \ - electron-server:/home/electron/saillant-sites/zacus-static/review-console/index.html +Compose service (`saillant-sites/docker-compose.yml`): + +```yaml + review-widget: + image: nginx:alpine + container_name: review-widget + restart: unless-stopped + networks: [traefik] + labels: + - traefik.enable=true + - traefik.docker.network=traefik + - traefik.http.routers.review-admin.rule=Host(`admin.ailiance.fr`) && PathPrefix(`/review`) + - traefik.http.routers.review-admin.entrypoints=websecure + - traefik.http.routers.review-admin.tls.certresolver=letsencrypt + - traefik.http.routers.review-admin.service=review-widget + - traefik.http.services.review-widget.loadbalancer.server.port=80 + volumes: + - ./train-static:/usr/share/nginx/html:ro ``` -Live URL (verified `HTTP 200`): +The widget file lives at `saillant-sites/train-static/review/index.html`. +Redeploy after editing the widget (nginx serves the mount live, no +restart): -``` -https://zacus.saillant.cc/review-console/index.html +```bash +scp widgets/review-console/index.html \ + electron-server:/home/electron/saillant-sites/train-static/review/index.html ``` -To redeploy after editing the widget, re-run the `scp` above — nginx -serves the mounted directory live, no container restart. +Live URL (verified `HTTP 200`): `https://admin.ailiance.fr/review/` ## 2. Add a review page in Grist @@ -33,7 +48,7 @@ In doc *ailiance-llm-workflow*: 1. **Add Page** → `Heldout review`. 2. Add a **Custom** widget. Select **Custom URL** and paste - `https://zacus.saillant.cc/review-console/index.html`. + `https://admin.ailiance.fr/review/`. 3. Bind the widget to the `Heldout_Items` table. 4. When prompted, grant the widget **Full document access** (it must write the review columns). 
From 794e8498680e68c91b0fe1821ebc3a903477b575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 17:16:12 +0200 Subject: [PATCH 23/24] fix(grist): widget reads all columns for queue --- mascarade-eval/widgets/review-console/index.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mascarade-eval/widgets/review-console/index.html b/mascarade-eval/widgets/review-console/index.html index 0032f4f..434e22a 100644 --- a/mascarade-eval/widgets/review-console/index.html +++ b/mascarade-eval/widgets/review-console/index.html @@ -135,7 +135,9 @@ optional: true, allowMultiple: true }, ], }); -grist.onRecords(rebuild); +// includeColumns:"all" — the queue logic reads review_status, which is +// not a mapped column, so the default "shown" set would omit it. +grist.onRecords(rebuild, { includeColumns: "all" }); From 8a0a8cbc60115b09481bb47b657341d263d0ba16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=27=C3=A9lectron=20rare?= <108685187+electron-rare@users.noreply.github.com> Date: Tue, 19 May 2026 17:21:56 +0200 Subject: [PATCH 24/24] fix(grist): widget reads full table via docApi --- .../widgets/review-console/index.html | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/mascarade-eval/widgets/review-console/index.html b/mascarade-eval/widgets/review-console/index.html index 434e22a..f698881 100644 --- a/mascarade-eval/widgets/review-console/index.html +++ b/mascarade-eval/widgets/review-console/index.html @@ -47,23 +47,40 @@ "use strict"; const REVIEWER = "clems"; // adjust to the reviewer's Grist choice value +let mapping = null; // column mapping from onRecords (display columns) let rows = []; // [{id, status, primary, secondary, context}] let queue = []; // ids still pending let cursor = 0; const $ = (id) => document.getElementById(id); -function rebuild(records) { - rows = records.map((rec) => { - const m = 
grist.mapColumnNames(rec) || {}; - let ctx = m.context; - if (Array.isArray(ctx)) ctx = ctx.filter(Boolean).join(" · "); +// Grist restricts onRecords/fetchSelectedTable to mapped columns, so the +// review_* columns are invisible there. fetchTable via docApi (full +// access) returns every column — that is the only channel that sees +// review_status. onRecords is kept only to supply the column mapping +// and to fire on every data change. +async function refresh() { + if (!mapping) return; + const tableId = await grist.getSelectedTableId(); + const data = await grist.docApi.fetchTable(tableId); + const ids = data.id || []; + const ctxCols = Array.isArray(mapping.context) + ? mapping.context + : (mapping.context ? [mapping.context] : []); + const cell = (name, i) => (name && data[name]) ? data[name][i] : null; + rows = ids.map((id, i) => { + const ctx = ctxCols + .map((c) => (data[c] ? data[c][i] : null)) + .filter((v) => v != null && v !== "") + .join(" · "); + const p = cell(mapping.primary, i); + const s = cell(mapping.secondary, i); return { - id: rec.id, - status: rec.review_status || "pending", - primary: m.primary == null ? "" : String(m.primary), - secondary: m.secondary == null ? "" : String(m.secondary), - context: ctx == null ? "" : String(ctx), + id, + status: (data.review_status && data.review_status[i]) || "pending", + primary: p == null ? "" : String(p), + secondary: s == null ? "" : String(s), + context: ctx, }; }); queue = rows.filter((r) => r.status === "pending").map((r) => r.id); @@ -135,9 +152,7 @@ optional: true, allowMultiple: true }, ], }); -// includeColumns:"all" — the queue logic reads review_status, which is -// not a mapped column, so the default "shown" set would omit it. -grist.onRecords(rebuild, { includeColumns: "all" }); +grist.onRecords((records, m) => { mapping = m; void refresh(); });