diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a81f2e2..69d936a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,6 +44,8 @@ jobs: - run: cargo test --locked --workspace --all-features - name: fixture manifest validation run: python3 fixtures/validate_fixtures.py + - name: layout evaluator alpha + run: make layout-evaluator-alpha - name: Python surface tests run: PYTHONPATH=python python3 -m unittest discover -s python/tests - name: readiness gate tests diff --git a/Makefile b/Makefile index 31ae55c..51e558c 100644 --- a/Makefile +++ b/Makefile @@ -11,8 +11,9 @@ VERIFY_ALPHA_OUT ?= $(ROOT)/target/verify-alpha VERIFY_RENDERED_CROPS_OUT ?= $(ROOT)/target/verify-rendered-crops COMPARE_RENDERED_CROPS_LEFT ?= $(VERIFY_RENDERED_CROPS_OUT)/run1 COMPARE_RENDERED_CROPS_RIGHT ?= $(VERIFY_RENDERED_CROPS_OUT)/run2 +LAYOUT_EVALUATOR_OUT ?= $(ROOT)/target/layout-evaluator-alpha -.PHONY: verify-alpha verify-alpha-tree verify-rendered-crops compare-rendered-crops python-surface-test release-hygiene release-advisory third-party-license-manifest release-notice-draft +.PHONY: verify-alpha verify-alpha-tree verify-rendered-crops compare-rendered-crops layout-evaluator-alpha python-surface-test release-hygiene release-advisory third-party-license-manifest release-notice-draft $(ETHOS_BIN): cargo build --locked -p ethos-cli @@ -40,6 +41,10 @@ verify-rendered-crops: $(ETHOS_BIN) compare-rendered-crops: $(PYTHON) examples/verify/compare_rendered_crop_runs.py --left-run $(COMPARE_RENDERED_CROPS_LEFT) --right-run $(COMPARE_RENDERED_CROPS_RIGHT) +layout-evaluator-alpha: + $(PYTHON) fixtures/evaluate_layout_alpha.py --out $(LAYOUT_EVALUATOR_OUT)/report.json + $(PYTHON) fixtures/test_evaluate_layout_alpha.py + python-surface-test: PYTHONPATH=$(ROOT)/python $(PYTHON) -m unittest discover -s python/tests diff --git a/docs/execution-status.md b/docs/execution-status.md index 14189bd..d94d151 100644 --- a/docs/execution-status.md +++ 
b/docs/execution-status.md @@ -15,6 +15,7 @@ The committed implementation now includes: - Runtime checks that reject missing or mismatched PDFium versions, release artifacts, and extracted libraries with stable errors before dynamic loading. - `ethos doc parse` / `ethos fingerprint` PDF execution through a worker process with `max_parse_ms` timeout enforcement, stable error-envelope relay, diagnostics-gated worker stderr, and page-range validation/filtering. - Quantized page/span extraction at the backend boundary, plus a basic deterministic layout pass that assembles paragraph `text_block` elements, fixture-backed alpha heading and flat list-item elements, and simple column reading order for the current born-digital fixtures. Fixture validation binds selected `fixture.json` expectations to committed extraction/layout goldens and binds current alpha text/Markdown exports to committed layout output so current read-order, element-type, heading-export, list-item, and export cases fail closed on drift. +- An internal layout evaluator scaffold exists at `fixtures/evaluate_layout_alpha.py` and `make layout-evaluator-alpha`. It reads committed `fixture.json` and `layout.json` files, summarizes alpha element-type and subset coverage, and fails closed on missing layout expectations or drift in fixture-backed reading order / heading / list-item cases. - Schema/example/profile validation is green through `schemas/validate_examples.py` using `jsonschema` draft 2020-12 validation, including the crop descriptor artifact contract plus referential-integrity and bbox sanity checks outside JSON Schema. - `ethos verify` now produces non-empty quote, value, presence, and table-cell verification checks over native Ethos document JSON and synthetic OpenDataLoader-style JSON through `--grounding opendataloader-json`; it also verifies quote/value/presence citations over pinned real OpenDataLoader 2.4.7 JSON, including grounded and ungrounded cases. 
Citation/config inputs are rejected when they drift outside the closed schemas. The public demo harness covers grounded, ungrounded, split-quote, not-found, stale-fingerprint, unsupported non-v1 claim, capability-limited, malformed-citation, malformed OpenDataLoader-style input, and summary-format reject paths. - Verification semantics are now trust-honest at alpha scope: quote containment is explicitly labeled, value/table-cell checks require normalized equality, fingerprint-pinned citations fail closed when source fingerprints are unavailable, and structured capability limits explain why a run is downgraded. @@ -51,6 +52,7 @@ Milestone A has an accepted internal Gate Zero decision for roadmap control, so | PDFium loader/runtime checks | Landed: missing/mismatched version, artifact, and runtime library hashes fail deterministically | Release packaging and operator setup path still need hardening | | Real PDF backend | Landed for simple born-digital PDFs: page count, quantized spans, worker execution, timeout, page filtering, and fingerprint path exist | Wider corpus coverage, failure fixtures, memory-limit behavior, quirk log, and Gate Zero run are still missing | | Layout groundwork | Landed: basic paragraph text blocks, fixture-backed alpha heading and flat list-item elements, simple column reading order over quantized spans, fixture metadata checks against committed extraction/layout goldens for current read-order and element-type expectations, and alpha text/Markdown export goldens derived from committed layout output | Tables, nested/richer list and heading semantics, rotation/quirk handling, and confidence policy remain future work | +| Layout evaluator scaffold | Landed: deterministic internal evaluator over committed layout fixture expectations, with heading/list/reading-order coverage checks, expectation drift diagnostics, report JSON, Make target, and unit coverage | Broader evaluator dimensions and CI matrix integration remain future work | | Python 
surface scaffold | Landed: internal stdlib wrapper over a caller-provided local `ethos doc parse` command, with explicit JSON/Markdown/text methods, page selection passthrough, diagnostics passthrough, timeout handling, command failure reporting, and mocked-command unit coverage | Native binding work, broader API design, and public setup path remain future work | | Font policy groundwork | Partially landed: substitution table and profile policy are present; fixture output uses deterministic substitution IDs | Bundled fallback asset hashing and broader font/CID validation remain open | | Schema/example validation | Landed: schemas, examples, deterministic profile, referential integrity, and bbox sanity pass the `jsonschema` validation gate | Contract changes still require explicit versioning and compatibility review | diff --git a/fixtures/evaluate_layout_alpha.py b/fixtures/evaluate_layout_alpha.py new file mode 100644 index 0000000..dc824d0 --- /dev/null +++ b/fixtures/evaluate_layout_alpha.py @@ -0,0 +1,578 @@ +#!/usr/bin/env python3 +# +# Copyright 2026 The Ethos maintainers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Internal Milestone B layout evaluator over committed fixture goldens. + +This script does not parse PDFs and does not compare Ethos to other tools. 
import argparse
import json
import sys
from collections import Counter
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple

# Directory containing this script; the default value of --fixtures-root.
ROOT = Path(__file__).resolve().parent

# Keys every evaluated fixture.json must declare; absence is a diagnostic.
REQUIRED_EXPECTATION_FIELDS = ("expected_text", "expected_element_types")

# Corpus-level gates. Each gate needs at least one evaluated fixture that is
# tagged with ``subset`` and satisfies the gate's extra requirement, otherwise
# evaluate_layout_alpha() records a ``missing_coverage`` diagnostic.
COVERAGE_GATES = {
    "heading_fixture": {
        "subset": "headings",
        "element_type": "heading",
    },
    "list_item_fixture": {
        "subset": "lists",
        "element_type": "list_item",
    },
    "multi_column_reading_order_fixture": {
        "subset": "multi_column",
        "requires_multi_element_expected_text": True,
    },
}


def main(argv: Optional[List[str]] = None) -> int:
    """Run the evaluator from the command line.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when the report status is ``"pass"``, otherwise ``1``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--fixtures-root",
        type=Path,
        default=ROOT,
        help="Path to the fixtures directory. Defaults to this script's directory.",
    )
    parser.add_argument(
        "--out",
        type=Path,
        help="Optional path for the deterministic JSON evaluator report.",
    )
    args = parser.parse_args(argv)

    report = evaluate_layout_alpha(args.fixtures_root)
    if args.out is not None:
        # The report is written on failure too, so the diagnostics are kept.
        args.out.parent.mkdir(parents=True, exist_ok=True)
        args.out.write_bytes(canonical_json_bytes(report))

    # ``item`` rather than ``diagnostic``: avoid shadowing the module helper.
    for item in report["diagnostics"]:
        fixture = item.get("fixture_id", "corpus")
        print(f"FAIL {fixture}: {item['message']}")

    if report["status"] != "pass":
        print(f"\n{len(report['diagnostics'])} layout evaluator failure(s)")
        return 1

    print(
        "ok layout evaluator checked "
        f"{report['fixtures_evaluated']} successful fixture(s)"
    )
    print(
        "ok layout evaluator element types "
        f"{json.dumps(report['element_type_counts'], sort_keys=True)}"
    )
    print("ok layout evaluator heading/list/reading-order coverage present")
    if args.out is not None:
        print(f"ok layout evaluator report wrote {args.out}")
    return 0


def evaluate_layout_alpha(fixtures_root: Path) -> Dict[str, Any]:
    """Evaluate every non-failure fixture listed in ``manifest.json``.

    Args:
        fixtures_root: Directory holding ``manifest.json`` and the fixture
            directories that the manifest's ``file`` values point into.

    Returns:
        A JSON-serializable report with ``version``, ``status`` (``"pass"``
        only when no diagnostic was recorded), ``fixtures_evaluated``,
        ``element_type_counts``, ``subset_counts``, ``coverage``, ``checks``
        (sorted by fixture id) and ``diagnostics`` (sorted by
        ``diagnostic_sort_key``).
    """
    diagnostics: List[Dict[str, Any]] = []
    manifest = _load_json_object(
        fixtures_root / "manifest.json",
        diagnostics,
        None,
        "manifest.json",
        "invalid_manifest",
    )
    # A manifest that failed to load was already reported; do not report the
    # absent "fixtures" array a second time.
    entries: Any = [] if manifest is None else manifest.get("fixtures")
    if not isinstance(entries, list):
        diagnostics.append(
            diagnostic(
                "invalid_manifest",
                None,
                "manifest fixtures must be an array",
                "manifest.json",
            )
        )
        entries = []

    checks: List[Dict[str, Any]] = []
    element_type_counts: "Counter[str]" = Counter()
    subset_counts: "Counter[str]" = Counter()
    coverage: Dict[str, List[str]] = {gate: [] for gate in COVERAGE_GATES}

    for index, entry in enumerate(entries):
        resolved = _resolve_manifest_entry(index, entry, diagnostics)
        if resolved is None:
            continue
        fixture_id, fixture_file, subsets = resolved
        # Fixtures tagged "failure" are skipped by this evaluator.
        if "failure" in subsets:
            continue

        fixture_dir = (fixtures_root / fixture_file).parent
        check = evaluate_fixture(
            fixtures_root, fixture_id, fixture_dir, subsets, diagnostics
        )
        if check is None:
            continue
        checks.append(check)
        element_type_counts.update(check["element_types"])
        subset_counts.update(subsets)
        update_coverage(coverage, check, subsets)

    for gate, covering_fixtures in coverage.items():
        if not covering_fixtures:
            diagnostics.append(
                diagnostic(
                    "missing_coverage",
                    None,
                    f"{gate} has no committed successful fixture coverage",
                    "manifest.json",
                )
            )

    diagnostics.sort(key=diagnostic_sort_key)
    checks.sort(key=lambda check: check["fixture_id"])
    return {
        "version": 1,
        "status": "pass" if not diagnostics else "fail",
        "fixtures_evaluated": len(checks),
        "element_type_counts": sorted_counter_dict(element_type_counts),
        "subset_counts": sorted_counter_dict(subset_counts),
        "coverage": {key: sorted(value) for key, value in sorted(coverage.items())},
        "checks": checks,
        "diagnostics": diagnostics,
    }


def _resolve_manifest_entry(
    index: int,
    entry: Any,
    diagnostics: List[Dict[str, Any]],
) -> Optional[Tuple[str, str, List[str]]]:
    """Validate one manifest entry; return ``(id, file, subsets)`` or ``None``."""
    if not isinstance(entry, dict):
        diagnostics.append(
            diagnostic(
                "invalid_manifest_entry",
                None,
                f"manifest fixtures[{index}] must be an object",
                "manifest.json",
            )
        )
        return None

    fixture_id = entry.get("id")
    if not isinstance(fixture_id, str) or not fixture_id:
        # Fall back to a positional label so later diagnostics stay attributable.
        fixture_id = f"manifest fixtures[{index}]"

    fixture_file = entry.get("file")
    if not isinstance(fixture_file, str):
        diagnostics.append(
            diagnostic(
                "invalid_manifest_entry",
                fixture_id,
                "manifest entry file must be a string",
                "manifest.json",
            )
        )
        return None

    subsets = entry.get("subsets")
    if not isinstance(subsets, list) or not all(
        isinstance(item, str) for item in subsets
    ):
        diagnostics.append(
            diagnostic(
                "invalid_manifest_entry",
                fixture_id,
                "manifest entry subsets must be a string array",
                "manifest.json",
            )
        )
        return None
    return fixture_id, fixture_file, subsets


def _load_json_object(
    path: Path,
    diagnostics: List[Dict[str, Any]],
    fixture_id: Optional[str],
    display_path: str,
    invalid_code: str,
) -> Optional[Dict[str, Any]]:
    """Load ``path`` and require a JSON object, reporting anything else."""
    already_reported = len(diagnostics)
    value = load_json(path, diagnostics, fixture_id, display_path)
    if isinstance(value, dict):
        return value
    # load_json() reports read/parse failures itself; only add a diagnostic
    # when the file parsed fine but holds a non-object (list, null, number...).
    if len(diagnostics) == already_reported:
        diagnostics.append(
            diagnostic(
                invalid_code,
                fixture_id,
                f"{path.name} must be a JSON object",
                display_path,
            )
        )
    return None


def evaluate_fixture(
    fixtures_root: Path,
    fixture_id: str,
    fixture_dir: Path,
    subsets: List[str],
    diagnostics: List[Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
    """Compare one fixture's ``layout.json`` with its ``fixture.json``.

    Args:
        fixtures_root: Root used to render paths relative in diagnostics.
        fixture_id: Identifier attached to every diagnostic for this fixture.
        fixture_dir: Directory containing ``fixture.json`` and ``layout.json``.
        subsets: Subset tags declared for the fixture in the manifest.
        diagnostics: Shared list that failures are appended to.

    Returns:
        A check record for the report, or ``None`` when either file is
        missing, unparsable, not a JSON object, or ``elements`` is not an
        array. Each of those cases appends a diagnostic first.
    """
    fixture_rel = relpath(fixtures_root, fixture_dir)
    metadata_display = f"{fixture_rel}/fixture.json"
    layout_display = f"{fixture_rel}/layout.json"

    # Load both files before bailing out so both problems get reported.
    metadata = _load_json_object(
        fixture_dir / "fixture.json",
        diagnostics,
        fixture_id,
        metadata_display,
        "invalid_expectation",
    )
    layout = _load_json_object(
        fixture_dir / "layout.json",
        diagnostics,
        fixture_id,
        layout_display,
        "invalid_layout",
    )
    if metadata is None or layout is None:
        return None

    elements = layout.get("elements")
    if not isinstance(elements, list):
        diagnostics.append(
            diagnostic(
                "invalid_layout",
                fixture_id,
                "layout.json elements must be an array",
                layout_display,
            )
        )
        return None

    element_text: List[Any] = []
    element_types: List[Any] = []
    for element_index, element in enumerate(elements):
        if not isinstance(element, dict):
            diagnostics.append(
                diagnostic(
                    "invalid_layout",
                    fixture_id,
                    f"layout element {element_index} must be an object",
                    layout_display,
                )
            )
            continue
        element_text.append(element.get("text"))
        element_types.append(element.get("type"))

    for field in REQUIRED_EXPECTATION_FIELDS:
        if field not in metadata:
            diagnostics.append(
                diagnostic(
                    "missing_expectation",
                    fixture_id,
                    f"fixture.json must include {field}",
                    metadata_display,
                )
            )

    expected_text = normalize_expected_text(metadata.get("expected_text"))
    if "expected_text" in metadata and expected_text is None:
        # Declared but unusable (number, null, mixed list...): fail closed
        # instead of treating it like an absent field.
        diagnostics.append(
            diagnostic(
                "invalid_expectation",
                fixture_id,
                "expected_text must be a string or a string array",
                metadata_display,
            )
        )
        expected_text_status = "invalid"
    else:
        expected_text_status = compare_expected_text(
            fixture_id, fixture_rel, expected_text, element_text, diagnostics
        )

    expected_element_types = metadata.get("expected_element_types")
    if "expected_element_types" in metadata and expected_element_types is None:
        # An explicit null is a malformed declaration, not a missing one.
        diagnostics.append(
            diagnostic(
                "invalid_expectation",
                fixture_id,
                "expected_element_types must be a string array",
                metadata_display,
            )
        )
        expected_element_types_status = "invalid"
    else:
        expected_element_types_status = compare_expected_element_types(
            fixture_id,
            fixture_rel,
            expected_element_types,
            element_types,
            diagnostics,
        )

    expected_elements_status = compare_expected_elements(
        fixture_id,
        fixture_rel,
        metadata.get("expected_elements"),
        len(elements),
        diagnostics,
    )
    subset_status = compare_subset_expectations(
        fixture_id,
        fixture_rel,
        subsets,
        element_types,
        expected_text,
        diagnostics,
    )

    return {
        "fixture_id": fixture_id,
        "path": fixture_rel,
        "subsets": sorted(subsets),
        "elements": len(elements),
        "element_types": as_string_list(element_types),
        "expected_text": expected_text_status,
        "expected_element_types": expected_element_types_status,
        "expected_elements": expected_elements_status,
        "subset_expectations": subset_status,
    }


def compare_expected_text(
    fixture_id: str,
    fixture_rel: str,
    expected_text: Optional[List[str]],
    element_text: List[Any],
    diagnostics: List[Dict[str, Any]],
) -> str:
    """Check layout element text, in order, against ``expected_text``.

    Returns:
        ``"missing"`` when ``expected_text`` is ``None`` (no diagnostic is
        added here), ``"invalid"`` when a layout text value is not a string,
        ``"mismatch"`` on any difference, otherwise ``"pass"``.
    """
    if expected_text is None:
        return "missing"
    if not all(isinstance(item, str) for item in element_text):
        diagnostics.append(
            diagnostic(
                "invalid_layout",
                fixture_id,
                "layout element text values must all be strings",
                f"{fixture_rel}/layout.json",
            )
        )
        return "invalid"
    if element_text != expected_text:
        diagnostics.append(
            diagnostic(
                "expected_text_mismatch",
                fixture_id,
                "expected_text does not match layout element text order",
                f"{fixture_rel}/fixture.json",
                expected=expected_text,
                actual=element_text,
            )
        )
        return "mismatch"
    return "pass"


def compare_expected_element_types(
    fixture_id: str,
    fixture_rel: str,
    expected_element_types: Any,
    element_types: List[Any],
    diagnostics: List[Dict[str, Any]],
) -> str:
    """Check layout element types, in order, against the expectation.

    Returns:
        ``"missing"`` when the expectation is ``None`` (no diagnostic is
        added here), ``"invalid"`` when either side is not a list of strings,
        ``"mismatch"`` on any difference, otherwise ``"pass"``.
    """
    if expected_element_types is None:
        return "missing"
    if not isinstance(expected_element_types, list) or not all(
        isinstance(item, str) for item in expected_element_types
    ):
        diagnostics.append(
            diagnostic(
                "invalid_expectation",
                fixture_id,
                "expected_element_types must be a string array",
                f"{fixture_rel}/fixture.json",
            )
        )
        return "invalid"
    if not all(isinstance(item, str) for item in element_types):
        diagnostics.append(
            diagnostic(
                "invalid_layout",
                fixture_id,
                "layout element type values must all be strings",
                f"{fixture_rel}/layout.json",
            )
        )
        return "invalid"
    if element_types != expected_element_types:
        diagnostics.append(
            diagnostic(
                "expected_element_types_mismatch",
                fixture_id,
                "expected_element_types does not match layout element type order",
                f"{fixture_rel}/fixture.json",
                expected=expected_element_types,
                actual=element_types,
            )
        )
        return "mismatch"
    return "pass"


def compare_expected_elements(
    fixture_id: str,
    fixture_rel: str,
    expected_elements: Any,
    actual_count: int,
    diagnostics: List[Dict[str, Any]],
) -> str:
    """Check the optional ``expected_elements`` count against the layout.

    Returns:
        ``"not_declared"`` when the expectation is ``None``, ``"invalid"``
        when it is not a non-negative integer, ``"mismatch"`` when it differs
        from ``actual_count``, otherwise ``"pass"``.
    """
    if expected_elements is None:
        return "not_declared"
    # bool is a subclass of int; JSON true/false must not count as 1/0.
    is_count = isinstance(expected_elements, int) and not isinstance(
        expected_elements, bool
    )
    if not is_count or expected_elements < 0:
        diagnostics.append(
            diagnostic(
                "invalid_expectation",
                fixture_id,
                "expected_elements must be an integer >= 0",
                f"{fixture_rel}/fixture.json",
            )
        )
        return "invalid"
    if expected_elements != actual_count:
        diagnostics.append(
            diagnostic(
                "expected_elements_mismatch",
                fixture_id,
                "expected_elements does not match layout element count",
                f"{fixture_rel}/fixture.json",
                expected=expected_elements,
                actual=actual_count,
            )
        )
        return "mismatch"
    return "pass"


def compare_subset_expectations(
    fixture_id: str,
    fixture_rel: str,
    subsets: List[str],
    element_types: List[Any],
    expected_text: Optional[List[str]],
    diagnostics: List[Dict[str, Any]],
) -> str:
    """Check what the ``headings``/``lists``/``multi_column`` tags promise.

    Returns:
        ``"not_applicable"`` when none of the three subsets is declared,
        ``"mismatch"`` when any declared subset's promise is broken (one
        diagnostic per broken promise), otherwise ``"pass"``.
    """
    applicable = False
    failed = False

    if "headings" in subsets:
        applicable = True
        if "heading" not in element_types:
            failed = True
            diagnostics.append(
                diagnostic(
                    "subset_expectation_mismatch",
                    fixture_id,
                    "headings subset must include at least one heading element",
                    f"{fixture_rel}/layout.json",
                )
            )
    if "lists" in subsets:
        applicable = True
        if "list_item" not in element_types:
            failed = True
            diagnostics.append(
                diagnostic(
                    "subset_expectation_mismatch",
                    fixture_id,
                    "lists subset must include at least one list_item element",
                    f"{fixture_rel}/layout.json",
                )
            )
    if "multi_column" in subsets:
        applicable = True
        if expected_text is None or len(expected_text) < 2:
            failed = True
            diagnostics.append(
                diagnostic(
                    "subset_expectation_mismatch",
                    fixture_id,
                    "multi_column subset must declare multi-element expected_text",
                    f"{fixture_rel}/fixture.json",
                )
            )

    if not applicable:
        return "not_applicable"
    # The status now agrees with the diagnostics instead of always being "pass".
    return "mismatch" if failed else "pass"


def update_coverage(
    coverage: Dict[str, List[str]],
    check: Dict[str, Any],
    subsets: Iterable[str],
) -> None:
    """Append the fixture id to every coverage gate that ``check`` satisfies.

    Args:
        coverage: Mapping of gate name to covering fixture ids; mutated.
        check: Check record produced by ``evaluate_fixture``.
        subsets: Subset tags declared for the fixture.
    """
    declared = set(subsets)
    present_types = set(check["element_types"])
    for gate, requirement in COVERAGE_GATES.items():
        if requirement["subset"] not in declared:
            continue
        required_type = requirement.get("element_type")
        if required_type is not None and required_type not in present_types:
            continue
        # The reading-order gate only counts fixtures whose text order was
        # actually verified and that have at least two elements to order.
        if requirement.get("requires_multi_element_expected_text") and (
            check["expected_text"] != "pass" or check["elements"] < 2
        ):
            continue
        coverage[gate].append(check["fixture_id"])


def load_json(
    path: Path,
    diagnostics: List[Dict[str, Any]],
    fixture_id: Optional[str],
    display_path: str,
) -> Any:
    """Read and parse a UTF-8 JSON file without raising on bad input.

    Args:
        path: File to read.
        diagnostics: Shared list that a failure diagnostic is appended to.
        fixture_id: Fixture the diagnostic belongs to; ``None`` for the corpus.
        display_path: Path string recorded in the diagnostic.

    Returns:
        The parsed value, or ``None`` after appending a ``missing_file``,
        ``unreadable_file`` or ``invalid_json`` diagnostic.
    """
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        code, message = "missing_file", f"{path.name} is missing"
    except OSError as exc:
        # E.g. a directory or permission problem. Only the exception type is
        # recorded because OS error strings differ between platforms.
        code = "unreadable_file"
        message = f"{path.name} could not be read: {type(exc).__name__}"
    except UnicodeDecodeError:
        code, message = "invalid_json", f"{path.name} is not valid UTF-8"
    except json.JSONDecodeError as exc:
        code = "invalid_json"
        message = f"{path.name} is not valid JSON: {exc.msg}"
    diagnostics.append(diagnostic(code, fixture_id, message, display_path))
    return None


def normalize_expected_text(value: Any) -> Optional[List[str]]:
    """Return ``expected_text`` as a list of strings, or ``None`` if unusable.

    A bare string becomes a one-element list; a list of strings is returned
    as is; any other value yields ``None``.
    """
    if isinstance(value, str):
        return [value]
    if isinstance(value, list) and all(isinstance(item, str) for item in value):
        return value
    return None


def as_string_list(values: Iterable[Any]) -> List[str]:
    """Copy ``values``, replacing every non-string item with ``""``."""
    return [value if isinstance(value, str) else "" for value in values]


def sorted_counter_dict(counter: "Counter[str]") -> Dict[str, int]:
    """Return the counter as a plain dict with keys in sorted order."""
    return {key: counter[key] for key in sorted(counter)}


def diagnostic(
    code: str,
    fixture_id: Optional[str],
    message: str,
    path: str,
    *,
    expected: Any = None,
    actual: Any = None,
) -> Dict[str, Any]:
    """Build one diagnostic record.

    ``fixture_id``, ``expected`` and ``actual`` are only included in the
    record when they are not ``None``.
    """
    item: Dict[str, Any] = {
        "code": code,
        "message": message,
        "path": path,
    }
    if fixture_id is not None:
        item["fixture_id"] = fixture_id
    if expected is not None:
        item["expected"] = expected
    if actual is not None:
        item["actual"] = actual
    return item


def diagnostic_sort_key(item: Dict[str, Any]) -> Tuple[str, str, str]:
    """Sort key for diagnostics: fixture id, then code, then path."""
    return (
        str(item.get("fixture_id", "")),
        str(item.get("code", "")),
        str(item.get("path", "")),
    )


def canonical_json_bytes(value: Any) -> bytes:
    """Serialize ``value`` as compact, key-sorted UTF-8 JSON plus a newline."""
    text = json.dumps(value, ensure_ascii=False, separators=(",", ":"), sort_keys=True)
    return f"{text}\n".encode("utf-8")


def relpath(root: Path, path: Path) -> str:
    """Return ``path`` relative to ``root`` in POSIX form.

    Falls back to ``path`` itself, in POSIX form, when it is not under
    ``root``.
    """
    try:
        return path.relative_to(root).as_posix()
    except ValueError:
        return path.as_posix()


if __name__ == "__main__":
    sys.exit(main())
import json
import tempfile
import unittest
from pathlib import Path

from evaluate_layout_alpha import canonical_json_bytes, evaluate_layout_alpha


class LayoutEvaluatorAlphaTests(unittest.TestCase):
    """Run the layout evaluator against a synthetic three-fixture tree."""

    def setUp(self) -> None:
        # Each test gets its own throwaway fixtures root.
        self.tempdir = tempfile.TemporaryDirectory()
        self.root = Path(self.tempdir.name)

    def tearDown(self) -> None:
        self.tempdir.cleanup()

    def _edit_json(self, relative_path: str, mutate) -> None:
        """Load a JSON file under the root, apply ``mutate``, write it back."""
        target = self.root / relative_path
        document = json.loads(target.read_text(encoding="utf-8"))
        mutate(document)
        self.write_json(target, document)

    def test_passing_fixture_set_reports_counts_and_coverage(self) -> None:
        self.write_required_alpha_fixture_set()

        report = evaluate_layout_alpha(self.root)

        expected_counts = {"heading": 1, "list_item": 2, "text_block": 3}
        expected_coverage = {
            "heading_fixture": ["heading-case"],
            "list_item_fixture": ["list-case"],
            "multi_column_reading_order_fixture": ["column-case"],
        }
        self.assertEqual(report["status"], "pass")
        self.assertEqual(report["fixtures_evaluated"], 3)
        self.assertEqual(report["element_type_counts"], expected_counts)
        self.assertEqual(report["coverage"], expected_coverage)
        self.assertEqual(report["diagnostics"], [])

    def test_missing_expected_text_fails_closed(self) -> None:
        self.write_required_alpha_fixture_set()
        self._edit_json(
            "synthetic/heading-case/fixture.json",
            lambda metadata: metadata.pop("expected_text"),
        )

        report = evaluate_layout_alpha(self.root)

        self.assertEqual(report["status"], "fail")
        self.assertDiagnostic(report, "missing_expectation", "heading-case")

    def test_expected_text_drift_reports_expected_and_actual(self) -> None:
        self.write_required_alpha_fixture_set()
        swapped = ["Right column", "Left column"]
        self._edit_json(
            "synthetic/column-case/fixture.json",
            lambda metadata: metadata.update(expected_text=list(swapped)),
        )

        report = evaluate_layout_alpha(self.root)

        self.assertEqual(report["status"], "fail")
        found = self.onlyDiagnostic(report, "expected_text_mismatch", "column-case")
        self.assertEqual(found["expected"], ["Right column", "Left column"])
        self.assertEqual(found["actual"], ["Left column", "Right column"])

    def test_heading_subset_requires_heading_element(self) -> None:
        self.write_required_alpha_fixture_set()

        def demote_heading(layout) -> None:
            layout["elements"][0]["type"] = "text_block"

        self._edit_json("synthetic/heading-case/layout.json", demote_heading)

        report = evaluate_layout_alpha(self.root)

        self.assertEqual(report["status"], "fail")
        self.assertDiagnostic(report, "subset_expectation_mismatch", "heading-case")
        self.assertDiagnostic(report, "missing_coverage", None)

    def test_list_subset_requires_list_item_element(self) -> None:
        self.write_required_alpha_fixture_set()

        def demote_list_items(layout) -> None:
            for element in layout["elements"]:
                element["type"] = "text_block"

        self._edit_json("synthetic/list-case/layout.json", demote_list_items)

        report = evaluate_layout_alpha(self.root)

        self.assertEqual(report["status"], "fail")
        self.assertDiagnostic(report, "subset_expectation_mismatch", "list-case")
        self.assertDiagnostic(report, "missing_coverage", None)

    def test_multi_column_fixture_requires_multi_element_expected_text(self) -> None:
        self.write_required_alpha_fixture_set()
        # A bare string is a single-element expectation after normalization.
        self._edit_json(
            "synthetic/column-case/fixture.json",
            lambda metadata: metadata.update(
                expected_text="Left column Right column"
            ),
        )

        report = evaluate_layout_alpha(self.root)

        self.assertEqual(report["status"], "fail")
        self.assertDiagnostic(report, "expected_text_mismatch", "column-case")
        self.assertDiagnostic(report, "subset_expectation_mismatch", "column-case")
        self.assertDiagnostic(report, "missing_coverage", None)

    def test_missing_layout_file_reports_missing_file(self) -> None:
        self.write_required_alpha_fixture_set()
        layout_path = self.root / "synthetic/list-case/layout.json"
        layout_path.unlink()

        report = evaluate_layout_alpha(self.root)

        self.assertEqual(report["status"], "fail")
        self.assertDiagnostic(report, "missing_file", "list-case")

    def test_report_is_canonical_json_serializable(self) -> None:
        self.write_required_alpha_fixture_set()
        report = evaluate_layout_alpha(self.root)

        reference_text = json.dumps(
            report,
            ensure_ascii=False,
            separators=(",", ":"),
            sort_keys=True,
        )

        self.assertEqual(
            canonical_json_bytes(report),
            reference_text.encode("utf-8") + b"\n",
        )

    def assertDiagnostic(
        self,
        report,
        code: str,
        fixture_id: "str | None",
    ) -> None:
        """Assert exactly one diagnostic matches ``code`` and ``fixture_id``."""
        self.onlyDiagnostic(report, code, fixture_id)

    def onlyDiagnostic(
        self,
        report,
        code: str,
        fixture_id: "str | None",
    ):
        """Return the single diagnostic matching ``code`` and ``fixture_id``."""
        matches = []
        for candidate in report["diagnostics"]:
            if candidate["code"] != code:
                continue
            if candidate.get("fixture_id") != fixture_id:
                continue
            matches.append(candidate)
        self.assertLessEqual(len(matches), 1, f"multiple diagnostics matched {code}")
        self.assertTrue(matches, f"missing diagnostic {code} for {fixture_id}")
        return matches[0]

    def write_required_alpha_fixture_set(self) -> None:
        """Write heading, list and two-column fixtures plus their manifest."""
        heading_entry = self.write_fixture(
            fixture_id="heading-case",
            fixture_path="synthetic/heading-case/document.pdf",
            subsets=["born_digital", "headings"],
            expected_text=["Alpha Heading", "Body text"],
            expected_element_types=["heading", "text_block"],
            elements=[
                {"id": "e000001", "type": "heading", "text": "Alpha Heading"},
                {"id": "e000002", "type": "text_block", "text": "Body text"},
            ],
        )
        list_entry = self.write_fixture(
            fixture_id="list-case",
            fixture_path="synthetic/list-case/document.pdf",
            subsets=["born_digital", "lists"],
            expected_text=["- First", "2. Second"],
            expected_element_types=["list_item", "list_item"],
            elements=[
                {"id": "e000001", "type": "list_item", "text": "- First"},
                {"id": "e000002", "type": "list_item", "text": "2. Second"},
            ],
        )
        column_entry = self.write_fixture(
            fixture_id="column-case",
            fixture_path="synthetic/column-case/document.pdf",
            subsets=["born_digital", "multi_column"],
            expected_text=["Left column", "Right column"],
            expected_element_types=["text_block", "text_block"],
            elements=[
                {"id": "e000001", "type": "text_block", "text": "Left column"},
                {"id": "e000002", "type": "text_block", "text": "Right column"},
            ],
        )
        manifest = {
            "manifest_version": "1.0.0",
            "root": "fixtures",
            "subsets_declared": [
                "born_digital",
                "headings",
                "lists",
                "multi_column",
            ],
            "fixtures": [heading_entry, list_entry, column_entry],
        }
        self.write_json(self.root / "manifest.json", manifest)

    def write_fixture(
        self,
        *,
        fixture_id: str,
        fixture_path: str,
        subsets: "list[str]",
        expected_text,
        expected_element_types: "list[str]",
        elements: "list[dict]",
    ):
        """Write one fixture's JSON files and return its manifest entry."""
        fixture_dir = (self.root / fixture_path).parent
        fixture_dir.mkdir(parents=True, exist_ok=True)

        metadata = {
            "id": fixture_id,
            "subsets": subsets,
            "expected_text": expected_text,
            "expected_element_types": expected_element_types,
            "expected_elements": len(elements),
        }
        layout = {"elements": elements, "warnings": []}
        self.write_json(fixture_dir / "fixture.json", metadata)
        self.write_json(fixture_dir / "layout.json", layout)

        return {
            "id": fixture_id,
            "file": fixture_path,
            "sha256": "0" * 64,
            "pages": 1,
            "subsets": subsets,
            "provenance": "Synthetic evaluator unit fixture.",
            "license": "CC0-1.0",
        }

    def write_json(self, path: Path, value) -> None:
        """Write ``value`` to ``path`` in the evaluator's canonical JSON form."""
        path.write_bytes(canonical_json_bytes(value))


if __name__ == "__main__":
    unittest.main()