Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/scripts/test_ci_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def test_ci_workflow_guard_is_run_by_ci(self) -> None:
self.assertIn("python3 .github/scripts/test_execution_status.py", text)
self.assertIn("python3 .github/scripts/test_roadmap_status.py", text)
self.assertIn("python3 .github/scripts/test_milestone_b_closeout_record.py", text)
self.assertIn("python3 .github/scripts/test_milestone_b_exit_checklist.py", text)


if __name__ == "__main__":
Expand Down
82 changes: 82 additions & 0 deletions .github/scripts/test_milestone_b_exit_checklist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python3
#
# Copyright 2026 The Ethos maintainers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import annotations

import re
import unittest
from pathlib import Path


ROOT = Path(__file__).resolve().parents[2]
CHECKLIST = ROOT / "docs/milestone-b-exit-checklist.md"
ROADMAP = ROOT / "docs/roadmap.md"


def checklist_text() -> str:
    """Return the exit checklist Markdown file decoded as UTF-8, unmodified."""
    with CHECKLIST.open(encoding="utf-8") as handle:
        return handle.read()


def normalized_checklist_text() -> str:
    """Return the checklist text with every whitespace run collapsed to one space.

    Newlines count as whitespace, so a sentence that is hard-wrapped across
    several lines in the Markdown source becomes a single-line string that
    substring assertions can match.
    """
    raw = checklist_text()
    whitespace_run = re.compile(r"\s+")
    return whitespace_run.sub(" ", raw)


class MilestoneBExitChecklistTests(unittest.TestCase):
    """Guard the wording of the Milestone B exit checklist and its roadmap link.

    Every expectation is a literal, case-sensitive substring match against the
    Markdown sources, so editing a guarded phrase in the docs requires updating
    the matching string here.
    """

    def _assert_phrases_present(self, phrases: tuple[str, ...], text: str) -> None:
        """Assert that each entry of ``phrases`` occurs in ``text``.

        Each phrase is checked inside its own ``subTest`` so that one run
        reports every missing phrase instead of stopping at the first failure.
        """
        for phrase in phrases:
            with self.subTest(phrase=phrase):
                self.assertIn(phrase, text)

    def test_roadmap_links_to_checklist(self) -> None:
        """The roadmap must contain the Markdown link to the checklist file."""
        text = ROADMAP.read_text(encoding="utf-8")

        self.assertIn("[13-B exit checklist](milestone-b-exit-checklist.md)", text)

    def test_checklist_names_current_validation_commands(self) -> None:
        """The checklist must spell out the validation commands verbatim."""
        text = checklist_text()

        self._assert_phrases_present(
            (
                "make milestone-b-internal-checks PYTHON=<jsonschema-venv>/bin/python",
                "make verify-alpha",
                "make layout-evaluator-alpha",
                "make python-surface-test",
            ),
            text,
        )

    def test_checklist_covers_internal_b_lanes(self) -> None:
        """The checklist must mention every internal Milestone B lane name."""
        text = checklist_text()

        self._assert_phrases_present(
            (
                "WS-VERIFY-ALPHA",
                "WS-LAYOUT",
                "WS-SURFACES",
                "WS-HARNESS",
                "DETERMINISM",
            ),
            text,
        )

    def test_checklist_keeps_public_boundaries_explicit(self) -> None:
        """The checklist must keep its public-boundary disclaimers.

        Matches against whitespace-normalized text so the phrases are found
        even where the Markdown source hard-wraps them across lines.
        """
        text = normalized_checklist_text()

        self._assert_phrases_present(
            (
                "does not approve public benchmark reports",
                "release artifacts",
                "package publication",
                "production positioning",
                "Performance/quality/footprint claims remain blocked",
                "Table-quality and parser-quality claims remain blocked",
            ),
            text,
        )

    def test_checklist_does_not_claim_broader_scope(self) -> None:
        """The checklist must carry both explicit "not claimed" statements."""
        text = checklist_text()

        self._assert_phrases_present(
            (
                "No semantic/arithmetic verification expansion is claimed.",
                "No broader parser/table/OCR completion is claimed.",
            ),
            text,
        )


# Run the suite when the file is executed directly; CI and the Makefile both
# invoke this script by path with the Python interpreter.
if __name__ == "__main__":
unittest.main()
1 change: 1 addition & 0 deletions .github/scripts/test_milestone_b_internal_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def test_target_composes_current_internal_gates(self) -> None:
"$(PYTHON) .github/scripts/test_execution_status.py",
"$(PYTHON) .github/scripts/test_roadmap_status.py",
"$(PYTHON) .github/scripts/test_milestone_b_closeout_record.py",
"$(PYTHON) .github/scripts/test_milestone_b_exit_checklist.py",
"$(MAKE) verify-alpha PYTHON=$(PYTHON)",
"$(MAKE) layout-evaluator-alpha PYTHON=$(PYTHON)",
"$(MAKE) python-surface-test PYTHON=$(PYTHON)",
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ jobs:
run: python3 .github/scripts/test_roadmap_status.py
- name: Milestone B closeout validation record tests
run: python3 .github/scripts/test_milestone_b_closeout_record.py
- name: Milestone B exit checklist tests
run: python3 .github/scripts/test_milestone_b_exit_checklist.py
- name: Gate Zero harness tests
run: python3 benchmarks/harness/test_run_gate_zero.py
- name: same-platform double-parse byte-diff
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ milestone-b-internal-checks:
$(PYTHON) .github/scripts/test_execution_status.py
$(PYTHON) .github/scripts/test_roadmap_status.py
$(PYTHON) .github/scripts/test_milestone_b_closeout_record.py
$(PYTHON) .github/scripts/test_milestone_b_exit_checklist.py
$(MAKE) verify-alpha PYTHON=$(PYTHON)
$(MAKE) layout-evaluator-alpha PYTHON=$(PYTHON)
$(MAKE) python-surface-test PYTHON=$(PYTHON)
Expand Down
48 changes: 48 additions & 0 deletions docs/milestone-b-exit-checklist.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Milestone B Internal Exit Checklist

This checklist maps the current committed source tree to the Milestone B lanes in
`docs/IMPLEMENTATION_PLAN.md` and the PRD's Milestone B section.

Status: Internal Milestone B closeout validation is green for the current committed scope.
Milestone exit remains a decider call. This checklist does not approve public benchmark reports,
release artifacts, package publication, production positioning, or performance/quality/footprint
claims.

## Required Internal Validation

Run the aggregate closeout target from a clean tree:

```sh
make milestone-b-internal-checks PYTHON=<jsonschema-venv>/bin/python
```

The target includes fixture validation, font-policy validation,
status/roadmap/closeout-record/exit-checklist guards, `make verify-alpha`,
`make layout-evaluator-alpha`, `make python-surface-test`, claim language guardrails,
public-readiness guardrails, and `git diff --check`.

## Exit Evidence

| Lane | Internal B criterion | Current evidence | Closeout status | Still outside scope |
| --- | --- | --- | --- | --- |
| WS-VERIFY-ALPHA | Alpha verification over native Ethos JSON and OpenDataLoader-style grounding sources, with deterministic evidence matching and capability-aware reports | `make verify-alpha`; native, synthetic OpenDataLoader-style, and pinned real OpenDataLoader fixtures; split-quote, stale-fingerprint, non-v1, capability-limited, malformed-input, and summary diagnostics coverage | Present for current v1 alpha policy | Future claim-kind expansion, `verify_citations` v1 hardening, broader adapter shapes, semantic/arithmetic verification |
| WS-LAYOUT | Reading order, block grouping, heading/list alpha behavior, and Markdown/text export fixtures | `make layout-evaluator-alpha`; fixture metadata and committed extraction/layout/text/Markdown goldens | Present for current fixture-backed alpha scope | Broader table, nested-list, richer heading, OCR/image-only, and wider layout semantics |
| WS-SURFACES | Internal Python surface scaffold for local CLI-backed parsing calls | `make python-surface-test`; stdlib tests with a fake caller-provided `ethos` command | Present as internal scaffold | Native bindings, wheel publication, package setup, public API stability |
| WS-HARNESS | Internal validation path composes fixture, trust-loop, layout, surface, and policy guardrails | `make milestone-b-internal-checks`; `docs/validation/milestone-b-closeout-validation-2026-06-17.md` | Present for current source-tree validation | Public comparison report flow, claim-wording approval, release/package approval |
| DETERMINISM | PR and nightly workflow guardrails cover current deterministic contracts, with Windows x64 preflight for core contracts | CI workflow static guards; `test_determinism_workflow.py`; same-platform checks in current internal validation paths | Present for current configured contracts | Windows PDFium runtime provisioning and broader cross-platform corpus validation |

## Boundaries

- This checklist is internal closeout evidence only.
- Public benchmark reports remain blocked.
- Release artifacts and package publication remain blocked.
- Production positioning remains blocked.
- Performance/quality/footprint claims remain blocked.
- Table-quality and parser-quality claims remain blocked.
- No semantic/arithmetic verification expansion is claimed.
- No broader parser/table/OCR completion is claimed.

## Next Milestone Hand-off

Milestone C should start from the trust-loop and fixture/evaluator contracts already guarded here.
The first C slice should choose a single lane, add fixture-backed behavior first, and update this
checklist only if the Milestone B closeout contract itself changes.
2 changes: 1 addition & 1 deletion docs/roadmap.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ performance/quality/footprint claims.
| --- | --- | --- | --- |
| Week 0 | pre-kickoff | ADRs, governance, corpus freeze, CI bootstrap, competitor pins | All 11 rows done; clock starts |
| A | weeks 1-8 | Contracts (5 schemas, c14n, deterministic profile), trust-boundary artifacts (`GroundingSource`, verification schemas, OpenDataLoader adapter stub, `ethos verify` CLI stub), PDFium Phase 1 spike, harness + competitor adapters, CLI skeleton | **Gate Zero**: ADR-0005 is accepted as `PROCEED` for internal Milestone B continuation. This is not public benchmark, release, package, production, or claim approval. |
| B | weeks 9-14 | **`ethos verify` alpha first**: native Ethos JSON + synthetic and pinned real OpenDataLoader verification demos, stale fingerprint checks, capability-limited reports, deterministic evidence matching including split-quote coverage, explicit unsupported non-v1 claim reporting, adapter structure diagnostics; then reading order, blocks, headings, lists, Markdown/text exporters, Python wheel scaffold, quality dashboard, Windows x64 nightly determinism | 13-B exit checklist |
| B | weeks 9-14 | **`ethos verify` alpha first**: native Ethos JSON + synthetic and pinned real OpenDataLoader verification demos, stale fingerprint checks, capability-limited reports, deterministic evidence matching including split-quote coverage, explicit unsupported non-v1 claim reporting, adapter structure diagnostics; then reading order, blocks, headings, lists, Markdown/text exporters, Python wheel scaffold, quality dashboard, Windows x64 nightly determinism | [13-B exit checklist](milestone-b-exit-checklist.md) |
| C | weeks 15-22 | Simple/bordered tables; RAG chunker + citations; non-text region coordinates; security report + default-chunk exclusion; debug overlay; internal benchmark snapshot | 13-C exit + first checkpoint |
| D | weeks 23-30 | `verify_citations` v1; crop API; sandbox/subprocess backend; Node beta and MCP experimental only if staffed or accepted by release-scope ADR | 13-D exit |
| E | weeks 31-40 | Public benchmark report (reproducible, labeled tiers); PDFium Phase 2 project-maintained builds; stable CLI/Python docs; proof-of-trust demos; **Public Beta** | Release 1 claim audit + public-beta checkpoint |
Expand Down
Loading