Polish TwinBench for S-tier public launch #8
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Harness CI: installs the harness dependencies, byte-compiles the harness
# sources, smoke-tests the runner CLI, and validates the schema of the
# committed v0.2 result artifact.
name: Harness CI

# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  harness-ci:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Python 3.10
        uses: actions/setup-python@v5
        with:
          # Keep the quotes: an unquoted 3.10 is a YAML float and becomes 3.1.
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r harness/requirements.txt

      # The shell glob is not recursive: only modules directly inside
      # harness/ are byte-compiled here.
      - name: Compile harness sources
        run: python -m py_compile harness/*.py

      # Confirms the runner module imports and its argument parser builds.
      - name: Runner CLI smoke
        run: python -m harness.runner --help

      # Inline script: fails the job (non-zero exit via SystemExit) when the
      # artifact is missing, is not a JSON object, lacks a required key, or
      # reports a benchmark_version other than "0.2".
      - name: Validate v0.2 result artifact schema
        run: |
          python - <<'PY'
          import json
          from pathlib import Path

          p = Path("results/nullalis-v0.2.json")
          if not p.exists():
              raise SystemExit("results/nullalis-v0.2.json not found")

          # Decode explicitly as UTF-8 so parsing does not depend on the
          # runner's locale default encoding.
          data = json.loads(p.read_text(encoding="utf-8"))

          # The key checks below are only meaningful for a JSON object; a
          # list or string would give misleading results or a TypeError.
          if not isinstance(data, dict):
              raise SystemExit("results/nullalis-v0.2.json must contain a JSON object")

          required = [
              "benchmark_version",
              "verified_composite_score",
              "projected_composite_score",
              "measured_coverage",
              "coverage_adjusted_verified_score",
              "dimension_verified_scores",
              "dimension_projected_scores",
              "dimension_measured_coverage",
          ]
          missing = [k for k in required if k not in data]
          if missing:
              raise SystemExit(f"missing keys: {missing}")

          # Compared as a string: the artifact must store the version as "0.2".
          if data["benchmark_version"] != "0.2":
              raise SystemExit("benchmark_version must be 0.2")

          print("artifact schema ok")
          PY