# Workflow file for the run "Polish TwinBench for S-tier public launch" (#8)

# Harness CI: installs the harness requirements, byte-compiles the harness
# sources, smoke-tests the runner CLI, and validates the schema of the
# committed v0.2 result artifact.
name: Harness CI

# Runs on pushes to main and on pull requests that target main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least privilege: the steps below only read the checked-out repository, so
# the workflow token does not need any write scope.
permissions:
  contents: read

jobs:
  harness-ci:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Python 3.10
        uses: actions/setup-python@v5
        with:
          # Must stay quoted: an unquoted 3.10 is parsed as the float 3.1.
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r harness/requirements.txt

      # Syntax check only: py_compile byte-compiles each file without
      # importing or executing it.
      - name: Compile harness sources
        run: python -m py_compile harness/*.py

      # Confirms the runner module imports and its argument parser builds.
      - name: Runner CLI smoke
        run: python -m harness.runner --help

      # Fails the job when the artifact is absent, is not a JSON object, lacks
      # any required top-level key, or reports a benchmark_version other than
      # the string "0.2".
      - name: Validate v0.2 result artifact schema
        run: |
          python - <<'PY'
          import json
          from pathlib import Path

          p = Path("results/nullalis-v0.2.json")
          if not p.exists():
              raise SystemExit("results/nullalis-v0.2.json not found")

          # Read as UTF-8 explicitly so the check does not depend on the
          # runner's locale.
          data = json.loads(p.read_text(encoding="utf-8"))

          # Guard the key lookups below: a non-object root (number, string,
          # null, list) would otherwise surface as an opaque TypeError.
          if not isinstance(data, dict):
              raise SystemExit("results/nullalis-v0.2.json must contain a JSON object")

          required = [
              "benchmark_version",
              "verified_composite_score",
              "projected_composite_score",
              "measured_coverage",
              "coverage_adjusted_verified_score",
              "dimension_verified_scores",
              "dimension_projected_scores",
              "dimension_measured_coverage",
          ]
          missing = [k for k in required if k not in data]
          if missing:
              raise SystemExit(f"missing keys: {missing}")

          # Compared against the string "0.2"; a numeric 0.2 in the artifact
          # is rejected.
          if data["benchmark_version"] != "0.2":
              raise SystemExit("benchmark_version must be 0.2")

          print("artifact schema ok")
          PY