diff --git a/2.0/README.md b/2.0/README.md index 1942fe1e..56166363 100644 --- a/2.0/README.md +++ b/2.0/README.md @@ -52,3 +52,20 @@ BBOPlace-Bench. Its problem ID is `bboplace_iccad2015`. It follows the same candidate format, CPU-only evaluator, MP-HPWL metric, relaxed MGO baselines, and quick-versus-final evaluation flow as `bboplace_ispd2005`, but scores the ICCAD2015 benchmark set. + +## BBOPlace Direct ISPD2005 + +This direct-placement variant asks agents to submit one JSON placement for a +single ISPD2005 design, `adaptec1`, instead of writing a Python placement +generator. Its problem ID is `bboplace_direct_ispd2005`. The evaluator uses the +same CPU-only BBOPlace MGO MP-HPWL path and relaxed baseline as the ISPD2005 +suite task, but both iterative feedback and final verification score only that +one design. + +## BBOPlace Direct ICCAD2015 + +This direct-placement variant asks agents to submit one JSON placement for a +single ICCAD2015 design, `superblue1`. Its problem ID is +`bboplace_direct_iccad2015`. It follows the same JSON interface and +single-design evaluation flow as `bboplace_direct_ispd2005`, with the ICCAD2015 +baseline for `superblue1`. 
diff --git a/2.0/problems/bboplace_direct_iccad2015/config.yaml b/2.0/problems/bboplace_direct_iccad2015/config.yaml new file mode 100644 index 00000000..de16070a --- /dev/null +++ b/2.0/problems/bboplace_direct_iccad2015/config.yaml @@ -0,0 +1,18 @@ +tag: optimization +runtime: + language: json + timeout_seconds: 10800 + environment: "JSON placement for one hidden ICCAD2015 BBOPlace design" + apt_packages: + - python3-numpy + docker: + image: ubuntu:24.04 + judge_image: ghcr.io/frontiercs/frontiercs-bboplace-data:2026-06-ispd-iccad +submission: + kind: file + path: /app/solution.json +environment: + cpus: 8 + memory_mb: 16384 + storage_mb: 8192 + build_timeout_seconds: 3600 diff --git a/2.0/problems/bboplace_direct_iccad2015/evaluate.sh b/2.0/problems/bboplace_direct_iccad2015/evaluate.sh new file mode 100755 index 00000000..16842ea2 --- /dev/null +++ b/2.0/problems/bboplace_direct_iccad2015/evaluate.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SOLUTION="/work/execution_env/solution_env/solution.json" + +if [[ ! 
-f "$SOLUTION" ]]; then + echo "Error: Missing $SOLUTION" >&2 + exit 1 +fi + +python "$SCRIPT_DIR/evaluator.py" "$SOLUTION" diff --git a/2.0/problems/bboplace_direct_iccad2015/evaluator.py b/2.0/problems/bboplace_direct_iccad2015/evaluator.py new file mode 100644 index 00000000..34bf30cc --- /dev/null +++ b/2.0/problems/bboplace_direct_iccad2015/evaluator.py @@ -0,0 +1,420 @@ +"""Evaluator for the direct BBOPlace ICCAD2015 Frontier-CS 2.0 problem.""" + +from __future__ import annotations + +import importlib.util +import json +import math +import os +import pickle +import pwd +import shutil +import subprocess +import sys +import tempfile +import traceback +from argparse import Namespace +from pathlib import Path +from types import ModuleType, SimpleNamespace +from typing import Any + + +DATASET = "iccad2015" +BENCHMARKS = ("superblue1",) +QUICK_BENCHMARKS = (BENCHMARKS[0],) +MAX_CANDIDATES = 1 +TIMEOUT_SECONDS = int(os.environ.get("BBOPLACE_SOLUTION_TIMEOUT_SECONDS", "10800")) +BBOPLACE_ROOT = Path(os.environ.get("BBOPLACE_ROOT", "/opt/bboplace-bench")) +INF_HPWL_THRESHOLD = 1e15 + +# BBOPlace-Bench report, Table V, MGO + PSO MP-HPWL. +# Reported unit is x10^5; constants below are raw HPWL values relaxed by 1.2x. 
+BASELINE_HPWL = { + "superblue1": 0.696e5, + "superblue3": 1.824e5, + "superblue4": 1.128e5, + "superblue5": 4.512e5, + "superblue7": 2.028e5, + "superblue10": 0.648e5, + "superblue16": 1.152e5, + "superblue18": 0.576e5, +} + +_EVALUATORS: dict[str, Any] = {} + + +def _protect_evaluator_source() -> None: + try: + evaluator_path = Path(__file__).resolve() + if str(evaluator_path).startswith(("/judge/", "/tests/")) and os.geteuid() == 0: + evaluator_path.chmod(0o600) + except Exception: + pass + + +_protect_evaluator_source() + + +def _solution_preexec(): + if os.name != "posix": + return None + try: + if os.geteuid() != 0: + return None + nobody = pwd.getpwnam("nobody") + except Exception: + return None + + def demote() -> None: + os.setgid(nobody.pw_gid) + os.setuid(nobody.pw_uid) + + return demote + + +def _ensure_runtime_paths() -> None: + if not BBOPLACE_ROOT.exists(): + raise RuntimeError( + f"BBOPlace runtime not found at {BBOPLACE_ROOT}; the judge image must include it" + ) + for rel in ("src", "config", f"benchmarks/{DATASET}"): + path = BBOPLACE_ROOT / rel + if not path.exists(): + raise RuntimeError(f"BBOPlace judge image is missing {path}") + for path in ( + BBOPLACE_ROOT, + BBOPLACE_ROOT / "src", + BBOPLACE_ROOT / "benchmarks", + BBOPLACE_ROOT / "thirdparty", + BBOPLACE_ROOT / "thirdparty" / "dreamplace", + ): + text = str(path) + if text not in sys.path: + sys.path.insert(0, text) + os.environ["PYTHONPATH"] = ":".join(sys.path) + + +def _install_runtime_shims() -> None: + if "ray" not in sys.modules: + ray = ModuleType("ray") + + class RemoteFunction: + def __init__(self, fn): + self.fn = fn + + def remote(self, *args, **kwargs): + return self.fn(*args, **kwargs) + + def remote(*args, **kwargs): + if args and callable(args[0]) and len(args) == 1 and not kwargs: + return RemoteFunction(args[0]) + + def decorator(fn): + return RemoteFunction(fn) + + return decorator + + ray.remote = remote # type: ignore[attr-defined] + ray.get = lambda value: value # 
type: ignore[attr-defined] + ray.init = lambda *args, **kwargs: None # type: ignore[attr-defined] + sys.modules["ray"] = ray + + if "matplotlib" not in sys.modules: + matplotlib = ModuleType("matplotlib") + pyplot = ModuleType("matplotlib.pyplot") + patches = ModuleType("matplotlib.patches") + pyplot.figure = lambda *args, **kwargs: SimpleNamespace( # type: ignore[attr-defined] + add_subplot=lambda *a, **k: SimpleNamespace( + axes=SimpleNamespace( + xaxis=SimpleNamespace(set_visible=lambda *_: None), + yaxis=SimpleNamespace(set_visible=lambda *_: None), + ), + add_patch=lambda *_args, **_kwargs: None, + ), + savefig=lambda *_args, **_kwargs: None, + ) + pyplot.close = lambda *args, **kwargs: None # type: ignore[attr-defined] + patches.Rectangle = lambda *args, **kwargs: object() # type: ignore[attr-defined] + sys.modules["matplotlib"] = matplotlib + sys.modules["matplotlib.pyplot"] = pyplot + sys.modules["matplotlib.patches"] = patches + + +def _load_bbo_evaluator_class(): + _ensure_runtime_paths() + _install_runtime_shims() + import yaml # type: ignore + from config.benchmark import ( # type: ignore + BENCHMARK_DIR, + ROOT_DIR, + benchmark_dict, + benchmark_n_macro_dict, + benchmark_type_dict, + ) + from src.placedb import PlaceDB # type: ignore + placer_package = ModuleType("src.placer") + placer_package.__path__ = [str(Path(ROOT_DIR) / "src" / "placer")] # type: ignore[attr-defined] + sys.modules.setdefault("src.placer", placer_package) + from src.placer.mgo_placer import MaskGuidedOptimizationPlacer # type: ignore + + class Evaluator: + def __init__(self, args: Namespace): + config_path = Path(ROOT_DIR) / "config" + file_config_dict: dict[str, Any] = {} + with (config_path / "default.yaml").open("r", encoding="utf-8") as f: + file_config_dict.update(yaml.load(f, Loader=yaml.FullLoader) or {}) + with (config_path / "placer" / "mgo.yaml").open("r", encoding="utf-8") as f: + file_config_dict.update(yaml.load(f, Loader=yaml.FullLoader) or {}) + + benchmark_base = 
None + for candidate_base, names in benchmark_dict.items(): + if args.benchmark in names: + benchmark_base = candidate_base + break + if benchmark_base is None: + raise RuntimeError(f"benchmark is not registered: {args.benchmark}") + + file_config_dict.update( + { + "ROOT_DIR": ROOT_DIR, + "SOURCE_DIR": str(Path(ROOT_DIR) / "src"), + "THIRDPARTY_DIR": str(Path(ROOT_DIR) / "thirdparty"), + "placer": "mgo", + "benchmark": args.benchmark, + "benchmark_base": benchmark_base, + "benchmark_path": str(Path(BENCHMARK_DIR) / benchmark_base / args.benchmark), + "benchmark_type": benchmark_type_dict[benchmark_base], + "n_macro": benchmark_n_macro_dict[benchmark_base], + "eval_gp_hpwl": False, + "n_cpu_max": 1, + "result_path": str( + Path(tempfile.gettempdir()) + / "frontier_bboplace_results" + / DATASET + / args.benchmark + ), + "unique_token": "frontier_cs_2_0", + } + ) + args.__dict__.update({k: v for k, v in file_config_dict.items() if k not in args.__dict__}) + Path(args.result_path).mkdir(parents=True, exist_ok=True) + self.args = args + self.placedb = PlaceDB(args=args) + self.placer = MaskGuidedOptimizationPlacer(args=args, placedb=self.placedb) + + @property + def n_dim(self): + return self.placer.placedb.node_cnt * 2 + + return Evaluator + + +def _make_args(benchmark: str) -> Namespace: + return Namespace( + placer="mgo", + benchmark=benchmark, + eval_gp_hpwl=False, + seed=1, + use_wandb=False, + error_redirect=False, + n_cpu_max=1, + gpu=0, + ) + + +def _ensure_evaluator(benchmark: str) -> Any: + if benchmark in _EVALUATORS: + return _EVALUATORS[benchmark] + if benchmark not in BENCHMARKS: + raise RuntimeError(f"unknown benchmark: {benchmark}") + Evaluator = _load_bbo_evaluator_class() + _EVALUATORS[benchmark] = Evaluator(_make_args(benchmark)) + return _EVALUATORS[benchmark] + + +def _benchmark_info(benchmark: str, evaluator: Any) -> dict[str, Any]: + placedb = evaluator.placer.placedb + return { + "dataset": DATASET, + "benchmark": benchmark, + "placer": "mgo", + 
"metric": "mp_hpwl", + "objective": "minimize", + "dim": int(evaluator.n_dim), + "node_cnt": int(placedb.node_cnt), + "net_cnt": int(getattr(placedb, "net_cnt", len(getattr(placedb, "net_info", {})))), + "canvas_width": float(placedb.canvas_width), + "canvas_height": float(placedb.canvas_height), + "n_grid_x": int(evaluator.args.n_grid_x), + "n_grid_y": int(evaluator.args.n_grid_y), + "bounds_kind": "mgo_repeated_grid", + "submission_format": "direct_json", + "max_candidates_per_submission": MAX_CANDIDATES, + "baseline_hpwl": float(BASELINE_HPWL[benchmark]), + "baseline_source": "BBOPlace-Bench Table V, MGO + PSO MP-HPWL, unit x10^5, relaxed by 1.2x", + } + + +def prepare() -> dict[str, Any]: + _ensure_runtime_paths() + return { + "dataset": DATASET, + "benchmarks": list(BENCHMARKS), + "quick_feedback_benchmarks": list(QUICK_BENCHMARKS), + "bboplace_root": str(BBOPLACE_ROOT), + "max_candidates_per_submission": MAX_CANDIDATES, + "load_mode": "lazy_per_benchmark", + } + + +def _selected_benchmarks() -> tuple[tuple[str, ...], str]: + return BENCHMARKS, "single_instance" + + +def _run_solution(solution_path: str, info: dict[str, Any]) -> Any: + payload = json.loads(Path(solution_path).read_text(encoding="utf-8")) + if isinstance(payload, dict): + if "fill" in payload: + return [float(payload["fill"])] * int(info["dim"]) + if "placement" in payload: + return payload["placement"] + if "candidate" in payload: + return payload["candidate"] + if "candidates" in payload: + return payload["candidates"] + if "x" in payload and "y" in payload: + x = payload["x"] + y = payload["y"] + if not isinstance(x, list) or not isinstance(y, list): + raise ValueError("x and y must be lists") + return [*x, *y] + raise ValueError("JSON object must contain placement, candidate, candidates, or x/y") + return payload + + +def _normalize_candidates(raw: Any, *, dim: int, node_cnt: int, n_grid_x: int, n_grid_y: int): + import numpy as np + + arr = np.asarray(raw, dtype=float) + if arr.ndim == 1: 
+ if arr.size != dim: + raise ValueError(f"expected one candidate of length {dim}, got length {arr.size}") + arr = arr.reshape(1, dim) + elif arr.ndim == 2: + if arr.shape[1] != dim: + raise ValueError(f"expected candidates with dimension {dim}, got {arr.shape[1]}") + else: + raise ValueError("candidates must be a 1D vector or a 2D list/array") + + if arr.shape[0] < 1: + raise ValueError("at least one candidate is required") + if arr.shape[0] > MAX_CANDIDATES: + raise ValueError( + f"too many candidates: got {arr.shape[0]}, maximum is {MAX_CANDIDATES}" + ) + if not np.all(np.isfinite(arr)): + raise ValueError("all candidate coordinates must be finite") + x = arr[:, :node_cnt] + y = arr[:, node_cnt:] + if np.any(x < 0.0) or np.any(x > float(n_grid_x)): + raise ValueError(f"x-grid coordinates must be in [0, {n_grid_x}]") + if np.any(y < 0.0) or np.any(y > float(n_grid_y)): + raise ValueError(f"y-grid coordinates must be in [0, {n_grid_y}]") + return arr + + +def _evaluate_candidates(evaluator: Any, candidates: Any) -> tuple[float, float, int, int]: + import numpy as np + + hpwl_values, overlap_values, _macro_pos = evaluator.placer.evaluate(candidates) + hpwl_arr = np.asarray(hpwl_values, dtype=float).reshape(-1) + overlap_arr = np.asarray(overlap_values, dtype=float).reshape(-1) + if hpwl_arr.size != candidates.shape[0]: + raise RuntimeError("BBOPlace returned an unexpected number of HPWL values") + if not np.all(np.isfinite(hpwl_arr)): + raise RuntimeError("BBOPlace returned a non-finite HPWL") + best_index = int(np.argmin(hpwl_arr)) + if float(hpwl_arr[best_index]) >= INF_HPWL_THRESHOLD: + raise ValueError("BBOPlace could not legalize any submitted candidate") + overlap = float(overlap_arr[best_index]) if overlap_arr.size > best_index else math.nan + return float(hpwl_arr[best_index]), overlap, best_index, int(candidates.shape[0]) + + +def evaluate(solution_path: str) -> tuple[float, float, str, dict[str, Any]]: + try: + per_benchmark: list[dict[str, Any]] = [] + 
bounded_scores: list[float] = [] + raw_scores: list[float] = [] + + selected_benchmarks, evaluation_scope = _selected_benchmarks() + for benchmark in selected_benchmarks: + evaluator = _ensure_evaluator(benchmark) + info = _benchmark_info(benchmark, evaluator) + raw_candidates = _run_solution(solution_path, info) + candidates = _normalize_candidates( + raw_candidates, + dim=info["dim"], + node_cnt=info["node_cnt"], + n_grid_x=info["n_grid_x"], + n_grid_y=info["n_grid_y"], + ) + candidate_hpwl, overlap_rate, candidate_index, n_candidates = _evaluate_candidates( + evaluator, candidates + ) + baseline_hpwl = BASELINE_HPWL[benchmark] + raw_score = 100.0 * (baseline_hpwl - candidate_hpwl) / baseline_hpwl + bounded_score = max(0.0, raw_score) + bounded_scores.append(bounded_score) + raw_scores.append(raw_score) + per_benchmark.append( + { + "benchmark": benchmark, + "candidate_hpwl": candidate_hpwl, + "baseline_hpwl": baseline_hpwl, + "raw_score": raw_score, + "score": bounded_score, + "overlap_rate": overlap_rate, + "candidate_index": candidate_index, + "n_candidates": n_candidates, + } + ) + + score = sum(bounded_scores) / len(bounded_scores) + score_unbounded = sum(raw_scores) / len(raw_scores) + message = ( + f"dataset={DATASET}; scope={evaluation_scope}; benchmarks={len(selected_benchmarks)}; " + f"mean_score={score:.6f}; mean_score_unbounded={score_unbounded:.6f}; " + "metric=MP-HPWL; baseline=1.2x relaxed MGO paper constants" + ) + metrics = { + "dataset": DATASET, + "evaluation_scope": evaluation_scope, + "direct_benchmark": BENCHMARKS[0], + "benchmark_count": len(selected_benchmarks), + "full_suite_benchmark_count": len(BENCHMARKS), + "score_formula": "max(0, 100 * (baseline_hpwl - candidate_hpwl) / baseline_hpwl)", + "mean_candidate_hpwl": sum(item["candidate_hpwl"] for item in per_benchmark) + / len(per_benchmark), + "per_benchmark": per_benchmark, + } + return score, score_unbounded, message, metrics + except subprocess.TimeoutExpired: + return 0.0, 0.0, 
f"timed out after {TIMEOUT_SECONDS}s", {} + except Exception as exc: + return 0.0, 0.0, f"evaluation failed: {exc}", {"traceback": traceback.format_exc()} + + +def main(argv: list[str]) -> int: + if len(argv) != 2: + print("usage: evaluator.py /path/to/solution.json", file=sys.stderr) + return 1 + score, score_unbounded, message, metrics = evaluate(argv[1]) + print(message, file=sys.stderr) + if metrics: + print(json.dumps(metrics, indent=2), file=sys.stderr) + print(f"{score:.12f} {score_unbounded:.12f}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv)) diff --git a/2.0/problems/bboplace_direct_iccad2015/readme b/2.0/problems/bboplace_direct_iccad2015/readme new file mode 100644 index 00000000..ce3a2b75 --- /dev/null +++ b/2.0/problems/bboplace_direct_iccad2015/readme @@ -0,0 +1,72 @@ +BBOPlace Direct ICCAD2015 +======================== + +Directly submit one JSON macro placement for a single BBOPlace ICCAD2015 +design: `superblue1`. The hidden judge evaluates the placement with the +original BBOPlace-Bench MGO MP-HPWL evaluator. + +Runtime and resources +--------------------- + +Your final submission is a JSON file at `/app/solution.json`. You are encouraged +to write Python programs, shell scripts, search loops, local parsers, or any +other helper code in `/app` while working. Those programs may generate and +overwrite `/app/solution.json` many times. Only the final JSON artifact is +graded. + +The agent container provides: + +- 8 CPU cores +- 16 GiB memory +- 8 GiB storage +- no GPU +- Python 3 with NumPy available for local helper scripts +- hidden benchmark data is not available in the agent workspace + +The judge also runs on CPU only. Do not rely on CUDA, DREAMPlace, Ray, or GPU +placement libraries for scoring. + +Submission format +----------------- + +Submit exactly one placement for `superblue1` by writing `/app/solution.json`. +After your program writes the file, call `bash /app/submit.sh` to score that +JSON. 
The JSON must use one of these forms: + +```json +{"placement": [0.0, 0.0, "..."]} +``` + +or: + +```json +{"x": [0.0, 0.0, "..."], "y": [0.0, 0.0, "..."]} +``` + +The placement vector length must equal `dim = 2 * node_cnt`. The first +`node_cnt` entries are x-grid coordinates and the remaining `node_cnt` entries +are y-grid coordinates. Coordinates must be finite, with x in `[0, n_grid_x]` +and y in `[0, n_grid_y]`. Only one placement is accepted. + +The judge discloses public metadata through the iterative feedback path, +including `dim`, `node_cnt`, `n_grid_x`, `n_grid_y`, and the baseline HPWL. +The netlist and evaluator source stay hidden in the judge image. + +Score +----- + +The objective is to minimize MP-HPWL for `superblue1`: + +`raw_score = 100 * (baseline_hpwl - candidate_hpwl) / baseline_hpwl` + +`score = max(0, raw_score)` + +The baseline constant is from the BBOPlace-Bench report, Table V, `MGO + PSO` +MP-HPWL. The paper reports values in units of `x10^5`; the judge stores the +raw HPWL value relaxed by `1.2x`: + +- `superblue1`: `0.696e5` + +Both iterative submissions and final verification evaluate this same single +design. The submit helper may still save the best iterative JSON artifact and +rerun it during final verification. 
diff --git a/2.0/problems/bboplace_direct_iccad2015/reference.json b/2.0/problems/bboplace_direct_iccad2015/reference.json new file mode 100644 index 00000000..6db7066e --- /dev/null +++ b/2.0/problems/bboplace_direct_iccad2015/reference.json @@ -0,0 +1 @@ +{"fill": 0.0} diff --git a/2.0/problems/bboplace_direct_ispd2005/config.yaml b/2.0/problems/bboplace_direct_ispd2005/config.yaml new file mode 100644 index 00000000..520d27af --- /dev/null +++ b/2.0/problems/bboplace_direct_ispd2005/config.yaml @@ -0,0 +1,18 @@ +tag: optimization +runtime: + language: json + timeout_seconds: 10800 + environment: "JSON placement for one hidden ISPD2005 BBOPlace design" + apt_packages: + - python3-numpy + docker: + image: ubuntu:24.04 + judge_image: ghcr.io/frontiercs/frontiercs-bboplace-data:2026-06-ispd-iccad +submission: + kind: file + path: /app/solution.json +environment: + cpus: 8 + memory_mb: 16384 + storage_mb: 8192 + build_timeout_seconds: 3600 diff --git a/2.0/problems/bboplace_direct_ispd2005/evaluate.sh b/2.0/problems/bboplace_direct_ispd2005/evaluate.sh new file mode 100755 index 00000000..16842ea2 --- /dev/null +++ b/2.0/problems/bboplace_direct_ispd2005/evaluate.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SOLUTION="/work/execution_env/solution_env/solution.json" + +if [[ ! 
-f "$SOLUTION" ]]; then + echo "Error: Missing $SOLUTION" >&2 + exit 1 +fi + +python "$SCRIPT_DIR/evaluator.py" "$SOLUTION" diff --git a/2.0/problems/bboplace_direct_ispd2005/evaluator.py b/2.0/problems/bboplace_direct_ispd2005/evaluator.py new file mode 100644 index 00000000..30127001 --- /dev/null +++ b/2.0/problems/bboplace_direct_ispd2005/evaluator.py @@ -0,0 +1,418 @@ +"""Evaluator for the direct BBOPlace ISPD2005 Frontier-CS 2.0 problem.""" + +from __future__ import annotations + +import importlib.util +import json +import math +import os +import pickle +import pwd +import shutil +import subprocess +import sys +import tempfile +import traceback +from argparse import Namespace +from pathlib import Path +from types import ModuleType, SimpleNamespace +from typing import Any + + +DATASET = "ispd2005" +BENCHMARKS = ("adaptec1",) +QUICK_BENCHMARKS = (BENCHMARKS[0],) +MAX_CANDIDATES = 1 +TIMEOUT_SECONDS = int(os.environ.get("BBOPLACE_SOLUTION_TIMEOUT_SECONDS", "10800")) +BBOPLACE_ROOT = Path(os.environ.get("BBOPLACE_ROOT", "/opt/bboplace-bench")) +INF_HPWL_THRESHOLD = 1e15 + +# BBOPlace-Bench report, Table III, MGO + Vanilla-EA MP-HPWL. +# Reported unit is x10^5; constants below are raw HPWL values relaxed by 1.2x. 
+BASELINE_HPWL = { + "adaptec1": 6.96e5, + "adaptec2": 73.752e5, + "adaptec3": 67.356e5, + "adaptec4": 68.148e5, + "bigblue1": 2.76e5, + "bigblue3": 62.88e5, +} + +_EVALUATORS: dict[str, Any] = {} + + +def _protect_evaluator_source() -> None: + try: + evaluator_path = Path(__file__).resolve() + if str(evaluator_path).startswith(("/judge/", "/tests/")) and os.geteuid() == 0: + evaluator_path.chmod(0o600) + except Exception: + pass + + +_protect_evaluator_source() + + +def _solution_preexec(): + if os.name != "posix": + return None + try: + if os.geteuid() != 0: + return None + nobody = pwd.getpwnam("nobody") + except Exception: + return None + + def demote() -> None: + os.setgid(nobody.pw_gid) + os.setuid(nobody.pw_uid) + + return demote + + +def _ensure_runtime_paths() -> None: + if not BBOPLACE_ROOT.exists(): + raise RuntimeError( + f"BBOPlace runtime not found at {BBOPLACE_ROOT}; the judge image must include it" + ) + for rel in ("src", "config", f"benchmarks/{DATASET}"): + path = BBOPLACE_ROOT / rel + if not path.exists(): + raise RuntimeError(f"BBOPlace judge image is missing {path}") + for path in ( + BBOPLACE_ROOT, + BBOPLACE_ROOT / "src", + BBOPLACE_ROOT / "benchmarks", + BBOPLACE_ROOT / "thirdparty", + BBOPLACE_ROOT / "thirdparty" / "dreamplace", + ): + text = str(path) + if text not in sys.path: + sys.path.insert(0, text) + os.environ["PYTHONPATH"] = ":".join(sys.path) + + +def _install_runtime_shims() -> None: + if "ray" not in sys.modules: + ray = ModuleType("ray") + + class RemoteFunction: + def __init__(self, fn): + self.fn = fn + + def remote(self, *args, **kwargs): + return self.fn(*args, **kwargs) + + def remote(*args, **kwargs): + if args and callable(args[0]) and len(args) == 1 and not kwargs: + return RemoteFunction(args[0]) + + def decorator(fn): + return RemoteFunction(fn) + + return decorator + + ray.remote = remote # type: ignore[attr-defined] + ray.get = lambda value: value # type: ignore[attr-defined] + ray.init = lambda *args, **kwargs: 
None # type: ignore[attr-defined] + sys.modules["ray"] = ray + + if "matplotlib" not in sys.modules: + matplotlib = ModuleType("matplotlib") + pyplot = ModuleType("matplotlib.pyplot") + patches = ModuleType("matplotlib.patches") + pyplot.figure = lambda *args, **kwargs: SimpleNamespace( # type: ignore[attr-defined] + add_subplot=lambda *a, **k: SimpleNamespace( + axes=SimpleNamespace( + xaxis=SimpleNamespace(set_visible=lambda *_: None), + yaxis=SimpleNamespace(set_visible=lambda *_: None), + ), + add_patch=lambda *_args, **_kwargs: None, + ), + savefig=lambda *_args, **_kwargs: None, + ) + pyplot.close = lambda *args, **kwargs: None # type: ignore[attr-defined] + patches.Rectangle = lambda *args, **kwargs: object() # type: ignore[attr-defined] + sys.modules["matplotlib"] = matplotlib + sys.modules["matplotlib.pyplot"] = pyplot + sys.modules["matplotlib.patches"] = patches + + +def _load_bbo_evaluator_class(): + _ensure_runtime_paths() + _install_runtime_shims() + import yaml # type: ignore + from config.benchmark import ( # type: ignore + BENCHMARK_DIR, + ROOT_DIR, + benchmark_dict, + benchmark_n_macro_dict, + benchmark_type_dict, + ) + from src.placedb import PlaceDB # type: ignore + placer_package = ModuleType("src.placer") + placer_package.__path__ = [str(Path(ROOT_DIR) / "src" / "placer")] # type: ignore[attr-defined] + sys.modules.setdefault("src.placer", placer_package) + from src.placer.mgo_placer import MaskGuidedOptimizationPlacer # type: ignore + + class Evaluator: + def __init__(self, args: Namespace): + config_path = Path(ROOT_DIR) / "config" + file_config_dict: dict[str, Any] = {} + with (config_path / "default.yaml").open("r", encoding="utf-8") as f: + file_config_dict.update(yaml.load(f, Loader=yaml.FullLoader) or {}) + with (config_path / "placer" / "mgo.yaml").open("r", encoding="utf-8") as f: + file_config_dict.update(yaml.load(f, Loader=yaml.FullLoader) or {}) + + benchmark_base = None + for candidate_base, names in benchmark_dict.items(): + if 
args.benchmark in names: + benchmark_base = candidate_base + break + if benchmark_base is None: + raise RuntimeError(f"benchmark is not registered: {args.benchmark}") + + file_config_dict.update( + { + "ROOT_DIR": ROOT_DIR, + "SOURCE_DIR": str(Path(ROOT_DIR) / "src"), + "THIRDPARTY_DIR": str(Path(ROOT_DIR) / "thirdparty"), + "placer": "mgo", + "benchmark": args.benchmark, + "benchmark_base": benchmark_base, + "benchmark_path": str(Path(BENCHMARK_DIR) / benchmark_base / args.benchmark), + "benchmark_type": benchmark_type_dict[benchmark_base], + "n_macro": benchmark_n_macro_dict[benchmark_base], + "eval_gp_hpwl": False, + "n_cpu_max": 1, + "result_path": str( + Path(tempfile.gettempdir()) + / "frontier_bboplace_results" + / DATASET + / args.benchmark + ), + "unique_token": "frontier_cs_2_0", + } + ) + args.__dict__.update({k: v for k, v in file_config_dict.items() if k not in args.__dict__}) + Path(args.result_path).mkdir(parents=True, exist_ok=True) + self.args = args + self.placedb = PlaceDB(args=args) + self.placer = MaskGuidedOptimizationPlacer(args=args, placedb=self.placedb) + + @property + def n_dim(self): + return self.placer.placedb.node_cnt * 2 + + return Evaluator + + +def _make_args(benchmark: str) -> Namespace: + return Namespace( + placer="mgo", + benchmark=benchmark, + eval_gp_hpwl=False, + seed=1, + use_wandb=False, + error_redirect=False, + n_cpu_max=1, + gpu=0, + ) + + +def _ensure_evaluator(benchmark: str) -> Any: + if benchmark in _EVALUATORS: + return _EVALUATORS[benchmark] + if benchmark not in BENCHMARKS: + raise RuntimeError(f"unknown benchmark: {benchmark}") + Evaluator = _load_bbo_evaluator_class() + _EVALUATORS[benchmark] = Evaluator(_make_args(benchmark)) + return _EVALUATORS[benchmark] + + +def _benchmark_info(benchmark: str, evaluator: Any) -> dict[str, Any]: + placedb = evaluator.placer.placedb + return { + "dataset": DATASET, + "benchmark": benchmark, + "placer": "mgo", + "metric": "mp_hpwl", + "objective": "minimize", + "dim": 
int(evaluator.n_dim), + "node_cnt": int(placedb.node_cnt), + "net_cnt": int(getattr(placedb, "net_cnt", len(getattr(placedb, "net_info", {})))), + "canvas_width": float(placedb.canvas_width), + "canvas_height": float(placedb.canvas_height), + "n_grid_x": int(evaluator.args.n_grid_x), + "n_grid_y": int(evaluator.args.n_grid_y), + "bounds_kind": "mgo_repeated_grid", + "submission_format": "direct_json", + "max_candidates_per_submission": MAX_CANDIDATES, + "baseline_hpwl": float(BASELINE_HPWL[benchmark]), + "baseline_source": "BBOPlace-Bench Table III, MGO + Vanilla-EA MP-HPWL, unit x10^5, relaxed by 1.2x", + } + + +def prepare() -> dict[str, Any]: + _ensure_runtime_paths() + return { + "dataset": DATASET, + "benchmarks": list(BENCHMARKS), + "quick_feedback_benchmarks": list(QUICK_BENCHMARKS), + "bboplace_root": str(BBOPLACE_ROOT), + "max_candidates_per_submission": MAX_CANDIDATES, + "load_mode": "lazy_per_benchmark", + } + + +def _selected_benchmarks() -> tuple[tuple[str, ...], str]: + return BENCHMARKS, "single_instance" + + +def _run_solution(solution_path: str, info: dict[str, Any]) -> Any: + payload = json.loads(Path(solution_path).read_text(encoding="utf-8")) + if isinstance(payload, dict): + if "fill" in payload: + return [float(payload["fill"])] * int(info["dim"]) + if "placement" in payload: + return payload["placement"] + if "candidate" in payload: + return payload["candidate"] + if "candidates" in payload: + return payload["candidates"] + if "x" in payload and "y" in payload: + x = payload["x"] + y = payload["y"] + if not isinstance(x, list) or not isinstance(y, list): + raise ValueError("x and y must be lists") + return [*x, *y] + raise ValueError("JSON object must contain placement, candidate, candidates, or x/y") + return payload + + +def _normalize_candidates(raw: Any, *, dim: int, node_cnt: int, n_grid_x: int, n_grid_y: int): + import numpy as np + + arr = np.asarray(raw, dtype=float) + if arr.ndim == 1: + if arr.size != dim: + raise 
ValueError(f"expected one candidate of length {dim}, got length {arr.size}") + arr = arr.reshape(1, dim) + elif arr.ndim == 2: + if arr.shape[1] != dim: + raise ValueError(f"expected candidates with dimension {dim}, got {arr.shape[1]}") + else: + raise ValueError("candidates must be a 1D vector or a 2D list/array") + + if arr.shape[0] < 1: + raise ValueError("at least one candidate is required") + if arr.shape[0] > MAX_CANDIDATES: + raise ValueError( + f"too many candidates: got {arr.shape[0]}, maximum is {MAX_CANDIDATES}" + ) + if not np.all(np.isfinite(arr)): + raise ValueError("all candidate coordinates must be finite") + x = arr[:, :node_cnt] + y = arr[:, node_cnt:] + if np.any(x < 0.0) or np.any(x > float(n_grid_x)): + raise ValueError(f"x-grid coordinates must be in [0, {n_grid_x}]") + if np.any(y < 0.0) or np.any(y > float(n_grid_y)): + raise ValueError(f"y-grid coordinates must be in [0, {n_grid_y}]") + return arr + + +def _evaluate_candidates(evaluator: Any, candidates: Any) -> tuple[float, float, int, int]: + import numpy as np + + hpwl_values, overlap_values, _macro_pos = evaluator.placer.evaluate(candidates) + hpwl_arr = np.asarray(hpwl_values, dtype=float).reshape(-1) + overlap_arr = np.asarray(overlap_values, dtype=float).reshape(-1) + if hpwl_arr.size != candidates.shape[0]: + raise RuntimeError("BBOPlace returned an unexpected number of HPWL values") + if not np.all(np.isfinite(hpwl_arr)): + raise RuntimeError("BBOPlace returned a non-finite HPWL") + best_index = int(np.argmin(hpwl_arr)) + if float(hpwl_arr[best_index]) >= INF_HPWL_THRESHOLD: + raise ValueError("BBOPlace could not legalize any submitted candidate") + overlap = float(overlap_arr[best_index]) if overlap_arr.size > best_index else math.nan + return float(hpwl_arr[best_index]), overlap, best_index, int(candidates.shape[0]) + + +def evaluate(solution_path: str) -> tuple[float, float, str, dict[str, Any]]: + try: + per_benchmark: list[dict[str, Any]] = [] + bounded_scores: list[float] = 
[] + raw_scores: list[float] = [] + + selected_benchmarks, evaluation_scope = _selected_benchmarks() + for benchmark in selected_benchmarks: + evaluator = _ensure_evaluator(benchmark) + info = _benchmark_info(benchmark, evaluator) + raw_candidates = _run_solution(solution_path, info) + candidates = _normalize_candidates( + raw_candidates, + dim=info["dim"], + node_cnt=info["node_cnt"], + n_grid_x=info["n_grid_x"], + n_grid_y=info["n_grid_y"], + ) + candidate_hpwl, overlap_rate, candidate_index, n_candidates = _evaluate_candidates( + evaluator, candidates + ) + baseline_hpwl = BASELINE_HPWL[benchmark] + raw_score = 100.0 * (baseline_hpwl - candidate_hpwl) / baseline_hpwl + bounded_score = max(0.0, raw_score) + bounded_scores.append(bounded_score) + raw_scores.append(raw_score) + per_benchmark.append( + { + "benchmark": benchmark, + "candidate_hpwl": candidate_hpwl, + "baseline_hpwl": baseline_hpwl, + "raw_score": raw_score, + "score": bounded_score, + "overlap_rate": overlap_rate, + "candidate_index": candidate_index, + "n_candidates": n_candidates, + } + ) + + score = sum(bounded_scores) / len(bounded_scores) + score_unbounded = sum(raw_scores) / len(raw_scores) + message = ( + f"dataset={DATASET}; scope={evaluation_scope}; benchmarks={len(selected_benchmarks)}; " + f"mean_score={score:.6f}; mean_score_unbounded={score_unbounded:.6f}; " + "metric=MP-HPWL; baseline=1.2x relaxed MGO paper constants" + ) + metrics = { + "dataset": DATASET, + "evaluation_scope": evaluation_scope, + "direct_benchmark": BENCHMARKS[0], + "benchmark_count": len(selected_benchmarks), + "full_suite_benchmark_count": len(BENCHMARKS), + "score_formula": "max(0, 100 * (baseline_hpwl - candidate_hpwl) / baseline_hpwl)", + "mean_candidate_hpwl": sum(item["candidate_hpwl"] for item in per_benchmark) + / len(per_benchmark), + "per_benchmark": per_benchmark, + } + return score, score_unbounded, message, metrics + except subprocess.TimeoutExpired: + return 0.0, 0.0, f"timed out after 
{TIMEOUT_SECONDS}s", {} + except Exception as exc: + return 0.0, 0.0, f"evaluation failed: {exc}", {"traceback": traceback.format_exc()} + + +def main(argv: list[str]) -> int: + if len(argv) != 2: + print("usage: evaluator.py /path/to/solution.json", file=sys.stderr) + return 1 + score, score_unbounded, message, metrics = evaluate(argv[1]) + print(message, file=sys.stderr) + if metrics: + print(json.dumps(metrics, indent=2), file=sys.stderr) + print(f"{score:.12f} {score_unbounded:.12f}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv)) diff --git a/2.0/problems/bboplace_direct_ispd2005/readme b/2.0/problems/bboplace_direct_ispd2005/readme new file mode 100644 index 00000000..8d0a375e --- /dev/null +++ b/2.0/problems/bboplace_direct_ispd2005/readme @@ -0,0 +1,72 @@ +BBOPlace Direct ISPD2005 +======================= + +Directly submit one JSON macro placement for a single BBOPlace ISPD2005 design: +`adaptec1`. The hidden judge evaluates the placement with the original +BBOPlace-Bench MGO MP-HPWL evaluator. + +Runtime and resources +--------------------- + +Your final submission is a JSON file at `/app/solution.json`. You are encouraged +to write Python programs, shell scripts, search loops, local parsers, or any +other helper code in `/app` while working. Those programs may generate and +overwrite `/app/solution.json` many times. Only the final JSON artifact is +graded. + +The agent container provides: + +- 8 CPU cores +- 16 GiB memory +- 8 GiB storage +- no GPU +- Python 3 with NumPy available for local helper scripts +- hidden benchmark data is not available in the agent workspace + +The judge also runs on CPU only. Do not rely on CUDA, DREAMPlace, Ray, or GPU +placement libraries for scoring. + +Submission format +----------------- + +Submit exactly one placement for `adaptec1` by writing `/app/solution.json`. +After your program writes the file, call `bash /app/submit.sh` to score that +JSON. 
The JSON must use one of these forms: + +```json +{"placement": [0.0, 0.0, "..."]} +``` + +or: + +```json +{"x": [0.0, 0.0, "..."], "y": [0.0, 0.0, "..."]} +``` + +The placement vector length must equal `dim = 2 * node_cnt`. The first +`node_cnt` entries are x-grid coordinates and the remaining `node_cnt` entries +are y-grid coordinates. Coordinates must be finite, with x in `[0, n_grid_x]` +and y in `[0, n_grid_y]`. Only one placement is accepted. + +The judge discloses public metadata through the iterative feedback path, +including `dim`, `node_cnt`, `n_grid_x`, `n_grid_y`, and the baseline HPWL. +The netlist and evaluator source stay hidden in the judge image. + +Score +----- + +The objective is to minimize MP-HPWL for `adaptec1`: + +`raw_score = 100 * (baseline_hpwl - candidate_hpwl) / baseline_hpwl` + +`score = max(0, raw_score)` + +The baseline constant is from the BBOPlace-Bench report, Table III, +`MGO + Vanilla-EA` MP-HPWL. The paper reports values in units of `x10^5`; the +judge stores the raw HPWL value relaxed by `1.2x`: + +- `adaptec1`: `6.96e5` + +Both iterative submissions and final verification evaluate this same single +design. The submit helper may still save the best iterative JSON artifact and +rerun it during final verification. diff --git a/2.0/problems/bboplace_direct_ispd2005/reference.json b/2.0/problems/bboplace_direct_ispd2005/reference.json new file mode 100644 index 00000000..6db7066e --- /dev/null +++ b/2.0/problems/bboplace_direct_ispd2005/reference.json @@ -0,0 +1 @@ +{"fill": 0.0} diff --git a/README.md b/README.md index c23a9b0d..7d8de902 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Research Problems Algorithmic Problems - 2.0 Problems + 2.0 Problems

## News @@ -140,7 +140,8 @@ isolated from Frontier-CS's own `uv sync` environment. Frontier-CS 2.0 is agent-first: current 2.0 problems are meant to be run through Harbor-compatible agents rather than direct one-shot solution files. Problem IDs are their problem directory names, such as `erdos_unit_distance`, -the small `erdos_demo`, and BBOPlace variants such as `bboplace_ispd2005`. +the small `erdos_demo`, and BBOPlace variants such as `bboplace_ispd2005` and +`bboplace_direct_ispd2005`. ```bash # List 2.0 problems @@ -154,6 +155,9 @@ uv run frontier harbor trial 2.0 erdos_demo -a codex -m gpt-5.5 --json # Run a BBOPlace placement task uv run frontier harbor trial 2.0 bboplace_ispd2005 -a codex -m gpt-5.5 --json + +# Run a direct JSON BBOPlace single-design task +uv run frontier harbor trial 2.0 bboplace_direct_ispd2005 -a codex -m gpt-5.5 --json ``` See [2.0/README.md](2.0/README.md) for the current 2.0 track. diff --git a/adapters/frontier-cs-2.0/src/frontier_cs_2_0/adapter.py b/adapters/frontier-cs-2.0/src/frontier_cs_2_0/adapter.py index 9f1dcc6b..920fc381 100644 --- a/adapters/frontier-cs-2.0/src/frontier_cs_2_0/adapter.py +++ b/adapters/frontier-cs-2.0/src/frontier_cs_2_0/adapter.py @@ -303,11 +303,28 @@ def _write_tests( def _write_solution(self, task_paths: "TaskPaths", problem: FrontierCS20Problem) -> None: solution_dir = task_paths.solution_dir - reference = problem.problem_dir / "reference.py" - if reference.exists(): - shutil.copy2(reference, solution_dir / "reference.py") + submission = problem.config.get("submission", {}) or {} + submission_path = str(submission.get("path", "/app/solution.py")) + submission_suffix = Path(submission_path).suffix.lstrip(".") + reference_candidates = [] + if submission_suffix: + reference_candidates.append(problem.problem_dir / f"reference.{submission_suffix}") + reference_candidates.append(problem.problem_dir / "reference.py") + for reference in reference_candidates: + if reference.exists(): + shutil.copy2(reference, 
solution_dir / "reference.py") + break solve_sh = solution_dir / "solve.sh" - shutil.copy2(self.template_dir / "solution" / "solve.sh", solve_sh) + solve_text = (self.template_dir / "solution" / "solve.sh").read_text( + encoding="utf-8" + ) + solve_submission_path = ( + submission_path if submission.get("kind", "file") != "directory" else "/app/solution.py" + ) + solve_sh.write_text( + solve_text.replace("/app/solution.py", solve_submission_path), + encoding="utf-8", + ) solve_sh.chmod(0o755) def _write_task_config(self, task_paths: "TaskPaths", problem: FrontierCS20Problem) -> None: diff --git a/docs/bboplace_2_0_integration_plan.md b/docs/bboplace_2_0_integration_plan.md index ef3e56f2..6e8b62d3 100644 --- a/docs/bboplace_2_0_integration_plan.md +++ b/docs/bboplace_2_0_integration_plan.md @@ -7,9 +7,15 @@ Current direction: BBOPlace adapter. - Add two algorithmic suite tasks: `bboplace_ispd2005` and `bboplace_iccad2015`. +- Add two direct-placement single-design tasks: + `bboplace_direct_ispd2005` for `adaptec1` and + `bboplace_direct_iccad2015` for `superblue1`. - Keep the agent/main container separate from the judge/data container. - During agent iteration, score only the first benchmark in each suite for fast general-design feedback. During final verification, score the full suite. +- For direct-placement tasks, both iterative feedback and final verification + score the same single design. The agent submits `/app/solution.json` instead + of a Python generator. - The judge accepts `final` evaluation role only when the hidden verifier sends the generated role token. Agent submissions, including hand-written judge requests, are treated as quick-feedback submissions. @@ -20,7 +26,7 @@ Current direction: extracted ISPD2005 plus ICCAD2015 data. The evaluator uses only MGO + MP-HPWL, so the image does not need Ray, HPO, DREAMPlace, or GPU packages. 
-Evaluation flow: +Suite evaluation flow: ```mermaid flowchart TD @@ -37,6 +43,10 @@ flowchart TD K --> L["Return mean score, unbounded score, and metrics"] ``` +Direct-placement tasks use the same judge image and scoring path, but the agent +edits `/app/solution.json`, the evaluator reads exactly one placement, and both +agent feedback and final verification evaluate the fixed single design. + Scoring: `score = max(0, 100 * (baseline_hpwl - candidate_hpwl) / baseline_hpwl)` @@ -61,7 +71,6 @@ Data status: Future extension: -- Add direct-placement single-instance tasks where the model receives one - evaluator input file and directly outputs a placement. That variant is better - suited for agent fine-tuning and should be separate from these suite-style - algorithmic tasks. +- Add additional direct-placement single-design tasks if we want more + fine-tuning targets. Keep them separate from the suite-style algorithmic + tasks so direct placement and general design are measured independently. 
diff --git a/tools/bboplace/check_generated_tasks.py b/tools/bboplace/check_generated_tasks.py index 3178d8cd..80d94ec6 100644 --- a/tools/bboplace/check_generated_tasks.py +++ b/tools/bboplace/check_generated_tasks.py @@ -7,10 +7,28 @@ from pathlib import Path -EXPECTED_TASKS = ( - "frontier-cs-2-0-bboplace-ispd2005", - "frontier-cs-2-0-bboplace-iccad2015", -) +EXPECTED_TASKS = { + "frontier-cs-2-0-bboplace-ispd2005": { + "candidate_limit": 16, + "submission_path": "/app/solution.py", + "required_text": "quick-feedback score is never used directly", + }, + "frontier-cs-2-0-bboplace-iccad2015": { + "candidate_limit": 16, + "submission_path": "/app/solution.py", + "required_text": "quick-feedback score is never used directly", + }, + "frontier-cs-2-0-bboplace-direct-ispd2005": { + "candidate_limit": 1, + "submission_path": "/app/solution.json", + "required_text": "Submit exactly one placement for `adaptec1`", + }, + "frontier-cs-2-0-bboplace-direct-iccad2015": { + "candidate_limit": 1, + "submission_path": "/app/solution.json", + "required_text": "Submit exactly one placement for `superblue1`", + }, +} EXPECTED_JUDGE_IMAGE = "ghcr.io/frontiercs/frontiercs-bboplace-data:2026-06-ispd-iccad" @@ -24,7 +42,7 @@ def read(path: Path) -> str: return path.read_text(encoding="utf-8") -def check_task(task_dir: Path) -> None: +def check_task(task_dir: Path, expected: dict[str, object]) -> None: env_dir = task_dir / "environment" tests_dir = task_dir / "tests" @@ -34,6 +52,7 @@ def check_task(task_dir: Path) -> None: judge_server = read(env_dir / "judge_server.py") evaluate_py = read(tests_dir / "evaluate.py") instruction = read(task_dir / "instruction.md") + submission_config = read(env_dir / "submission_config.json") require("FROM ubuntu:24.04" in dockerfile, f"{task_dir.name}: main image changed") require( @@ -49,10 +68,18 @@ def check_task(task_dir: Path) -> None: require("Runtime and resources" in instruction, f"{task_dir.name}: missing resource statement") require("no GPU" 
in instruction, f"{task_dir.name}: missing no-GPU statement") require( - "`max_candidates_per_submission`: 16" in instruction, - f"{task_dir.name}: candidate limit should be 16", + f"`max_candidates_per_submission`: {expected['candidate_limit']}" in instruction + or f"Only one placement is accepted" in instruction, + f"{task_dir.name}: candidate limit should be {expected['candidate_limit']}", + ) + require( + str(expected["required_text"]) in instruction, + f"{task_dir.name}: missing expected BBOPlace instruction text", + ) + require( + str(expected["submission_path"]) in submission_config, + f"{task_dir.name}: generated submission path mismatch", ) - require("quick-feedback score is never used directly" in instruction, f"{task_dir.name}: missing quick/full warning") def main(argv: list[str]) -> int: @@ -60,8 +87,8 @@ def main(argv: list[str]) -> int: print("usage: check_generated_tasks.py /path/to/generated/frontier-cs-2.0", file=sys.stderr) return 2 root = Path(argv[1]) - for task_name in EXPECTED_TASKS: - check_task(root / task_name) + for task_name, expected in EXPECTED_TASKS.items(): + check_task(root / task_name, expected) print("Generated BBOPlace tasks match the expected Harbor flow") return 0