pq-code-package · hanno-becker · May 18, 2026 · May 18, 2026
@@ -25,9 +25,13 @@
 FAIL = "❌"
 
 ProofResult = namedtuple(
-    "ProofResult", ["name", "status", "current", "previous", "change"]
+    "ProofResult", ["name", "solver", "status", "current", "previous", "change"]
 )
 
+# Solver assigned to baseline runtime entries that lack a `solver` field.
+# Older baseline JSONs predate multi-solver support and mostly ran under Z3.
+LEGACY_DEFAULT_SOLVER = "Z3"
+
 
 def get_args():
     parser = argparse.ArgumentParser(description="CBMC proof results reporter")
@@ -78,11 +82,11 @@ def fetch_baseline(cfg):
 def render_table(rows):
     """Render a markdown table from ProofResult rows."""
     lines = [
-        "| Proof | Status | Current | Previous | Change |",
-        "|-------|--------|---------|----------|--------|",
+        "| Proof | Solver | Status | Current | Previous | Change |",
+        "|-------|--------|--------|---------|----------|--------|",
     ]
     lines.extend(
-        f"| `{r.name}` | {r.status} | {r.current} | {r.previous} | {r.change} |"
+        f"| `{r.name}` | {r.solver} | {r.status} | {r.current} | {r.previous} | {r.change} |"
         for r in rows
     )
     return lines
@@ -91,41 +95,87 @@ def render_table(rows):
 def classify_proof(r, baseline_runtimes, cfg):
     """Classify a single proof result, returning (ProofResult, is_alert)."""
     name = r["name"]
-    base = baseline_runtimes.get(name, {})
+    solver = r.get("solver", LEGACY_DEFAULT_SOLVER)
+    base = baseline_runtimes.get((name, solver), {})
     base_val, base_failed = base.get("value"), base.get("status") == "failed"
+    base_omitted = base.get("status") == "omitted"
+    base_inconclusive = base.get("status") == "inconclusive"
+
+    # Solver could not decide -- not a real failure, not a regression.
+    if r.get("status") == "inconclusive":
+        prev = (
+            f"{base_val}s" if base_val
+            else "failed" if base_failed
+            else "inconclusive" if base_inconclusive
+            else "omitted" if base_omitted
+            else "-"
+        )
+        # Was passing in the baseline, now inconclusive: surface as a warning.
+        if base_val is not None and not base_failed:
+            return (
+                ProofResult(name, solver, WARN, "?", prev, "inconclusive"),
+                True,
+            )
+        return ProofResult(name, solver, OK, "?", prev, "inconclusive"), False
+
+    # Pair was intentionally not run.
+    if r.get("status") == "omitted":
+        # Was passing in the baseline, now omitted: surface as a warning.
+        if base_val is not None and not base_failed:
+            return (
+                ProofResult(name, solver, WARN, "-", f"{base_val}s", "omitted"),
+                True,
+            )
+        return ProofResult(name, solver, OK, "-", "-", "omitted"), False
 
     if r.get("status") == "failed":
         prev = "failed" if base_failed else (f"{base_val}s" if base_val else "-")
-        return ProofResult(name, FAIL, "-", prev, "-"), True
+        if base_omitted:
+            prev = "omitted"
+        return ProofResult(name, solver, FAIL, "-", prev, "-"), True
 
     cur_val = r["value"]
     if base_failed:
-        return ProofResult(name, OK, f"{cur_val}s", "failed", "fixed"), False
+        return (
+            ProofResult(name, solver, OK, f"{cur_val}s", "failed", "fixed"),
+            False,
+        )
+    if base_omitted:
+        return (
+            ProofResult(name, solver, OK, f"{cur_val}s", "omitted", "new"),
+            False,
+        )
     if base_val is None:
-        return ProofResult(name, OK, f"{cur_val}s", "-", "new"), False
+        return ProofResult(name, solver, OK, f"{cur_val}s", "-", "new"), False
 
     ratio = cur_val / base_val if base_val > 0 else 1
     change = f"{(ratio - 1) * 100:+.0f}%" if base_val > 0 else "-"
     is_regression = cur_val >= cfg.min_runtime and ratio >= cfg.regression_threshold
     status = WARN if is_regression else OK
     return (
-        ProofResult(name, status, f"{cur_val}s", f"{base_val}s", change),
+        ProofResult(name, solver, status, f"{cur_val}s", f"{base_val}s", change),
         is_regression,
     )
 
 
 def compute_total_runtime(data):
-    """Compute total runtime from proof results."""
+    """Compute total runtime from proof results, ignoring failed/omitted/inconclusive."""
     if not data:
         return None
     return sum(
-        r["value"] for r in data.get("runtimes", []) if r.get("status") != "failed"
+        r["value"]
+        for r in data.get("runtimes", [])
+        if r.get("status") not in ("failed", "omitted", "inconclusive")
+        and "value" in r
     )
 
 
 def build_comment(current, baseline, cfg):
     """Build the PR comment markdown."""
-    baseline_runtimes = {r["name"]: r for r in (baseline or {}).get("runtimes", [])}
+    baseline_runtimes = {
+        (r["name"], r.get("solver", LEGACY_DEFAULT_SOLVER)): r
+        for r in (baseline or {}).get("runtimes", [])
+    }
     alerts, all_rows = [], []
 
     for r in current.get("runtimes", []):
@@ -136,8 +186,8 @@ def build_comment(current, baseline, cfg):
 
     def sort_key(r):
         if r.current == "-":
-            return -1
-        return -int(r.current.rstrip("s"))
+            return (-1, r.name, r.solver)
+        return (-int(r.current.rstrip("s")), r.name, r.solver)
 
     all_rows.sort(key=sort_key)
 
@@ -153,6 +203,7 @@ def sort_key(r):
         total_status = OK
     total_row = ProofResult(
         "**TOTAL**",
+        "-",
         total_status,
         f"{cur_total}s",
         f"{base_total}s" if base_total else "-",

@@ -24,9 +24,9 @@ jobs:
     uses: ./.github/workflows/ci_ec2_reusable.yml
     with:
       name: CBMC (ML-DSA-${{ matrix.parameter_set }}${{ matrix.reduce_ram && ', REDUCE_RAM' || '' }})
-      ec2_instance_type: c7g.4xlarge
+      ec2_instance_type: r7g.4xlarge
       ec2_ami: ubuntu-latest (aarch64)
-      ec2_volume_size: 20
+      ec2_volume_size: 64
       compile_mode: native
       opt: no_opt
       lint: false

@@ -25,7 +25,7 @@
           pkgs-unstable = inputs.nixpkgs-unstable.legacyPackages.${system};
           pkgs-2405 = inputs.nixpkgs-2405.legacyPackages.${system};
           util = pkgs.callPackage ./nix/util.nix {
-            inherit (pkgs) bitwuzla z3;
+            inherit (pkgs) bitwuzla cvc5 z3;
             inherit (pkgs-unstable) cbmc;
             # TODO: switch back to stable python3 for slothy once ortools is fixed in 25.11
             python3-for-slothy = pkgs-unstable.python3;
@@ -240,7 +240,7 @@
             pkgs-unstable = inputs.nixpkgs-unstable.legacyPackages.x86_64-linux;
             util = pkgs.callPackage ./nix/util.nix {
               inherit pkgs;
-              inherit (pkgs) bitwuzla z3;
+              inherit (pkgs) bitwuzla cvc5 z3;
               inherit (pkgs-unstable) cbmc;
               # TODO: switch back to stable python3 for slothy once ortools is fixed in 25.11
               python3-for-slothy = pkgs-unstable.python3;

@@ -6,6 +6,7 @@
 , fetchFromGitHub
 , callPackage
 , bitwuzla
+, cvc5
 , ninja
 , z3
 }:
@@ -37,6 +38,7 @@ buildEnv {
 
       inherit
         bitwuzla# 0.8.2
+        cvc5# 1.3.2
         ninja; # 1.13.2
     };
 }
@@ -2,7 +2,7 @@
 # Copyright (c) The mldsa-native project authors
 # SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
 
-{ pkgs, cbmc, bitwuzla, z3, python3-for-slothy }:
+{ pkgs, cbmc, bitwuzla, cvc5, z3, python3-for-slothy }:
 rec {
   glibc-join = p: p.buildPackages.symlinkJoin {
     name = "glibc-join";
@@ -97,7 +97,7 @@ rec {
   };
 
   cbmc_pkgs = pkgs.callPackage ./cbmc {
-    inherit cbmc bitwuzla z3;
+    inherit cbmc bitwuzla cvc5 z3;
   };
 
   valgrind_varlat = pkgs.callPackage ./valgrind { };

@@ -28,9 +28,25 @@ USE_FUNCTION_CONTRACTS+=mld_shake256_release
 APPLY_LOOP_CONTRACTS=on
 USE_DYNAMIC_FRAMES=1
 
-# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
-EXTERNAL_SAT_SOLVER=
-CBMCFLAGS=--smt2
+# To add or override per-solver entries in the solver matrix, set the
+# variables below for each solver. {SOLVER} is one of the canonical
+# solver identifiers (Z3, BITWUZLA, CVC5), declared in Makefile.common as
+# CBMC_SOLVERS_ALL.
+#
+#     CBMC_SOLVER_{SOLVER}_ENABLED ?= 0/1
+#     CBMC_SOLVER_{SOLVER}_FLAGS   ?= <extra CBMCFLAGS for {SOLVER}>
+#     CBMC_SOLVER_{SOLVER}_EXT_SAT ?= <EXTERNAL_SAT_SOLVER for {SOLVER}>
+#
+# Solvers default to enabled; disable a solver explicitly by setting
+# CBMC_SOLVER_{SOLVER}_ENABLED = 0. The per-harness CBMC_DEFAULT_SOLVER
+# selects which enabled solver is used when the harness is invoked
+# directly (`make report` from this directory). The driver
+# run-cbmc-proofs.py overrides CBMC_SOLVER on the command line to fan
+# out across all enabled solvers.
+#
+# Solver matrix
+CBMC_DEFAULT_SOLVER          = Z3
+CBMC_SOLVER_Z3_ENABLED       = 1
 
 FUNCTION_NAME = mld_h