Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 65 additions & 14 deletions .github/actions/cbmc/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,13 @@
FAIL = "❌"

ProofResult = namedtuple(
"ProofResult", ["name", "status", "current", "previous", "change"]
"ProofResult", ["name", "solver", "status", "current", "previous", "change"]
)

# Solver assigned to baseline runtime entries that lack a `solver` field.
# Older baseline JSONs predate multi-solver support and mostly ran under Z3.
LEGACY_DEFAULT_SOLVER = "Z3"


def get_args():
parser = argparse.ArgumentParser(description="CBMC proof results reporter")
Expand Down Expand Up @@ -78,11 +82,11 @@ def fetch_baseline(cfg):
def render_table(rows):
"""Render a markdown table from ProofResult rows."""
lines = [
"| Proof | Status | Current | Previous | Change |",
"|-------|--------|---------|----------|--------|",
"| Proof | Solver | Status | Current | Previous | Change |",
"|-------|--------|--------|---------|----------|--------|",
]
lines.extend(
f"| `{r.name}` | {r.status} | {r.current} | {r.previous} | {r.change} |"
f"| `{r.name}` | {r.solver} | {r.status} | {r.current} | {r.previous} | {r.change} |"
for r in rows
)
return lines
Expand All @@ -91,41 +95,87 @@ def render_table(rows):
def classify_proof(r, baseline_runtimes, cfg):
"""Classify a single proof result, returning (ProofResult, is_alert)."""
name = r["name"]
base = baseline_runtimes.get(name, {})
solver = r.get("solver", LEGACY_DEFAULT_SOLVER)
base = baseline_runtimes.get((name, solver), {})
base_val, base_failed = base.get("value"), base.get("status") == "failed"
base_omitted = base.get("status") == "omitted"
base_inconclusive = base.get("status") == "inconclusive"

# Solver could not decide -- not a real failure, not a regression.
if r.get("status") == "inconclusive":
prev = (
f"{base_val}s" if base_val
else "failed" if base_failed
else "inconclusive" if base_inconclusive
else "omitted" if base_omitted
else "-"
)
# Was passing in the baseline, now inconclusive: surface as a warning.
if base_val is not None and not base_failed:
return (
ProofResult(name, solver, WARN, "?", prev, "inconclusive"),
True,
)
return ProofResult(name, solver, OK, "?", prev, "inconclusive"), False

# Pair was intentionally not run.
if r.get("status") == "omitted":
# Was passing in the baseline, now omitted: surface as a warning.
if base_val is not None and not base_failed:
return (
ProofResult(name, solver, WARN, "-", f"{base_val}s", "omitted"),
True,
)
return ProofResult(name, solver, OK, "-", "-", "omitted"), False

if r.get("status") == "failed":
prev = "failed" if base_failed else (f"{base_val}s" if base_val else "-")
return ProofResult(name, FAIL, "-", prev, "-"), True
if base_omitted:
prev = "omitted"
return ProofResult(name, solver, FAIL, "-", prev, "-"), True

cur_val = r["value"]
if base_failed:
return ProofResult(name, OK, f"{cur_val}s", "failed", "fixed"), False
return (
ProofResult(name, solver, OK, f"{cur_val}s", "failed", "fixed"),
False,
)
if base_omitted:
return (
ProofResult(name, solver, OK, f"{cur_val}s", "omitted", "new"),
False,
)
if base_val is None:
return ProofResult(name, OK, f"{cur_val}s", "-", "new"), False
return ProofResult(name, solver, OK, f"{cur_val}s", "-", "new"), False

ratio = cur_val / base_val if base_val > 0 else 1
change = f"{(ratio - 1) * 100:+.0f}%" if base_val > 0 else "-"
is_regression = cur_val >= cfg.min_runtime and ratio >= cfg.regression_threshold
status = WARN if is_regression else OK
return (
ProofResult(name, status, f"{cur_val}s", f"{base_val}s", change),
ProofResult(name, solver, status, f"{cur_val}s", f"{base_val}s", change),
is_regression,
)


def compute_total_runtime(data):
"""Compute total runtime from proof results."""
"""Compute total runtime from proof results, ignoring failed/omitted/inconclusive."""
if not data:
return None
return sum(
r["value"] for r in data.get("runtimes", []) if r.get("status") != "failed"
r["value"]
for r in data.get("runtimes", [])
if r.get("status") not in ("failed", "omitted", "inconclusive")
and "value" in r
)


def build_comment(current, baseline, cfg):
"""Build the PR comment markdown."""
baseline_runtimes = {r["name"]: r for r in (baseline or {}).get("runtimes", [])}
baseline_runtimes = {
(r["name"], r.get("solver", LEGACY_DEFAULT_SOLVER)): r
for r in (baseline or {}).get("runtimes", [])
}
alerts, all_rows = [], []

for r in current.get("runtimes", []):
Expand All @@ -136,8 +186,8 @@ def build_comment(current, baseline, cfg):

def sort_key(r):
if r.current == "-":
return -1
return -int(r.current.rstrip("s"))
return (-1, r.name, r.solver)
return (-int(r.current.rstrip("s")), r.name, r.solver)

all_rows.sort(key=sort_key)

Expand All @@ -153,6 +203,7 @@ def sort_key(r):
total_status = OK
total_row = ProofResult(
"**TOTAL**",
"-",
total_status,
f"{cur_total}s",
f"{base_total}s" if base_total else "-",
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/cbmc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ jobs:
uses: ./.github/workflows/ci_ec2_reusable.yml
with:
name: CBMC (ML-DSA-${{ matrix.parameter_set }}${{ matrix.reduce_ram && ', REDUCE_RAM' || '' }})
ec2_instance_type: c7g.4xlarge
ec2_instance_type: r7g.4xlarge
ec2_ami: ubuntu-latest (aarch64)
ec2_volume_size: 20
ec2_volume_size: 64
compile_mode: native
opt: no_opt
lint: false
Expand Down
4 changes: 2 additions & 2 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
pkgs-unstable = inputs.nixpkgs-unstable.legacyPackages.${system};
pkgs-2405 = inputs.nixpkgs-2405.legacyPackages.${system};
util = pkgs.callPackage ./nix/util.nix {
inherit (pkgs) bitwuzla z3;
inherit (pkgs) bitwuzla cvc5 z3;
inherit (pkgs-unstable) cbmc;
# TODO: switch back to stable python3 for slothy once ortools is fixed in 25.11
python3-for-slothy = pkgs-unstable.python3;
Expand Down Expand Up @@ -240,7 +240,7 @@
pkgs-unstable = inputs.nixpkgs-unstable.legacyPackages.x86_64-linux;
util = pkgs.callPackage ./nix/util.nix {
inherit pkgs;
inherit (pkgs) bitwuzla z3;
inherit (pkgs) bitwuzla cvc5 z3;
inherit (pkgs-unstable) cbmc;
# TODO: switch back to stable python3 for slothy once ortools is fixed in 25.11
python3-for-slothy = pkgs-unstable.python3;
Expand Down
2 changes: 2 additions & 0 deletions nix/cbmc/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
, fetchFromGitHub
, callPackage
, bitwuzla
, cvc5
, ninja
, z3
}:
Expand Down Expand Up @@ -37,6 +38,7 @@ buildEnv {

inherit
bitwuzla# 0.8.2
cvc5# 1.3.2
ninja; # 1.13.2
};
}
4 changes: 2 additions & 2 deletions nix/util.nix
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Copyright (c) The mldsa-native project authors
# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT

{ pkgs, cbmc, bitwuzla, z3, python3-for-slothy }:
{ pkgs, cbmc, bitwuzla, cvc5, z3, python3-for-slothy }:
rec {
glibc-join = p: p.buildPackages.symlinkJoin {
name = "glibc-join";
Expand Down Expand Up @@ -97,7 +97,7 @@ rec {
};

cbmc_pkgs = pkgs.callPackage ./cbmc {
inherit cbmc bitwuzla z3;
inherit cbmc bitwuzla cvc5 z3;
};

valgrind_varlat = pkgs.callPackage ./valgrind { };
Expand Down
22 changes: 19 additions & 3 deletions proofs/cbmc/H/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,25 @@ USE_FUNCTION_CONTRACTS+=mld_shake256_release
APPLY_LOOP_CONTRACTS=on
USE_DYNAMIC_FRAMES=1

# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
EXTERNAL_SAT_SOLVER=
CBMCFLAGS=--smt2
# To add or override per-solver entries in the solver matrix, set the
# variables below for each solver. {SOLVER} is one of the canonical
# solver identifiers (Z3, BITWUZLA, CVC5), declared in Makefile.common as
# CBMC_SOLVERS_ALL.
#
# CBMC_SOLVER_{SOLVER}_ENABLED ?= 0/1
# CBMC_SOLVER_{SOLVER}_FLAGS ?= <extra CBMCFLAGS for {SOLVER}>
# CBMC_SOLVER_{SOLVER}_EXT_SAT ?= <EXTERNAL_SAT_SOLVER for {SOLVER}>
#
# Solvers default to enabled; disable a solver explicitly by setting
# CBMC_SOLVER_{SOLVER}_ENABLED = 0. The per-harness CBMC_DEFAULT_SOLVER
# selects which enabled solver is used when the harness is invoked
# directly (`make report` from this directory). The driver
# run-cbmc-proofs.py overrides CBMC_SOLVER on the command line to fan
# out across all enabled solvers.
#
# Solver matrix
CBMC_DEFAULT_SOLVER = Z3
CBMC_SOLVER_Z3_ENABLED = 1

FUNCTION_NAME = mld_h

Expand Down
Loading
Loading