Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions scripts/node_suite_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,45 @@

Usage: node_suite_run.py <perry-bin> <repo-root> [comma-separated-modules]
"""
import os, subprocess, sys, tempfile
import os, re, subprocess, sys, tempfile
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict

# Environment-variant tokens that defeat byte-for-byte stdout comparison without
# indicating any Perry defect. Scrubbed SYMMETRICALLY from both node and perry
# output before the equality check (see normalize()).
#
# 1. console.time/timeEnd/timeLog durations. Node's formatTime renders a live
# hrtime delta as the raw float `${ms}ms` (or `${s}s`, or `m:ss.mmm`), so the
# number is non-deterministic run-to-run AND has a variable decimal count —
# node itself prints e.g. 0.004ms / 0.004ms / 0.003ms, and 2.59ms / 2.68ms,
# across runs of the same program. The duration always follows the timer
# label as `<label>: <dur>`, so we anchor on `: ` (a lookbehind) and mask the
# numeric value while KEEPING the unit — a dropped `ms`, a wrong label, or a
# missing trailing arg still surfaces as a diff, and the `: ` anchor keeps
# ordinary numbers elsewhere in the output untouched.
# 2. Stack-trace frames. console.trace, thrown-error inspection, and `[cause]`
# blocks print ` at <path>:<line>:<col>` plus node-internal ESM-loader
# frames Perry cannot reproduce (and Perry's own native-symbol /
# `(… N more identical frames)` placeholders). Paths and line numbers vary by
# machine, so whole frame lines are dropped. Frames are always indented >=4
# spaces, which keeps ordinary 2-space-per-level inspect output untouched.
# Patterns used by normalize(); compiled once at import time.
#
# _DUR_MS matches a plain duration such as `1.234ms` or `2.500s`, but only
# when it directly follows `: ` (the lookbehind), so numbers elsewhere in a
# line are left alone. The unit is captured so the replacement can keep it.
_DUR_MS = re.compile(r"(?<=: )\d+(?:\.\d+)?(ms|s)\b")
# _DUR_CLOCK matches the clock-style duration together with its unit legend:
# `m:ss.mmm (m:ss.mmm)` or `h:mm:ss.mmm (h:mm:ss.mmm)`. The optional leading
# `\d+:` group consumes the hours field of the second form; without it the
# match would start at the minutes and leave a stray `<hours>:` unmasked.
_DUR_CLOCK = re.compile(r"\b(?:\d+:)?\d+:\d{2}\.\d{3} \((h:mm|m):ss\.mmm\)")
# _FRAME matches a stack-frame line: one indented by at least four whitespace
# characters that starts with `at ` or `<number>: `, or any line containing a
# `… N more identical frames` placeholder.
_FRAME = re.compile(r"^\s{4,}(at\s|\d+:\s)|… \d+ more identical frames")


def normalize(text: str) -> str:
    """Return *text* with environment-variant tokens scrubbed.

    The text is processed one line at a time (split on "\\n"):

    * lines matching ``_FRAME`` are removed entirely;
    * a ``: <number>ms`` / ``: <number>s`` duration becomes ``<dur>ms`` /
      ``<dur>s`` (the unit is preserved);
    * a clock-style duration with its legend becomes ``<dur:m>`` or
      ``<dur:h:mm>`` depending on which legend was present.

    All remaining lines are re-joined with "\\n" in their original order.
    """
    kept = []
    for line in text.split("\n"):
        # Drop whole stack-frame lines rather than trying to mask their parts.
        if _FRAME.search(line):
            continue
        # Mask the numeric value but keep the captured unit / legend marker.
        line = _DUR_MS.sub(r"<dur>\1", line)
        line = _DUR_CLOCK.sub(r"<dur:\1>", line)
        kept.append(line)
    return "\n".join(kept)


# Positional command-line arguments, in the order given by the usage line in
# the module docstring: <perry-bin> <repo-root> [comma-separated-modules].
PERRY = sys.argv[1]
ROOT = sys.argv[2]
# The third argument is optional: MODS is the list obtained by splitting it on
# commas, or None when it is absent or an empty string.
MODS = sys.argv[3].split(",") if len(sys.argv) > 3 and sys.argv[3] else None
Expand Down Expand Up @@ -72,7 +107,7 @@ def run_one(args):
# Match stdout byte-for-byte (ignore only trailing-newline noise, not leading
# whitespace) AND exit code — so a Perry crash that happened to print matching
# output before dying is a diff, not a false pass.
ok = (n.stdout.rstrip("\n") == p.stdout.rstrip("\n")) and (n.returncode == p.returncode)
ok = (normalize(n.stdout.rstrip("\n")) == normalize(p.stdout.rstrip("\n"))) and (n.returncode == p.returncode)
return (mod, "pass" if ok else "diff")


Expand Down
6 changes: 3 additions & 3 deletions test-parity/node_suite_baseline.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
"_schema": {
"description": "Floor baseline for scripts/node_suite_regression_check.py. Each module's run must produce pass >= floor.pass; dropping below is a regression (exit 1). Improvements are always accepted and reported as ratchet candidates. Captured in the node-26 environment with scripts/node_suite_run.py (pre-warm + fast/slow lanes).",
"oracle": "node v26.3.0 on Linux (the box)",
"note": "Deterministic modules are floored at full pass. Timing/racy modules (http2, net, stream, diagnostics_channel, fs-promises) carry a small margin below observed pass so ordinary flake does not false-alarm; the guard still catches real regressions, which are large (e.g. dns 6->0, http 19->9)."
"note": "Deterministic modules are floored at full pass. Timing/racy modules (http2, net, stream, diagnostics_channel, fs-promises) carry a small margin below observed pass so ordinary flake does not false-alarm; the guard still catches real regressions, which are large (e.g. dns 6->0, http 19->9). node_suite_run.normalize() scrubs environment-variant tokens (console.time hrtime durations, stack-trace frame lines) symmetrically before the stdout compare, so console is floored at full pass (119) on its deterministic content."
},
"overall": { "pass": 2781, "total": 2863, "pct": 97.1 },
"overall": { "pass": 2792, "total": 2863, "pct": 97.5 },
"modules": {
"assert": { "pass": 70, "total": 70 },
"async_hooks": { "pass": 5, "total": 5 },
"bigint": { "pass": 3, "total": 3 },
"buffer": { "pass": 134, "total": 134 },
"child_process": { "pass": 26, "total": 26 },
"cluster": { "pass": 1, "total": 1 },
"console": { "pass": 108, "total": 119 },
"console": { "pass": 119, "total": 119 },
"constants": { "pass": 4, "total": 4 },
"crypto": { "pass": 240, "total": 242 },
"dgram": { "pass": 4, "total": 4 },
Expand Down
Loading