From de72d52b0d69193b37a821737a84ae866956d95f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Wed, 10 Jun 2026 08:09:01 +0200 Subject: [PATCH] fix(codegen): provenance-based transitive disqualification for integer locals (#4785 bug class) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The integer-locals analysis now records WHY each candidate is believed integer (its init and every LocalSet rhs, with the local ids each verdict relied on) and prunes transitively via a reverse-dependency worklist when any source is disqualified — no admission path (seed, propagation, clamp_fn_ids, flat_const_ids) can escape. Holes closed (clamp + written-init are live miscompiles on main): - clamp3-shaped calls admitted unconditionally even though clamp3 returns an ARGUMENT verbatim; now argument-dependent with recorded deps - candidates with LocalSet writes were never re-validated through their init (let b = a; use(b); b = 1 kept a stale i32 slot) - Expr::Update was unconditionally int-producing - detect_clamp_u8 never verified its 'return v | 0' statement - lower_call clamp3 intrinsification fptosi'd unproven args at every call site (clamp3(2.5, 0, 5) returned 2); now gated on can_lower_expr_as_i32 - i64 whole-function specialization replaced clamp-shaped bodies with an unconditional fptosi wrapper; clamp shapes are now excluded --- crates/perry-codegen/src/codegen/closure.rs | 1 + crates/perry-codegen/src/codegen/entry.rs | 2 + crates/perry-codegen/src/codegen/function.rs | 1 + crates/perry-codegen/src/codegen/method.rs | 2 + crates/perry-codegen/src/codegen/mod.rs | 13 +- .../src/collectors/clamp_detect.rs | 10 +- .../perry-codegen/src/collectors/hir_facts.rs | 162 ++++- .../src/collectors/i32_locals.rs | 21 - .../src/collectors/integer_locals.rs | 552 +++++++++++------- crates/perry-codegen/src/collectors/mod.rs | 6 +- .../perry-codegen/src/lower_call/func_ref.rs | 59 +- .../perry/tests/integer_locals_provenance.rs | 197 +++++++ 12 files changed, 776 insertions(+), 250 deletions(-) create mode 100644 crates/perry/tests/integer_locals_provenance.rs diff --git a/crates/perry-codegen/src/codegen/closure.rs b/crates/perry-codegen/src/codegen/closure.rs index ac89dc717b..20750046ed 100644 --- a/crates/perry-codegen/src/codegen/closure.rs +++ b/crates/perry-codegen/src/codegen/closure.rs @@ -233,6 +233,7 @@ pub(super) fn compile_closure( body, &flat_const_ids, &clamp_fn_ids, + &cross_module.clamp3_functions, &closure_boxed_vars, module_globals, classes, diff --git a/crates/perry-codegen/src/codegen/entry.rs b/crates/perry-codegen/src/codegen/entry.rs index b40b1d371d..8327154f89 100644 --- a/crates/perry-codegen/src/codegen/entry.rs +++ b/crates/perry-codegen/src/codegen/entry.rs @@ -238,6 +238,7 @@ pub(super) fn compile_module_entry( &hir.init, &flat_const_ids, &clamp_fn_ids, + &cross_module.clamp3_functions, &main_boxed_vars, module_globals, classes, @@ -668,6 +669,7 @@ pub(super) fn compile_module_entry( &hir.init, &flat_const_ids, &clamp_fn_ids, + &cross_module.clamp3_functions, &init_boxed_vars, module_globals, classes, diff --git a/crates/perry-codegen/src/codegen/function.rs b/crates/perry-codegen/src/codegen/function.rs index b949797d90..92bab39572 100644 --- a/crates/perry-codegen/src/codegen/function.rs +++ b/crates/perry-codegen/src/codegen/function.rs @@ -142,6 +142,7 @@ pub(super) fn compile_function( &f.body, &flat_const_ids, &clamp_fn_ids, + &cross_module.clamp3_functions, &boxed_vars, module_globals, classes, diff --git a/crates/perry-codegen/src/codegen/method.rs b/crates/perry-codegen/src/codegen/method.rs index 7bd21ee9e8..cdc208bc62 100644 --- a/crates/perry-codegen/src/codegen/method.rs +++ b/crates/perry-codegen/src/codegen/method.rs @@ -126,6 +126,7 @@ pub(super) fn compile_method( &method.body, &flat_const_ids, &clamp_fn_ids, + &cross_module.clamp3_functions, &method_boxed_vars, module_globals, classes, @@ -616,6 +617,7 @@ pub(super) fn compile_static_method( &f.body, &flat_const_ids, &clamp_fn_ids, + &cross_module.clamp3_functions, &static_boxed_vars, module_globals, classes, diff --git a/crates/perry-codegen/src/codegen/mod.rs b/crates/perry-codegen/src/codegen/mod.rs index 9750d3bf91..6d00d535ce 100644 --- a/crates/perry-codegen/src/codegen/mod.rs +++ b/crates/perry-codegen/src/codegen/mod.rs @@ -2153,7 +2153,18 @@ pub fn compile_module(hir: &HirModule, opts: CompileOptions) -> Result> } walks(s, &module_globals) }); - if crate::collectors::is_integer_specializable(f) && !uses_module_globals { + // Skip clamp-shaped functions: their FuncRef call sites with provably + // i32 arguments are intrinsified to smax/smin and never call this + // symbol, so the only remaining callers are exactly the ones whose + // arguments are NOT integers (fractional doubles, NaN-boxed pointers) + // — and clamp3 returns an argument verbatim, so the wrapper's + // unconditional `fptosi` miscompiles every one of them (#4785 bug + // class: `(number).method is not a function`). Those callers need + // the real f64 body. + let is_clamp_shape = + crate::collectors::detect_clamp3(f).is_some() || crate::collectors::detect_clamp_u8(f); + if crate::collectors::is_integer_specializable(f) && !uses_module_globals && !is_clamp_shape + { if let Some(llvm_name) = func_names.get(&f.id) { let i64_name = format!("{}_i64", llvm_name); crate::collectors::emit_i64_function(&mut llmod, f, &i64_name); diff --git a/crates/perry-codegen/src/collectors/clamp_detect.rs b/crates/perry-codegen/src/collectors/clamp_detect.rs index 19b1e4efdc..752f147135 100644 --- a/crates/perry-codegen/src/collectors/clamp_detect.rs +++ b/crates/perry-codegen/src/collectors/clamp_detect.rs @@ -76,6 +76,13 @@ pub fn detect_clamp3(f: &Function) -> Option<(u32, u32, u32)> { } /// Detect a 1-param clampU8 pattern: `if (v < 0) return 0; if (v > 255) return 255; return v|0;` +/// +/// The third statement must actually coerce (`v | 0`, another bitwise op, or +/// an integer literal). A body ending in bare `return v;` passes a fractional +/// in-range `v` through unchanged, so treating its callers' results as +/// int-producing (clamp_u8_functions feeds `clamp_fn_ids` as an +/// argument-INdependent admission in `collect_integer_locals`) would put a +/// truncating i32 shadow slot on a non-integer value. pub fn detect_clamp_u8(f: &Function) -> bool { if f.is_async || f.is_generator || f.params.len() != 1 { return false; @@ -136,7 +143,8 @@ pub fn detect_clamp_u8(f: &Function) -> bool { } else { return false; } - true + // [2] Return of an int-coercing expression (`v | 0`, bitwise, Integer). + matches!(&f.body[2], Stmt::Return(Some(e)) if returns_int_expr(e)) } /// A function is i64-specializable if it's a pure numeric recursive fn. diff --git a/crates/perry-codegen/src/collectors/hir_facts.rs b/crates/perry-codegen/src/collectors/hir_facts.rs index 99abac6932..88d520c0e1 100644 --- a/crates/perry-codegen/src/collectors/hir_facts.rs +++ b/crates/perry-codegen/src/collectors/hir_facts.rs @@ -138,17 +138,23 @@ impl NativeRegionFactGraph { /// Some subgraphs still delegate to established focused collectors; this /// function is the single contract used by codegen entry points so new native /// consumers do not need to rediscover facts independently. +#[allow(clippy::too_many_arguments)] pub(crate) fn collect_native_region_fact_graph( stmts: &[Stmt], flat_const_ids: &HashSet, clamp_fn_ids: &HashSet, + arg_dependent_clamp_fn_ids: &HashSet, boxed_vars: &HashSet, module_globals: &HashMap, classes: &HashMap, compile_time_constants: &HashMap, ) -> NativeRegionFactGraph { - let integer_locals = - super::integer_locals::collect_integer_locals(stmts, flat_const_ids, clamp_fn_ids); + let integer_locals = super::integer_locals::collect_integer_locals( + stmts, + flat_const_ids, + clamp_fn_ids, + arg_dependent_clamp_fn_ids, + ); let unsigned_i32_locals = super::i32_locals::collect_unsigned_i32_locals(stmts); let index_used_locals = super::index_uses::collect_index_used_locals(stmts); let strictly_i32_bounded_locals = super::i32_locals::collect_strictly_i32_bounded_locals( @@ -226,6 +232,7 @@ pub(crate) fn collect_hir_facts( flat_const_ids, clamp_fn_ids, &HashSet::new(), + &HashSet::new(), &HashMap::new(), &HashMap::new(), &HashMap::new(), @@ -458,6 +465,7 @@ mod tests { &HashSet::new(), &pure_helpers, &HashSet::new(), + &HashSet::new(), &HashMap::new(), &HashMap::new(), &constants, @@ -490,6 +498,7 @@ mod tests { &HashSet::new(), &HashSet::new(), &HashSet::new(), + &HashSet::new(), &HashMap::new(), &HashMap::new(), &HashMap::new(), @@ -538,6 +547,7 @@ mod tests { &stmts, &HashSet::new(), &HashSet::new(), + &HashSet::new(), ); assert!( @@ -575,6 +585,7 @@ mod tests { &stmts, &HashSet::new(), &HashSet::new(), + &HashSet::new(), ); assert!(ints.contains(&1), "live |0 accumulator must stay integer"); @@ -612,6 +623,7 @@ mod tests { &stmts, &HashSet::new(), &HashSet::new(), + &HashSet::new(), ); assert!( @@ -627,4 +639,150 @@ mod tests { "const copy of the mutable param binding must not be integer" ); } + + // Provenance hole #1 (the clamp_fn_ids bypass): a local admitted via a + // clamp3-shaped call must be pruned when an *argument* of that call is + // disqualified — clamp3 returns one of its arguments verbatim, so the + // result is only an integer if the arguments are. Previously + // `is_int32_producing_expr` accepted any clamp call unconditionally, so + // the candidate kept its i32 slot forever. + #[test] + fn clamp_admitted_local_is_pruned_when_arg_source_is_disqualified() { + let clamp_call = |arg: Expr| Expr::Call { + callee: Box::new(Expr::FuncRef(7)), + args: vec![arg, Expr::Integer(0), Expr::Integer(100)], + type_args: vec![], + }; + // let src = undefined; src = obj.value; (disqualified seed) + // const xx = clamp3(src, 0, 100); (clamp-admitted) + // const yy = xx; (downstream copy) + let stmts = vec![ + mutable_number_let(1, Expr::Undefined), + Stmt::Expr(Expr::LocalSet( + 1, + Box::new(Expr::PropertyGet { + object: Box::new(Expr::LocalGet(99)), + property: "value".to_string(), + }), + )), + const_let(2, clamp_call(Expr::LocalGet(1))), + const_let(3, Expr::LocalGet(2)), + ]; + let clamp_ids: HashSet = [7].into_iter().collect(); + + let ints = super::super::integer_locals::collect_integer_locals( + &stmts, + &HashSet::new(), + &clamp_ids, + &clamp_ids, + ); + assert!(!ints.contains(&1), "non-int-written seed must be pruned"); + assert!( + !ints.contains(&2), + "clamp3-admitted local must follow its disqualified argument" + ); + assert!( + !ints.contains(&3), + "copy of the clamp3-admitted local must be pruned transitively" + ); + + // Same shape with integer-stable arguments keeps the optimization. + let ok_stmts = vec![ + mutable_number_let(1, Expr::Integer(5)), + const_let(2, clamp_call(Expr::LocalGet(1))), + const_let(3, Expr::LocalGet(2)), + ]; + let ints = super::super::integer_locals::collect_integer_locals( + &ok_stmts, + &HashSet::new(), + &clamp_ids, + &clamp_ids, + ); + assert!(ints.contains(&2), "int-arg clamp3 result must stay integer"); + assert!(ints.contains(&3), "copy of live clamp3 result must stay"); + + // Argument-INdependent clamp functions (clampU8 / returns_integer — + // they coerce internally) must keep admitting double-valued args. + let coercing_stmts = vec![const_let(2, clamp_call(Expr::LocalGet(98)))]; + let ints = super::super::integer_locals::collect_integer_locals( + &coercing_stmts, + &HashSet::new(), + &clamp_ids, + &HashSet::new(), + ); + assert!( + ints.contains(&2), + "internally-coercing clamp result must stay integer regardless of args" + ); + } + + // Provenance hole #2 (init bypass on written locals): a candidate WITH + // `LocalSet` writes was never re-validated through its init, so + // `let b = a; …use b…; b = 1` kept b integer after `a` was disqualified + // — reads between the init and the int write saw a truncated pointer. + #[test] + fn written_local_is_still_revalidated_through_its_init() { + // let a = undefined; a = obj.value; (disqualified seed) + // let b = a; (init copies disqualified a) + // b = 1; (later int write) + let stmts = vec![ + mutable_number_let(1, Expr::Undefined), + Stmt::Expr(Expr::LocalSet( + 1, + Box::new(Expr::PropertyGet { + object: Box::new(Expr::LocalGet(99)), + property: "value".to_string(), + }), + )), + mutable_number_let(2, Expr::LocalGet(1)), + Stmt::Expr(Expr::LocalSet(2, Box::new(Expr::Integer(1)))), + ]; + + let ints = super::super::integer_locals::collect_integer_locals( + &stmts, + &HashSet::new(), + &HashSet::new(), + &HashSet::new(), + ); + assert!( + !ints.contains(&2), + "a written local whose init copies a disqualified source must be pruned" + ); + } + + // Provenance hole #3 (Update bypass): `const y = x++` was unconditionally + // int-producing even when `x` never was (or stopped being) an integer. + #[test] + fn update_admitted_local_follows_its_target() { + // let x = undefined; x = obj.value; const y = x++; + let stmts = vec![ + mutable_number_let(1, Expr::Undefined), + Stmt::Expr(Expr::LocalSet( + 1, + Box::new(Expr::PropertyGet { + object: Box::new(Expr::LocalGet(99)), + property: "value".to_string(), + }), + )), + const_let( + 2, + Expr::Update { + id: 1, + op: perry_hir::UpdateOp::Increment, + prefix: false, + }, + ), + ]; + + let ints = super::super::integer_locals::collect_integer_locals( + &stmts, + &HashSet::new(), + &HashSet::new(), + &HashSet::new(), + ); + assert!( + !ints.contains(&2), + "`x++` over a disqualified local must not stay integer" + ); + } } diff --git a/crates/perry-codegen/src/collectors/i32_locals.rs b/crates/perry-codegen/src/collectors/i32_locals.rs index 12f00914f6..b3ce669b9f 100644 --- a/crates/perry-codegen/src/collectors/i32_locals.rs +++ b/crates/perry-codegen/src/collectors/i32_locals.rs @@ -777,27 +777,6 @@ pub fn collect_integer_let_ids( /// `collect_ref_ids_in_expr`: any new HIR Expr variant must recurse into its /// sub-expressions here, or the walker may miss a LocalSet hidden inside it /// and wrongly mark its target as integer-valued. -/// Walks the HIR and records LocalIds that have at least one LocalSet whose -/// rhs is NOT int32-producing. `collect_integer_locals` uses this to remove -/// locals that lose their integer invariant somewhere in the function. -pub fn collect_non_int_localset_ids_in_stmts( - stmts: &[perry_hir::Stmt], - out: &mut HashSet, - known_int_locals: &HashSet, - flat_const_ids: &HashSet, - flat_row_alias_ids: &HashSet, - clamp_fn_ids: &HashSet, -) { - collect_localset_ids_in_stmts_filtered( - stmts, - out, - Some(known_int_locals), - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); -} - pub fn collect_localset_ids_in_stmts(stmts: &[perry_hir::Stmt], out: &mut HashSet) { let empty = HashSet::new(); collect_localset_ids_in_stmts_filtered(stmts, out, None, &empty, &empty, &empty); diff --git a/crates/perry-codegen/src/collectors/integer_locals.rs b/crates/perry-codegen/src/collectors/integer_locals.rs index 61bc3513c7..08d601cc15 100644 --- a/crates/perry-codegen/src/collectors/integer_locals.rs +++ b/crates/perry-codegen/src/collectors/integer_locals.rs @@ -1,5 +1,49 @@ -use perry_hir::{BinaryOp, Expr, Function, Stmt}; -use std::collections::HashSet; +//! Integer-local analysis: which locals are provably integer-valued, making +//! them eligible for an i32 shadow slot (see `needs_i32_slot` in +//! `stmt/let_stmt.rs`). +//! +//! ## Soundness invariant (regression #4785 and its bug class) +//! +//! A wrong i32 slot is a miscompile: the f64 slot holds a NaN-boxed pointer, +//! the i32 read does `fptosi` on the NaN and produces `i32::MIN`, and user +//! code crashes with `(number).method is not a function`. Losing a slot on a +//! rare pattern is only a missed optimization. The analysis is therefore +//! structured so that **no admission path — syntactic seed, forward +//! propagation, clamp-call admission, or flat-const admission — can escape +//! transitive disqualification**: +//! +//! 1. Admission runs optimistically to a fixed point +//! (`collect_integer_let_ids` + `collect_extra_integer_let_ids`). +//! 2. Every candidate is then re-judged through ALL of its defining +//! expressions — its `Let` init (the lone exemption is the optimistic +//! `let x = undefined`-with-later-writes scaffolding seed, whose real +//! values are its writes) and every `LocalSet` rhs targeting it. The +//! judgment (`int32_producing_deps`) records *provenance*: the exact set +//! of other locals whose candidate-ness the verdict relied on. +//! 3. Any failed judgment disqualifies the local, and disqualification +//! propagates transitively through the recorded reverse-dependency edges +//! with a worklist — any number of hops, regardless of how the dependent +//! was admitted. Locals written inside closures are disqualified +//! unconditionally. +//! +//! `int32_producing_deps` is deliberately stricter than the admission-side +//! `is_int32_producing_expr` in two places where the latter is optimistic: +//! `Expr::Update` requires the updated local to itself be a candidate, and a +//! call to an *argument-dependent* clamp function (`clamp3`-shaped functions +//! return one of their arguments verbatim) requires every argument to be +//! int-producing. Anything admission accepted that judgment rejects is simply +//! pruned. +//! +//! Scoping notes: `clamp_fn_ids` are *function* ids (module-global, no +//! per-function contamination). `flat_const_ids` are module-init local ids of +//! never-mutated `const` int matrices; those facts cannot change during this +//! per-function analysis, so flat-const admissions carry no local deps. (HIR +//! local ids are scope-local, so a function-local id can in principle collide +//! with a module-init flat-const id — that exposure is shared with codegen's +//! `flat_const_arrays` fast-path lowering and is out of scope here.) + +use perry_hir::{BinaryOp, Expr, Stmt}; +use std::collections::{HashMap, HashSet}; use super::*; @@ -7,6 +51,7 @@ pub fn collect_integer_locals( stmts: &[perry_hir::Stmt], flat_const_ids: &HashSet, clamp_fn_ids: &HashSet, + arg_dependent_clamp_fn_ids: &HashSet, ) -> HashSet { let mut candidates: HashSet = HashSet::new(); @@ -55,228 +100,317 @@ pub fn collect_integer_locals( } } - // Iterate to a fixed point (issue #49): `is_int32_producing_expr` now - // recognizes `LocalGet(id)` as int-producing when `id` is itself - // int-stable, and `Add/Sub/Mul` as int-producing when both operands - // are. That makes the analysis mutually recursive across locals — - // disqualifying one candidate may cascade to other candidates whose - // rhs referenced the first via LocalGet. Iterate until the set - // stabilizes. - // Locals that are ever the target of a `LocalSet` are governed by the - // write-based disqualifier above; locals that are NOT (their `Let` init is - // their sole definition — every `const`, plus never-reassigned `let`s such - // as the mutable bindings the *parameter* destructuring path emits) must be - // re-validated through that init when their source leaves the set. + // Provenance-based disqualification (see module comment). One walk + // judges every candidate's defining expressions against the optimistic + // set, recording which other candidates each verdict relied on; a + // worklist then propagates removals through those reverse-dependency + // edges to a fixed point. The judgment is monotone in the candidate set + // (it only relies *positively* on membership), so judging once against + // the optimistic set and pruning transitively is exact — no repeated + // full rescans of the function body per disqualification. let mut localset_written: HashSet = HashSet::new(); collect_localset_ids_in_stmts(stmts, &mut localset_written); - loop { - let mut disqualified: HashSet = HashSet::new(); - collect_non_int_localset_ids_in_stmts( - stmts, - &mut disqualified, - &candidates, - flat_const_ids, - &flat_row_alias_ids, - clamp_fn_ids, - ); - // Re-validate init-only candidates against the *current* - // (shrinking) set. The forward pass above adds a `y = x` (an init-only - // Let whose init is `LocalGet(x)` / an int-producing expr) the moment - // `x` is a candidate — but the LocalSet-based disqualifier never - // revisits it, because the binding's defining init is not a `LocalSet`. - // So when `x` later leaves the set (e.g. a mutable `undefined` seed - // disqualified by a non-int write), `y` was left stranded as a stale - // integer local, and any further `z = y` copy then qualified for an i32 - // shadow slot — fptosi'ing a NaN-boxed value to i32::MIN. This is the - // destructuring-scaffolding regression: the iterator-protocol - // array-binding lowering emits `let __destruct = undefined` - // (mutable+Undefined seed), whose binding copies (`cb = cbBase`, where - // the *parameter* path even makes those bindings `mutable` with no - // reassignment) were mis-typed as integers. Pruning any init-only - // candidate whose init is no longer int-producing closes that hop. - // Locals with real `LocalSet` writes (clampIdx's `xx`) stay governed by - // those writes above. - collect_non_int_init_only_let_ids( - stmts, - &mut disqualified, - &candidates, - &localset_written, - flat_const_ids, - &flat_row_alias_ids, - clamp_fn_ids, - ); - let before = candidates.len(); - candidates.retain(|id| !disqualified.contains(id)); - if candidates.len() == before { - break; + + let mut judge = ProvenanceJudge { + candidates: &candidates, + localset_written: &localset_written, + flat_const_ids, + flat_row_alias_ids: &flat_row_alias_ids, + clamp_fn_ids, + arg_dependent_clamp_fn_ids, + dependents: HashMap::new(), + disqualified: HashSet::new(), + closure_written: HashSet::new(), + }; + judge.walk_stmts(stmts); + + let ProvenanceJudge { + dependents, + mut disqualified, + closure_written, + .. + } = judge; + // Locals written inside a closure body lose integer-ness in the + // enclosing scope unconditionally (the closure body gets its own + // analysis pass; this matches the historical unfiltered closure-body + // collection in `collect_localset_ids_in_expr_filtered`). + for id in closure_written { + if candidates.contains(&id) { + disqualified.insert(id); } } + let mut worklist: Vec = disqualified.iter().copied().collect(); + while let Some(gone) = worklist.pop() { + if let Some(dependent_ids) = dependents.get(&gone) { + for &dep in dependent_ids { + if disqualified.insert(dep) { + worklist.push(dep); + } + } + } + } + candidates.retain(|id| !disqualified.contains(id)); candidates } -/// Walk all `Stmt::Let { id, init: Some(e), .. }` whose `id` is a current -/// candidate, has NO `LocalSet` write (`!localset_written.contains(id)` — its -/// init is its sole definition), and whose init `e` is NOT -/// `is_int32_producing_expr` against the current candidate set, recording `id` -/// in `out`. Used inside `collect_integer_locals`'s disqualify fixed point to -/// prune init-only members (every `const`, plus never-reassigned `let`s like -/// the parameter-destructuring bindings) that the forward propagation added -/// from a source local which has since been disqualified — those carry no -/// `LocalSet` write for `collect_non_int_localset_ids_in_stmts` to catch, so -/// they must be re-validated through their defining init here. -#[allow(clippy::too_many_arguments)] -pub fn collect_non_int_init_only_let_ids( - stmts: &[perry_hir::Stmt], - out: &mut HashSet, - candidates: &HashSet, - localset_written: &HashSet, - flat_const_ids: &HashSet, - flat_row_alias_ids: &HashSet, - clamp_fn_ids: &HashSet, -) { - use perry_hir::Stmt; - for s in stmts { - match s { - Stmt::Let { - id, - init: Some(init), - .. - } => { - if candidates.contains(id) - && !localset_written.contains(id) - && !is_int32_producing_expr( - init, - candidates, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ) - { - out.insert(*id); - } +/// Single-pass obligation collector + judge for the disqualification phase. +/// A candidate's "obligations" are its `Let` init (unless it's the optimistic +/// `mutable`-`undefined`-with-writes scaffolding seed, whose real values are +/// the writes) and every `LocalSet` rhs targeting it. Each obligation is +/// judged with `int32_producing_deps`; a failure lands the id in +/// `disqualified`, a success records reverse-dependency edges in `dependents`. +struct ProvenanceJudge<'a> { + candidates: &'a HashSet, + localset_written: &'a HashSet, + flat_const_ids: &'a HashSet, + flat_row_alias_ids: &'a HashSet, + clamp_fn_ids: &'a HashSet, + arg_dependent_clamp_fn_ids: &'a HashSet, + /// dep local id → candidate ids whose integer verdict relied on it. + dependents: HashMap>, + /// Candidates with at least one failed obligation. + disqualified: HashSet, + /// Ids `LocalSet` anywhere inside a closure body in these stmts. + closure_written: HashSet, +} + +impl ProvenanceJudge<'_> { + fn judge_obligation(&mut self, id: u32, rhs: &Expr) { + let mut deps: HashSet = HashSet::new(); + if int32_producing_deps( + rhs, + self.candidates, + self.flat_const_ids, + self.flat_row_alias_ids, + self.clamp_fn_ids, + self.arg_dependent_clamp_fn_ids, + &mut deps, + ) { + for dep in deps { + self.dependents.entry(dep).or_default().push(id); } - Stmt::If { - then_branch, - else_branch, - .. - } => { - collect_non_int_init_only_let_ids( + } else { + self.disqualified.insert(id); + } + } + + fn walk_stmts(&mut self, stmts: &[Stmt]) { + for s in stmts { + match s { + Stmt::Let { + id, + init: Some(init), + mutable, + .. + } => { + // The `let x = undefined; …writes…` scaffolding seed is + // admitted optimistically — its init is exempt and its + // writes are the obligations. A *write-free* undefined + // init has no writes to vouch for it and must fail. + let optimistic_undefined_seed = *mutable + && matches!(init, Expr::Undefined) + && self.localset_written.contains(id); + if self.candidates.contains(id) && !optimistic_undefined_seed { + self.judge_obligation(*id, init); + } + self.walk_expr(init); + } + Stmt::Let { init: None, .. } => {} + Stmt::Expr(e) | Stmt::Throw(e) => self.walk_expr(e), + Stmt::Return(opt) => { + if let Some(e) = opt { + self.walk_expr(e); + } + } + Stmt::If { + condition, then_branch, - out, - candidates, - localset_written, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); - if let Some(eb) = else_branch { - collect_non_int_init_only_let_ids( - eb, - out, - candidates, - localset_written, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); + else_branch, + } => { + self.walk_expr(condition); + self.walk_stmts(then_branch); + if let Some(eb) = else_branch { + self.walk_stmts(eb); + } } - } - Stmt::For { init, body, .. } => { - if let Some(init_stmt) = init { - collect_non_int_init_only_let_ids( - std::slice::from_ref(init_stmt), - out, - candidates, - localset_written, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); + Stmt::While { condition, body } | Stmt::DoWhile { body, condition } => { + self.walk_expr(condition); + self.walk_stmts(body); } - collect_non_int_init_only_let_ids( + Stmt::For { + init, + condition, + update, body, - out, - candidates, - localset_written, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); - } - Stmt::While { body, .. } | Stmt::DoWhile { body, .. } => { - collect_non_int_init_only_let_ids( - body, - out, - candidates, - localset_written, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); - } - Stmt::Try { - body, - catch, - finally, - } => { - collect_non_int_init_only_let_ids( + } => { + if let Some(init_stmt) = init { + self.walk_stmts(std::slice::from_ref(init_stmt)); + } + if let Some(cond) = condition { + self.walk_expr(cond); + } + if let Some(upd) = update { + self.walk_expr(upd); + } + self.walk_stmts(body); + } + Stmt::Try { body, - out, - candidates, - localset_written, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); - if let Some(c) = catch { - collect_non_int_init_only_let_ids( - &c.body, - out, - candidates, - localset_written, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); + catch, + finally, + } => { + self.walk_stmts(body); + if let Some(c) = catch { + self.walk_stmts(&c.body); + } + if let Some(f) = finally { + self.walk_stmts(f); + } } - if let Some(f) = finally { - collect_non_int_init_only_let_ids( - f, - out, - candidates, - localset_written, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); + Stmt::Switch { + discriminant, + cases, + } => { + self.walk_expr(discriminant); + for c in cases { + if let Some(t) = &c.test { + self.walk_expr(t); + } + self.walk_stmts(&c.body); + } } + Stmt::Labeled { body, .. } => { + self.walk_stmts(std::slice::from_ref(body.as_ref())); + } + _ => {} } - Stmt::Switch { cases, .. } => { - for c in cases { - collect_non_int_init_only_let_ids( - &c.body, - out, - candidates, - localset_written, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); + } + } + + fn walk_expr(&mut self, e: &Expr) { + match e { + Expr::LocalSet(id, value) => { + if self.candidates.contains(id) { + self.judge_obligation(*id, value); } + self.walk_expr(value); } - Stmt::Labeled { body, .. } => { - collect_non_int_init_only_let_ids( - std::slice::from_ref(body.as_ref()), - out, - candidates, - localset_written, - flat_const_ids, - flat_row_alias_ids, - clamp_fn_ids, - ); + Expr::Closure { body, .. } => { + collect_localset_ids_in_stmts(body, &mut self.closure_written); + // The centralized walker visits param-default exprs (but + // not the body — handled above). + perry_hir::walker::walk_expr_children(e, &mut |child| self.walk_expr(child)); + } + _ => { + perry_hir::walker::walk_expr_children(e, &mut |child| self.walk_expr(child)); + } + } + } +} + +/// Disqualification-side judgment: returns `true` if `e` is int-producing +/// given the current candidate set, accumulating into `deps` every local id +/// whose candidate-ness the verdict relied on. `collect_integer_locals` +/// prunes the judged local transitively when any of those deps is later +/// disqualified. +/// +/// Mirrors `is_int32_producing_expr` (the admission-side judgment), except +/// it is strict where admission is optimistic: +/// - `Expr::Update`: int-producing only when the updated local is itself +/// a candidate (admission says unconditionally yes; `++` on a +/// non-integer local yields whatever `ToNumber` gives — possibly a +/// fractional or NaN value). +/// - Calls to *argument-dependent* clamp functions (`clamp3`-shaped: they +/// return one of their arguments verbatim): every argument must be +/// int-producing, and the argument deps are recorded. `clampU8`-shaped +/// and `returns_integer` functions coerce internally (`| 0` / bitwise on +/// every value-returning path) and stay argument-independent. +#[allow(clippy::too_many_arguments)] +fn int32_producing_deps( + e: &perry_hir::Expr, + candidates: &HashSet, + flat_const_ids: &HashSet, + flat_row_alias_ids: &HashSet, + clamp_fn_ids: &HashSet, + arg_dependent_clamp_fn_ids: &HashSet, + deps: &mut HashSet, +) -> bool { + let recurse = |sub: &Expr, deps: &mut HashSet| { + int32_producing_deps( + sub, + candidates, + flat_const_ids, + flat_row_alias_ids, + clamp_fn_ids, + arg_dependent_clamp_fn_ids, + deps, + ) + }; + match e { + Expr::Integer(_) => true, + Expr::Update { id, .. } => { + if candidates.contains(id) { + deps.insert(*id); + true + } else { + false + } + } + Expr::Binary { op, right, .. } + if matches!(op, BinaryOp::BitOr | BinaryOp::UShr) + && matches!(right.as_ref(), Expr::Integer(0)) => + { + true + } + Expr::Binary { op, left, right } + if matches!(op, BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul) => + { + recurse(left, deps) && recurse(right, deps) + } + Expr::Call { callee, args, .. } => { + if let Expr::FuncRef(fid) = callee.as_ref() { + if !clamp_fn_ids.contains(fid) { + return false; + } + if arg_dependent_clamp_fn_ids.contains(fid) { + args.iter().all(|a| recurse(a, deps)) + } else { + true + } + } else { + false + } + } + Expr::Binary { op, .. } => matches!( + op, + BinaryOp::BitAnd + | BinaryOp::BitOr + | BinaryOp::BitXor + | BinaryOp::Shl + | BinaryOp::Shr + | BinaryOp::UShr + ), + Expr::LocalGet(id) => { + if candidates.contains(id) { + deps.insert(*id); + true + } else { + false } - _ => {} } + Expr::Uint8ArrayGet { .. } | Expr::BufferIndexGet { .. } => true, + Expr::MathImul(_, _) => true, + // Issue #50 bridge: element access on a flat-const 2D int array + // produces i32. The flat-const facts are immutable within this + // analysis (never-mutated module consts), so no local deps. + Expr::IndexGet { object, .. } => match object.as_ref() { + Expr::IndexGet { object: inner, .. } => { + matches!(inner.as_ref(), Expr::LocalGet(id) if flat_const_ids.contains(id)) + } + Expr::LocalGet(id) => flat_row_alias_ids.contains(id), + _ => false, + }, + _ => false, } } @@ -492,15 +626,19 @@ pub fn collect_flat_row_aliases( /// Returns `true` if evaluating `e` yields a value that will already be /// integer-valued — so writing it into a local's i32 slot is lossless. /// +/// This is the *admission-side* judgment: it may be optimistic (e.g. +/// `Expr::Update` and clamp calls are accepted unconditionally) because every +/// admitted candidate is re-judged with the strict, provenance-recording +/// `int32_producing_deps` during disqualification — see the module comment. +/// /// Accepted shapes: /// - `Expr::Integer(_)`: trivially integer. /// - `(expr) | 0` and `(expr) >>> 0`: the JS ToInt32 / ToUint32 idiom — /// always yields a 32-bit integer regardless of the inner expression. /// - Pure bitwise ops (`&`, `|`, `^`, `<<`, `>>`, `>>>`): per JS spec /// these coerce both operands to int32 and return int32. -/// - `Expr::Update`: `++` / `--` on an integer-stable local (we don't -/// verify transitively; if the target isn't qualified, the whole chain -/// collapses anyway). +/// - `Expr::Update`: `++` / `--` on an integer-stable local (admission +/// doesn't verify the target; the disqualification judgment does). /// - (issue #49) `LocalGet(id)` when `id` is itself in `known_int_locals` — /// enables the accumulator pattern `acc = acc + int_expr` without /// requiring a `| 0` wrapper on every write. diff --git a/crates/perry-codegen/src/collectors/mod.rs b/crates/perry-codegen/src/collectors/mod.rs index 17d88879cb..8552613726 100644 --- a/crates/perry-codegen/src/collectors/mod.rs +++ b/crates/perry-codegen/src/collectors/mod.rs @@ -51,9 +51,9 @@ pub(crate) use escape_objects::{ pub(crate) use hir_facts::{collect_hir_facts, collect_native_region_fact_graph}; pub(crate) use i32_locals::{ collect_integer_let_ids, collect_localset_ids_in_expr_filtered, collect_localset_ids_in_stmts, - collect_localset_ids_in_stmts_filtered, collect_non_int_localset_ids_in_stmts, - collect_strictly_i32_bounded_locals, collect_unsigned_i32_locals, is_bitwise_expr, - is_flat_const_indexget, is_strictly_i32_bounded_expr, is_ushr_zero, walk_writes_for_strict, + collect_localset_ids_in_stmts_filtered, collect_strictly_i32_bounded_locals, + collect_unsigned_i32_locals, is_bitwise_expr, is_flat_const_indexget, + is_strictly_i32_bounded_expr, is_ushr_zero, walk_writes_for_strict, walk_writes_in_expr_for_strict, }; pub(crate) use i64_emit::{i64_body, i64_cond, i64_val}; diff --git a/crates/perry-codegen/src/lower_call/func_ref.rs b/crates/perry-codegen/src/lower_call/func_ref.rs index 878a59effc..a332d8645d 100644 --- a/crates/perry-codegen/src/lower_call/func_ref.rs +++ b/crates/perry-codegen/src/lower_call/func_ref.rs @@ -28,22 +28,51 @@ pub fn try_lower_func_ref_call( // `is_inlinable`) so this path fires at every call site and the // `dowhile/break` shape that blocked LLVM's auto-vectorizer // never appears in the IR. + // + // clamp3-shaped functions return one of their ARGUMENTS verbatim, so + // the i32 intrinsification is only sound when every argument is + // provably i32-lowerable (`can_lower_expr_as_i32` — whose contract + // `lower_expr_as_i32` requires anyway). Unconditional intrinsification + // fptosi'd fractional doubles (`clamp3(2.5, 0, 5)` returned 2) and + // NaN-boxed pointers (i32::MIN — the #4785 `(number).method is not a + // function` bug class) at every call site. Non-i32 arguments fall + // through to the ordinary direct call, whose compiled body has the + // correct verbatim-return semantics. clampU8 stays unconditional: its + // detector verifies the body ends in `return v | 0`, and fptosi + + // smax(0)/smin(255) agrees with that coercion for every f64 input + // (out-of-range values hit the clamp bounds first; NaN and boxed + // pointers coerce to 0 either way). if ctx.clamp3_functions.contains(fid) && args.len() == 3 { - let v = crate::expr::lower_expr_as_i32(ctx, &args[0])?; - let lo = crate::expr::lower_expr_as_i32(ctx, &args[1])?; - let hi = crate::expr::lower_expr_as_i32(ctx, &args[2])?; - let blk = ctx.block(); - let r1 = blk.fresh_reg(); - blk.emit_raw(format!( - "{} = call i32 @llvm.smax.i32(i32 {}, i32 {})", - r1, v, lo - )); - let r2 = blk.fresh_reg(); - blk.emit_raw(format!( - "{} = call i32 @llvm.smin.i32(i32 {}, i32 {})", - r2, r1, hi - )); - return Ok(Some(blk.sitofp(I32, &r2, DOUBLE))); + let args_are_i32 = args.iter().all(|a| { + crate::expr::can_lower_expr_as_i32( + a, + &ctx.i32_counter_slots, + ctx.flat_const_arrays, + &ctx.array_row_aliases, + ctx.integer_locals, + ctx.clamp3_functions, + ctx.clamp_u8_functions, + ctx.integer_returning_functions, + ctx.i32_identity_functions, + ) + }); + if args_are_i32 { + let v = crate::expr::lower_expr_as_i32(ctx, &args[0])?; + let lo = crate::expr::lower_expr_as_i32(ctx, &args[1])?; + let hi = crate::expr::lower_expr_as_i32(ctx, &args[2])?; + let blk = ctx.block(); + let r1 = blk.fresh_reg(); + blk.emit_raw(format!( + "{} = call i32 @llvm.smax.i32(i32 {}, i32 {})", + r1, v, lo + )); + let r2 = blk.fresh_reg(); + blk.emit_raw(format!( + "{} = call i32 @llvm.smin.i32(i32 {}, i32 {})", + r2, r1, hi + )); + return Ok(Some(blk.sitofp(I32, &r2, DOUBLE))); + } } if ctx.clamp_u8_functions.contains(fid) && args.len() == 1 { let v = crate::expr::lower_expr_as_i32(ctx, &args[0])?; diff --git a/crates/perry/tests/integer_locals_provenance.rs b/crates/perry/tests/integer_locals_provenance.rs new file mode 100644 index 0000000000..7cbd0a5603 --- /dev/null +++ b/crates/perry/tests/integer_locals_provenance.rs @@ -0,0 +1,197 @@ +//! Regression tests for the integer-locals provenance analysis +//! (`crates/perry-codegen/src/collectors/integer_locals.rs`). +//! +//! Bug class (#4785): a local wrongly kept in `integer_locals` gets an i32 +//! shadow slot; when its f64 slot actually holds a NaN-boxed pointer, the +//! i32 read is `fptosi(NaN) = i32::MIN` and user code crashes with +//! `(number).method is not a function`. Each test compiles a TypeScript +//! source with the real binary and asserts the produced executable's stdout, +//! so any admission path that escapes transitive disqualification shows up +//! as a wrong value or a crash here. + +use std::path::{Path, PathBuf}; +use std::process::Command; + +fn perry_bin() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_perry")) +} + +fn compile_and_run(dir: &Path, source: &str) -> String { + let entry = dir.join("main.ts"); + let output = dir.join("main_bin"); + std::fs::write(&entry, source).expect("write entry"); + + let compile = Command::new(perry_bin()) + .current_dir(dir) + .arg("compile") + .arg(&entry) + .arg("-o") + .arg(&output) + .output() + .expect("run perry compile"); + assert!( + compile.status.success(), + "perry compile failed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&compile.stdout), + String::from_utf8_lossy(&compile.stderr) + ); + + let run = Command::new(&output).output().expect("run compiled binary"); + assert!( + run.status.success(), + "compiled binary failed (signal/segfault = stale i32 shadow slot)\nstatus: {:?}\nstdout:\n{}\nstderr:\n{}", + run.status, + String::from_utf8_lossy(&run.stdout), + String::from_utf8_lossy(&run.stderr) + ); + String::from_utf8_lossy(&run.stdout).into_owned() +} + +// The original #4785 shape: array destructuring emits a mutable +// `__destruct = undefined` scaffolding local; the init-only copy chain off it +// (`cb = v`) must not inherit a stale i32 slot when the scaffolding is +// disqualified by its non-integer writes. +#[test] +fn destructured_value_copy_is_not_truncated() { + let dir = tempfile::tempdir().expect("tempdir"); + let stdout = compile_and_run( + dir.path(), + r#" +function build(entry: any): string { + const [k, v] = entry; + const cb = v; + return k + ":" + cb.setName("users"); +} +const builder = { + setName(n: string): string { return "named-" + n; }, +}; +console.log(build(["col", builder])); +console.log(typeof build(["col", builder])); +"#, + ); + assert_eq!(stdout, "col:named-users\nstring\n"); +} + +// A 2+ hop init-only copy chain off a disqualified `undefined` seed: every +// hop must be pruned, not just the first. +#[test] +fn multi_hop_copy_chain_off_disqualified_seed() { + let dir = tempfile::tempdir().expect("tempdir"); + let stdout = compile_and_run( + dir.path(), + r#" +function chain(flag: boolean): string { + let a: any = undefined; + if (flag) { + a = { tag: () => "from-object" }; + } + const b = a; + const c = b; + return c.tag(); +} +console.log(chain(true)); +"#, + ); + assert_eq!(stdout, "from-object\n"); +} + +// Provenance through a clamp-style admission: `clamp3`-shaped helpers return +// one of their ARGUMENTS verbatim, so `const xx = clamp3(src, 0, 100)` is +// only integer when `src` is. With `src` holding an object, `xx` must not +// keep an i32 slot. (Pre-provenance, `is_int32_producing_expr` accepted any +// clamp call unconditionally and the init-only re-validation never pruned +// xx — this is the clamp_fn_ids bypass.) +#[test] +fn clamp_admitted_local_follows_disqualified_source() { + let dir = tempfile::tempdir().expect("tempdir"); + let stdout = compile_and_run( + dir.path(), + r#" +function clamp3(v: number, lo: number, hi: number): number { + if (v < lo) return lo; + if (v > hi) return hi; + return v; +} +function pick(flag: boolean): string { + let src: any = undefined; + if (flag) { + src = { name: () => "still-an-object" }; + } + const xx = clamp3(src, 0, 100); + const yy = xx; + return yy.name(); +} +console.log(pick(true)); +console.log(clamp3(50, 0, 100), clamp3(-3, 0, 100), clamp3(7.5, 0, 5)); +"#, + ); + assert_eq!(stdout, "still-an-object\n50 0 5\n"); +} + +// A candidate WITH later integer writes must still be re-validated through +// its init: reads between `let b = a` (a disqualified) and the later `b = 1` +// must see the object, not a truncated i32. +#[test] +fn written_local_init_read_before_reassignment() { + let dir = tempfile::tempdir().expect("tempdir"); + let stdout = compile_and_run( + dir.path(), + r#" +function probe(flag: boolean): string { + const arr = [10, 20, 30]; + let a: any = undefined; + if (flag) { + a = { m: () => "object-alive" }; + } + let b: any = a; + const seen = b.m(); + b = 1; + return seen + "/" + arr[b]; +} +console.log(probe(true)); +"#, + ); + assert_eq!(stdout, "object-alive/20\n"); +} + +// Positive case: the optimization must still fire — a hot integer loop and a +// clamp-fed index chain still compute correct values. (Slot survival was +// eyeballed via `--trace llvm`; correctness is the hard gate here.) +#[test] +fn integer_loops_still_compute_correctly() { + let dir = tempfile::tempdir().expect("tempdir"); + let stdout = compile_and_run( + dir.path(), + r#" +function hot(n: number): number { + let sum = 0; + for (let i = 0; i < n; i++) { + sum = sum + i; + } + return sum; +} +function clampIdx(v: number, lo: number, hi: number): number { + if (v < lo) return lo; + if (v > hi) return hi; + return v; +} +function kernel(): number { + const data = [1, 2, 3, 4, 5, 6, 7, 8]; + const W = 8; + const hi = W - 1; + let acc = 0; + for (let x = 0; x < W; x++) { + const xx = clampIdx(x + 2, 0, hi); + const idx = xx | 0; + acc = acc + data[idx]; + } + return acc; +} +console.log(hot(100000)); +console.log(kernel()); +"#, + ); + // hot: 0+1+…+99999 = 4999950000 (exceeds i32 — must not truncate). + // kernel: data[2..7] + data[7]*2 = 3+4+5+6+7+8+8+8 = 49. + assert_eq!(stdout, "4999950000\n49\n"); +}