From f13cd9a794fc6ec86171be50c9bebc85516df0af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Thu, 18 Jun 2026 09:27:14 +0200 Subject: [PATCH 1/4] perf(codegen): full-outline class-field IC diamond for oversized modules (#5334 lever B) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pathologically-large modules (the motivating case: a 13MB minified bundle that lowers to ~1.25GB of LLVM IR across ~92K functions) are forced to `clang -O0` (#4880), where the inline class-field-SET IC diamond's ~15-lines-per-site expansion is never optimized away — and clang needs ~15GB RSS just to chew through it. For such modules, replace the ENTIRE diamond (guard call + fast slot store + fallback arm) with a single `call @js_class_field_set_ic(...)`. The runtime helper reproduces the diamond's exact semantics — run the guard, then on PASS do the same raw-f64/boxed slot store, on FAIL record + route by name. This trades a function-call frame on the (cold, startup- dominated) field-set path for a large per-site IR reduction so clang can compile the module at all. Gating (codegen-time, decided once per module in compile_module): - `PERRY_FULL_OUTLINE_IC=1/on/true` forces ON, `=0/off/false` forces OFF; - otherwise auto: function count >= PERRY_FULL_OUTLINE_IC_MIN_FUNCS (default 4000) — the defining trait of the bundle case; ordinary per-file modules stay on the inline diamond and keep the hot fast store. The decision is a thread-local set at the top of compile_module (codegen is sequential per module), not a process-global OnceLock, so it can't pin one module's decision across a multi-module build. NB: the full-outline boxed store always emits the write barrier (via js_object_set_field), so the compile-time non-pointer barrier elision (#5334 lever D) does not apply on this path — acceptable, since it is gated to oversized, non-hot-loop modules. 
Verified: forced ON collapses the diamond to one call (no fast/fallback blocks, no inline guard call); a class-field-write program runs to the correct result under full-outline; full perry-codegen suite green. The two class-field structure tests now pin PERRY_FULL_OUTLINE_IC off and serialize on ENV_LOCK against the new lever-B test. Final lever of the IR-efficiency roadmap (#5334). Levers A #5351, C #5350 merged; D #5381 in review. --- crates/perry-codegen/src/codegen/helpers.rs | 46 +++++++++++ crates/perry-codegen/src/codegen/mod.rs | 12 ++- crates/perry-codegen/src/expr/property_set.rs | 34 ++++++++ .../src/runtime_decls/objects.rs | 9 +++ crates/perry-codegen/tests/typed_feedback.rs | 55 +++++++++++++ .../src/typed_feedback/guards.rs | 78 +++++++++++++++++++ 6 files changed, 233 insertions(+), 1 deletion(-) diff --git a/crates/perry-codegen/src/codegen/helpers.rs b/crates/perry-codegen/src/codegen/helpers.rs index 63e806b9f..0578606b9 100644 --- a/crates/perry-codegen/src/codegen/helpers.rs +++ b/crates/perry-codegen/src/codegen/helpers.rs @@ -108,6 +108,52 @@ pub(crate) fn write_barriers_enabled() -> bool { }) } +thread_local! { + static FULL_OUTLINE_IC: std::cell::Cell<bool> = const { std::cell::Cell::new(false) }; +} + +/// Lever B (#5334) full-outline gate for class-field IC diamonds. Set ONCE per +/// module at the top of `compile_module` (see [`decide_full_outline_ic`]), read +/// at each class-field-set lowering. Thread-local — NOT a process-global +/// `OnceLock` — because codegen runs one module per `compile_module` call and a +/// process-global would wrongly pin the first module's decision for the rest of +/// a multi-module build (and across tests). Codegen within a module is +/// sequential, so a thread-local is safe and avoids threading a flag through all +/// six `FnCtx` construction sites. 
+pub(crate) fn full_outline_ic_enabled() -> bool { + FULL_OUTLINE_IC.with(|c| c.get()) +} + +pub(crate) fn set_full_outline_ic(enabled: bool) { + FULL_OUTLINE_IC.with(|c| c.set(enabled)); +} + +/// Decide whether a module is large enough to warrant full-outlining its +/// class-field IC diamonds (#5334 lever B). Oversized modules are forced to +/// `clang -O0` (#4880), where the inline diamond's ~15-line-per-site expansion +/// is never optimized away; collapsing each site to one +/// `call @js_class_field_set_ic(...)` keeps the IR small enough for clang to +/// compile at all. Gated on function count — the defining trait of the +/// pathological minified-bundle case (tens of thousands of functions in one +/// module); ordinary per-file modules stay on the inline diamond and keep the +/// hot fast store. +/// +/// `PERRY_FULL_OUTLINE_IC=1`/`on`/`true` forces ON, `=0`/`off`/`false` forces +/// OFF; otherwise auto: `function_count >= PERRY_FULL_OUTLINE_IC_MIN_FUNCS` +/// (default 4000). +pub(crate) fn decide_full_outline_ic(function_count: usize) -> bool { + match std::env::var("PERRY_FULL_OUTLINE_IC").as_deref() { + Ok("1") | Ok("on") | Ok("true") => return true, + Ok("0") | Ok("off") | Ok("false") => return false, + _ => {} + } + let threshold = std::env::var("PERRY_FULL_OUTLINE_IC_MIN_FUNCS") + .ok() + .and_then(|v| v.parse::<usize>().ok()) + .unwrap_or(4000); + function_count >= threshold +} + pub(super) fn scoped_fn_name(module_prefix: &str, hir_name: &str) -> String { // Use the INJECTIVE sanitizer (same as scoped_static_method_name): plain // `sanitize` maps every non-`[A-Za-z0-9_]` char to `_`, so distinct minified diff --git a/crates/perry-codegen/src/codegen/mod.rs b/crates/perry-codegen/src/codegen/mod.rs index 06366d7f8..bd55bff51 100644 --- a/crates/perry-codegen/src/codegen/mod.rs +++ b/crates/perry-codegen/src/codegen/mod.rs @@ -50,7 +50,10 @@ mod opts; mod string_pool; pub use helpers::resolve_target_triple; -pub(crate) use helpers::{default_target_triple, 
write_barriers_enabled}; +pub(crate) use helpers::{ + decide_full_outline_ic, default_target_triple, full_outline_ic_enabled, set_full_outline_ic, + write_barriers_enabled, +}; pub use opts::{ AppMetadata, CompileOptions, FpContractMode, ImportedClass, NamespaceEntry, NamespaceEntryKind, }; @@ -92,6 +95,13 @@ pub fn compile_module(hir: &HirModule, opts: CompileOptions) -> Result> let triple = opts.target.clone().unwrap_or_else(default_target_triple); let fp_flags = crate::block::FpFlags::new(opts.fast_math, opts.fp_contract_mode); + // #5334 lever B: decide ONCE, up front, whether this module is large enough + // to full-outline its class-field IC diamonds (read per-site during + // lowering via `full_outline_ic_enabled()`). Thread-local, so it must be set + // afresh for every module — including the `false` case, to clear any prior + // module's decision on this thread. + set_full_outline_ic(decide_full_outline_ic(hir.functions.len())); + let mut llmod = LlModule::new_with_fp_flags(&triple, fp_flags); // Null guard global: a zeroed i32 used as a safe dereference target // when a NaN-unboxed pointer is null/invalid. Prevents segfaults from diff --git a/crates/perry-codegen/src/expr/property_set.rs b/crates/perry-codegen/src/expr/property_set.rs index 2936d8b4f..910e2c53e 100644 --- a/crates/perry-codegen/src/expr/property_set.rs +++ b/crates/perry-codegen/src/expr/property_set.rs @@ -310,6 +310,40 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { .as_ref() .is_some_and(crate::typed_shape::type_is_raw_f64_candidate); let requires_raw_f64_str = if requires_raw_f64 { "1" } else { "0" }; + // #5334 lever B: oversized modules full-outline the entire + // class-field-SET IC diamond (guard + fast store + + // fallback) to a single `js_class_field_set_ic(...)` call. 
+ // This trades a call frame on the (cold, startup- + // dominated) field-set path for a large per-site IR + // reduction, so clang -O0 — which oversized modules are + // forced to (#4880) — can actually compile the module. + // Only the call's own operands are materialized (the key + // handle + expected-keys), not the inline-store scaffolding. + if crate::codegen::full_outline_ic_enabled() { + let (key_raw, expected_keys) = { + let blk = ctx.block(); + let key_box = blk.load(DOUBLE, &key_handle_global); + let key_bits = blk.bitcast_double_to_i64(&key_box); + let key_raw = blk.and(I64, &key_bits, POINTER_MASK_I64); + let expected_keys = + blk.load(I64, &format!("@{}", keys_global_name)); + (key_raw, expected_keys) + }; + ctx.block().call_void( + "js_class_field_set_ic", + &[ + (I64, &site_id), + (DOUBLE, &recv_box), + (I32, &expected_class_id_str), + (I64, &expected_keys), + (I64, &key_raw), + (I32, &field_idx_str), + (DOUBLE, &val_double), + (I32, requires_raw_f64_str), + ], + ); + return Ok(val_double); + } // #5093: build the guard operands once, up front, so both // the inline shape pre-check and the guard-call fallback // can reference them. diff --git a/crates/perry-codegen/src/runtime_decls/objects.rs b/crates/perry-codegen/src/runtime_decls/objects.rs index e10916c27..24aaea2cf 100644 --- a/crates/perry-codegen/src/runtime_decls/objects.rs +++ b/crates/perry-codegen/src/runtime_decls/objects.rs @@ -120,6 +120,15 @@ pub fn declare_phase_b_objects(module: &mut LlModule) { VOID, &[I64, I64, I64, DOUBLE], ); + // #5334 lever B: class-field-SET inline cache, FULLY outlined. For oversized + // modules the whole diamond (guard + fast store + fallback) collapses to one + // call. Args: (site_id, recv, expected_class_id, expected_keys, key, + // field_index, value, require_raw_f64). Same parameters as the set guard, but returns void (the guard returns i32). 
+ module.declare_function( + "js_class_field_set_ic", + VOID, + &[I64, DOUBLE, I32, I64, I64, I32, DOUBLE, I32], + ); module.declare_function( "js_typed_feedback_class_field_get_guard", I32, diff --git a/crates/perry-codegen/tests/typed_feedback.rs b/crates/perry-codegen/tests/typed_feedback.rs index 9ad0fad33..d1a5c9e6f 100644 --- a/crates/perry-codegen/tests/typed_feedback.rs +++ b/crates/perry-codegen/tests/typed_feedback.rs @@ -266,6 +266,11 @@ fn typed_feedback_instruments_property_and_method_boundaries() { #[test] fn typed_feedback_guards_direct_class_field_specialization() { + // Serialize against the lever-B test (#5334), which sets the process-global + // PERRY_FULL_OUTLINE_IC in this same test binary; pin it off so this test + // always observes the inline diamond. + let _lock = ENV_LOCK.lock().unwrap(); + let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0")); let point = class(101, "Point", vec![field("x", Type::Number)]); let ir = ir_for(module_with_classes( "typed_feedback_class_field.ts", @@ -307,8 +312,58 @@ fn typed_feedback_guards_direct_class_field_specialization() { assert!(ir.contains("call double @js_object_get_field_by_name_f64")); } +#[test] +fn full_outline_ic_collapses_class_field_set_to_single_call() { + // #5334 lever B: when full-outline is enabled (oversized module, or forced + // via env), the entire class-field-SET diamond collapses to a single + // `js_class_field_set_ic` call — no guard call, no fast/fallback blocks. 
+ let build = || { + let point = class(101, "Point", vec![field("x", Type::Number)]); + module_with_classes( + "full_outline_field.ts", + vec![point], + vec![param(1, "p", Type::Named("Point".to_string()))], + Type::Number, + vec![ + Stmt::Expr(Expr::PropertySet { + object: Box::new(Expr::LocalGet(1)), + property: "x".to_string(), + value: Box::new(Expr::Number(7.0)), + }), + Stmt::Return(Some(Expr::Number(0.0))), + ], + ) + }; + + let _lock = ENV_LOCK.lock().unwrap(); + + // Forced ON: one outlined call, no inline diamond. + { + let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("1")); + let ir = ir_for(build()); + assert!(ir.contains("call void @js_class_field_set_ic")); + assert!(!ir.contains("class_field_set.fast")); + assert!(!ir.contains("class_field_set.fallback")); + assert!(!ir.contains("call i32 @js_typed_feedback_class_field_set_guard")); + } + + // Forced OFF (the default for normal-sized modules): the inline diamond, + // and no full-outline call. + { + let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0")); + let ir = ir_for(build()); + assert!(!ir.contains("call void @js_class_field_set_ic")); + assert!(ir.contains("class_field_set.fast")); + assert!(ir.contains("js_typed_feedback_class_field_set_guard")); + } +} + #[test] fn typed_feedback_guards_direct_class_method_specialization() { + // Serialize against the lever-B test (#5334) and pin full-outline off so the + // class's synthesized field-set keeps its inline fallback (asserted below). 
+ let _lock = ENV_LOCK.lock().unwrap(); + let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0")); let mut point = class(103, "Point", vec![field("x", Type::Number)]); point.methods.push(Function { id: 7, diff --git a/crates/perry-runtime/src/typed_feedback/guards.rs b/crates/perry-runtime/src/typed_feedback/guards.rs index 2fabad649..c42f9176e 100644 --- a/crates/perry-runtime/src/typed_feedback/guards.rs +++ b/crates/perry-runtime/src/typed_feedback/guards.rs @@ -635,6 +635,83 @@ pub extern "C" fn js_class_field_set_fallback( ); } +/// Class-field-SET inline cache, FULLY OUTLINED (#5334, lever B). +/// +/// For pathologically-large modules (which are forced to `clang -O0`, where the +/// inline IC diamond's ~15-line-per-site expansion is never optimized away), +/// codegen replaces the ENTIRE diamond — guard call, fast slot store, and +/// fallback arm — with a single `call @js_class_field_set_ic(...)`. This trades +/// a function-call frame on the (cold, startup-dominated) field-set path for a +/// large reduction in emitted IR, so clang can actually compile the module. +/// +/// The body reproduces the diamond's exact semantics: +/// 1. run the same `js_typed_feedback_class_field_set_guard`; +/// 2. on a guard PASS, do the same slot store the inline fast block would — +/// a bare `f64` store for a `require_raw_f64` slot (pointer-free by typed +/// shape, no barrier), or `js_object_set_field` for a boxed slot (slot +/// write + layout note + write barrier); +/// 3. on a guard FAIL, record the fallback and route the write by name +/// (handles frozen / accessor / non-writable / setter-in-chain). +/// +/// Frozen/accessor/writable/setter handling all live behind the guard, so no +/// special-casing here. 
NB: the boxed store always emits the write barrier +/// (via `js_object_set_field`) — the compile-time non-pointer barrier elision +/// (#5334 lever D) does not apply on this path, an acceptable cost since the +/// full-outline path is gated to oversized, startup-dominated modules. +#[no_mangle] +pub extern "C" fn js_class_field_set_ic( + site_id: u64, + receiver: f64, + expected_class_id: u32, + expected_keys: *const ArrayHeader, + key: *const crate::StringHeader, + expected_field_index: u32, + value: f64, + require_raw_f64: i32, +) { + let guard_ok = js_typed_feedback_class_field_set_guard( + site_id, + receiver, + expected_class_id, + expected_keys, + key, + expected_field_index, + value, + require_raw_f64, + ); + + if guard_ok != 0 { + let object_addr = normalize_raw_object_addr(receiver.to_bits()); + if require_raw_f64 != 0 { + // Pointer-free raw-f64 slot: bare store, no GC barrier. + unsafe { + let fields_ptr = + (object_addr as *mut u8).add(std::mem::size_of::<ObjectHeader>()) as *mut f64; + let slot = fields_ptr.add(expected_field_index as usize); + std::ptr::write(slot, value); + } + } else { + // Boxed slot: slot write + layout note + write barrier. + crate::object::js_object_set_field( + object_addr as *mut ObjectHeader, + expected_field_index, + crate::value::JSValue::from_bits(value.to_bits()), + ); + } + return; + } + + // Guard FAIL → record + route by name (same as js_class_field_set_fallback). 
+ crate::typed_feedback::js_typed_feedback_record_fallback_call(site_id); + let obj_bits = receiver.to_bits(); + let key_raw = key as u64 & crate::value::POINTER_MASK; + crate::object::js_object_set_field_by_name( + obj_bits as *mut ObjectHeader, + key_raw as *const crate::StringHeader, + value, + ); +} + #[no_mangle] pub unsafe extern "C" fn js_typed_feedback_native_call_method( site_id: u64, @@ -872,6 +949,7 @@ mod keep_guard_symbols { #[used] static G0: extern "C" fn(u64, f64, u32, *const ArrayHeader, *const crate::StringHeader, u32, i32) -> i32 = js_typed_feedback_class_field_get_guard; #[used] static G1: extern "C" fn(u64, f64, u32, *const ArrayHeader, *const crate::StringHeader, u32, f64, i32) -> i32 = js_typed_feedback_class_field_set_guard; #[used] static G1C: extern "C" fn(u64, u64, u64, f64) = js_class_field_set_fallback; + #[used] static G1D: extern "C" fn(u64, f64, u32, *const ArrayHeader, *const crate::StringHeader, u32, f64, i32) = js_class_field_set_ic; #[used] static G2: unsafe extern "C" fn(u64, f64, u32, *const ArrayHeader, *const i8, usize, *const u8) -> i32 = js_typed_feedback_method_direct_call_guard; #[used] static G3: extern "C" fn(u64, f64, *const u8, u32, u32) -> i32 = js_typed_feedback_closure_direct_call_guard; } From fc6371cb238739aa2ea8cd7df5d16061351cab58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Thu, 18 Jun 2026 09:36:11 +0200 Subject: [PATCH 2/4] review: count class callables in lever-B gate; dedup IC fallback tail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses self-review findings on #5334 lever B (#5385): - Gate denominator: `decide_full_outline_ic` was fed `hir.functions.len()`, which excludes class methods, static methods, accessors, and constructors (those live in `hir.classes[].*`, collected separately). 
A class-heavy minified bundle — the exact pathology lever B targets — could have a small `functions.len()` yet emit tens of thousands of LLVM functions, so the gate would never fire. New `module_callable_count()` counts top-level functions plus all class callables; the gate now uses it. New test `full_outline_ic_auto_gate_counts_class_methods` covers a class-heavy module triggering with only one top-level function. - Dedup: `js_class_field_set_ic`'s guard-FAIL tail re-implemented `js_class_field_set_fallback` verbatim. It now delegates to that helper, so by-name routing (frozen / accessor / setter-in-chain) is defined once. Full perry-codegen + perry-runtime typed_feedback suites green. --- crates/perry-codegen/src/codegen/helpers.rs | 37 +++++++++++--- crates/perry-codegen/src/codegen/mod.rs | 6 +-- crates/perry-codegen/tests/typed_feedback.rs | 51 +++++++++++++++++++ .../src/typed_feedback/guards.rs | 11 ++-- 4 files changed, 88 insertions(+), 17 deletions(-) diff --git a/crates/perry-codegen/src/codegen/helpers.rs b/crates/perry-codegen/src/codegen/helpers.rs index 0578606b9..9dc383045 100644 --- a/crates/perry-codegen/src/codegen/helpers.rs +++ b/crates/perry-codegen/src/codegen/helpers.rs @@ -128,20 +128,43 @@ pub(crate) fn set_full_outline_ic(enabled: bool) { FULL_OUTLINE_IC.with(|c| c.set(enabled)); } +/// Total number of LLVM functions a module will emit — top-level functions +/// plus every class callable (constructor, instance/static methods, accessor +/// get/set bodies). Used as the lever-B size proxy: class methods and closures +/// do NOT live in `hir.functions`, so a class-heavy minified bundle (the exact +/// pathology lever B targets) can have a small `functions.len()` yet emit tens +/// of thousands of LLVM functions. Counting class callables keeps the gate from +/// silently under-counting and never firing on those modules. 
+pub(crate) fn module_callable_count(hir: &perry_hir::Module) -> usize { + let class_callables: usize = hir + .classes + .iter() + .map(|c| { + usize::from(c.constructor.is_some()) + + c.methods.len() + + c.static_methods.len() + + c.getters.len() + + c.setters.len() + }) + .sum(); + hir.functions.len() + class_callables +} + /// Decide whether a module is large enough to warrant full-outlining its /// class-field IC diamonds (#5334 lever B). Oversized modules are forced to /// `clang -O0` (#4880), where the inline diamond's ~15-line-per-site expansion /// is never optimized away; collapsing each site to one /// `call @js_class_field_set_ic(...)` keeps the IR small enough for clang to -/// compile at all. Gated on function count — the defining trait of the -/// pathological minified-bundle case (tens of thousands of functions in one -/// module); ordinary per-file modules stay on the inline diamond and keep the -/// hot fast store. +/// compile at all. Gated on the module's total callable count (see +/// [`module_callable_count`]) — the defining trait of the pathological +/// minified-bundle case (tens of thousands of callables in one module); +/// ordinary per-file modules stay on the inline diamond and keep the hot fast +/// store. /// /// `PERRY_FULL_OUTLINE_IC=1`/`on`/`true` forces ON, `=0`/`off`/`false` forces -/// OFF; otherwise auto: `function_count >= PERRY_FULL_OUTLINE_IC_MIN_FUNCS` +/// OFF; otherwise auto: `callable_count >= PERRY_FULL_OUTLINE_IC_MIN_FUNCS` /// (default 4000). 
-pub(crate) fn decide_full_outline_ic(function_count: usize) -> bool { +pub(crate) fn decide_full_outline_ic(callable_count: usize) -> bool { match std::env::var("PERRY_FULL_OUTLINE_IC").as_deref() { Ok("1") | Ok("on") | Ok("true") => return true, Ok("0") | Ok("off") | Ok("false") => return false, @@ -151,7 +174,7 @@ pub(crate) fn decide_full_outline_ic(function_count: usize) -> bool { .ok() .and_then(|v| v.parse::().ok()) .unwrap_or(4000); - function_count >= threshold + callable_count >= threshold } pub(super) fn scoped_fn_name(module_prefix: &str, hir_name: &str) -> String { diff --git a/crates/perry-codegen/src/codegen/mod.rs b/crates/perry-codegen/src/codegen/mod.rs index bd55bff51..23e1dad4e 100644 --- a/crates/perry-codegen/src/codegen/mod.rs +++ b/crates/perry-codegen/src/codegen/mod.rs @@ -51,8 +51,8 @@ mod string_pool; pub use helpers::resolve_target_triple; pub(crate) use helpers::{ - decide_full_outline_ic, default_target_triple, full_outline_ic_enabled, set_full_outline_ic, - write_barriers_enabled, + decide_full_outline_ic, default_target_triple, full_outline_ic_enabled, module_callable_count, + set_full_outline_ic, write_barriers_enabled, }; pub use opts::{ AppMetadata, CompileOptions, FpContractMode, ImportedClass, NamespaceEntry, NamespaceEntryKind, @@ -100,7 +100,7 @@ pub fn compile_module(hir: &HirModule, opts: CompileOptions) -> Result> // lowering via `full_outline_ic_enabled()`). Thread-local, so it must be set // afresh for every module — including the `false` case, to clear any prior // module's decision on this thread. 
- set_full_outline_ic(decide_full_outline_ic(hir.functions.len())); + set_full_outline_ic(decide_full_outline_ic(module_callable_count(hir))); let mut llmod = LlModule::new_with_fp_flags(&triple, fp_flags); // Null guard global: a zeroed i32 used as a safe dereference target diff --git a/crates/perry-codegen/tests/typed_feedback.rs b/crates/perry-codegen/tests/typed_feedback.rs index d1a5c9e6f..6590e470f 100644 --- a/crates/perry-codegen/tests/typed_feedback.rs +++ b/crates/perry-codegen/tests/typed_feedback.rs @@ -358,6 +358,57 @@ fn full_outline_ic_collapses_class_field_set_to_single_call() { } } +#[test] +fn full_outline_ic_auto_gate_counts_class_methods() { + // #5334 lever B: the auto size-gate counts class CALLABLES (methods, + // accessors, ctor), not just top-level `hir.functions`. A class-heavy module + // (the minified-bundle pathology) must trigger even though it has only one + // top-level function — class methods/closures don't live in `hir.functions`. + let mut big = class(150, "Big", vec![field("x", Type::Number)]); + for i in 0..6u32 { + big.methods.push(Function { + id: 200 + i, + name: format!("m{i}"), + type_params: Vec::new(), + params: Vec::new(), + return_type: Type::Number, + body: vec![Stmt::Return(Some(Expr::Number(0.0)))], + is_async: false, + is_generator: false, + is_strict: false, + is_exported: false, + captures: Vec::new(), + decorators: Vec::new(), + was_plain_async: false, + was_unrolled: false, + }); + } + let module = module_with_classes( + "auto_gate.ts", + vec![big], + vec![param(1, "p", Type::Named("Big".to_string()))], + Type::Number, + vec![ + Stmt::Expr(Expr::PropertySet { + object: Box::new(Expr::LocalGet(1)), + property: "x".to_string(), + value: Box::new(Expr::Number(7.0)), + }), + Stmt::Return(Some(Expr::Number(0.0))), + ], + ); + + let _lock = ENV_LOCK.lock().unwrap(); + // Auto path (override unset): callable count = 1 probe fn + 6 methods = 7, + // which clears MIN_FUNCS=5 even though `hir.functions.len()` is just 1. 
The + // pre-fix function-only count (1) would have stayed under the threshold. + let _ic = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", None); + let _min = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC_MIN_FUNCS", Some("5")); + let ir = ir_for(module); + assert!(ir.contains("call void @js_class_field_set_ic")); + assert!(!ir.contains("class_field_set.fast")); +} + #[test] fn typed_feedback_guards_direct_class_method_specialization() { // Serialize against the lever-B test (#5334) and pin full-outline off so the diff --git a/crates/perry-runtime/src/typed_feedback/guards.rs b/crates/perry-runtime/src/typed_feedback/guards.rs index c42f9176e..e678d5f18 100644 --- a/crates/perry-runtime/src/typed_feedback/guards.rs +++ b/crates/perry-runtime/src/typed_feedback/guards.rs @@ -701,15 +701,12 @@ pub extern "C" fn js_class_field_set_ic( return; } - // Guard FAIL → record + route by name (same as js_class_field_set_fallback). - crate::typed_feedback::js_typed_feedback_record_fallback_call(site_id); + // Guard FAIL → identical to the cold guard-miss arm. Delegate to the shared + // fallback helper so by-name routing (frozen / accessor / setter-in-chain) + // stays defined in exactly one place. let obj_bits = receiver.to_bits(); let key_raw = key as u64 & crate::value::POINTER_MASK; - crate::object::js_object_set_field_by_name( - obj_bits as *mut ObjectHeader, - key_raw as *const crate::StringHeader, - value, - ); + js_class_field_set_fallback(site_id, obj_bits, key_raw, value); } #[no_mangle] From 624cea7b5461a454c98972dced19b939e20259d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Thu, 18 Jun 2026 09:46:38 +0200 Subject: [PATCH 3/4] review(coderabbit): count class computed_members in lever-B gate denominator ClassComputedMember holds a Function that compile_module lowers like any method (emits an LLVM function), so computed members must count toward the oversized-module gate alongside methods/static_methods/getters/setters. 
A class with many computed-key methods could otherwise stay under PERRY_FULL_OUTLINE_IC_MIN_FUNCS and keep the inline diamond when the auto gate should fire. --- crates/perry-codegen/src/codegen/helpers.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/perry-codegen/src/codegen/helpers.rs b/crates/perry-codegen/src/codegen/helpers.rs index 9dc383045..e23094d41 100644 --- a/crates/perry-codegen/src/codegen/helpers.rs +++ b/crates/perry-codegen/src/codegen/helpers.rs @@ -129,8 +129,9 @@ pub(crate) fn set_full_outline_ic(enabled: bool) { } /// Total number of LLVM functions a module will emit — top-level functions -/// plus every class callable (constructor, instance/static methods, accessor -/// get/set bodies). Used as the lever-B size proxy: class methods and closures +/// plus every class callable (constructor, instance/static methods, computed +/// members, accessor get/set bodies). Used as the lever-B size proxy: class +/// methods and closures /// do NOT live in `hir.functions`, so a class-heavy minified bundle (the exact /// pathology lever B targets) can have a small `functions.len()` yet emit tens /// of thousands of LLVM functions. Counting class callables keeps the gate from @@ -143,6 +144,7 @@ pub(crate) fn module_callable_count(hir: &perry_hir::Module) -> usize { usize::from(c.constructor.is_some()) + c.methods.len() + c.static_methods.len() + + c.computed_members.len() + c.getters.len() + c.setters.len() }) From acc60d679412b8c3e77802e48246ecb4a86271ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Thu, 18 Jun 2026 10:10:29 +0200 Subject: [PATCH 4/4] lint: GC_STORE_AUDIT(POINTER_FREE) marker on js_class_field_set_ic raw store MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The full-outline IC helper's raw-f64 slot write is a barrier-free store the GC store-site inventory requires to be annotated. 
A passing guard with require_raw_f64 proves the slot is pointer-free (typed-shape descriptor) and the value is a plain number — identical to the inline class_field_set.fast raw-f64 store. Fixes the failing lint 'GC store-site inventory' step. --- crates/perry-runtime/src/typed_feedback/guards.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/perry-runtime/src/typed_feedback/guards.rs b/crates/perry-runtime/src/typed_feedback/guards.rs index e678d5f18..a6d56af3f 100644 --- a/crates/perry-runtime/src/typed_feedback/guards.rs +++ b/crates/perry-runtime/src/typed_feedback/guards.rs @@ -688,6 +688,10 @@ pub extern "C" fn js_class_field_set_ic( let fields_ptr = (object_addr as *mut u8).add(std::mem::size_of::<ObjectHeader>()) as *mut f64; let slot = fields_ptr.add(expected_field_index as usize); + // GC_STORE_AUDIT(POINTER_FREE): a passing guard with + // require_raw_f64 proved the slot is pointer-free by typed-shape + // descriptor and the value is a plain number — identical to the + // inline `class_field_set.fast` raw-f64 store, which is barrier-free. std::ptr::write(slot, value); } } else {