thread_local! {
    /// Per-thread backing store for the lever-B full-outline gate. Starts out
    /// `false` on every thread until [`set_full_outline_ic`] is called there.
    static FULL_OUTLINE_IC: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
}

/// Lever B (#5334) full-outline gate for class-field IC diamonds. Set ONCE per
/// module at the top of `compile_module` (see [`decide_full_outline_ic`]), read
/// at each class-field-set lowering. Thread-local — NOT a process-global
/// `OnceLock` — because codegen runs one module per `compile_module` call and a
/// process-global would wrongly pin the first module's decision for the rest of
/// a multi-module build (and across tests). Codegen within a module is
/// sequential, so a thread-local is safe and avoids threading a flag through all
/// six `FnCtx` construction sites.
///
/// Returns the value most recently stored on the *calling* thread, or `false`
/// if nothing has been stored on this thread yet.
pub(crate) fn full_outline_ic_enabled() -> bool {
    FULL_OUTLINE_IC.with(|flag| flag.get())
}

/// Stores the full-outline decision for the calling thread.
///
/// The value persists until overwritten, so callers must store it afresh for
/// every module — including `false` — to clear a previous module's decision.
pub(crate) fn set_full_outline_ic(enabled: bool) {
    FULL_OUTLINE_IC.with(|flag| flag.set(enabled));
}
+pub(crate) fn module_callable_count(hir: &perry_hir::Module) -> usize { + let class_callables: usize = hir + .classes + .iter() + .map(|c| { + usize::from(c.constructor.is_some()) + + c.methods.len() + + c.static_methods.len() + + c.computed_members.len() + + c.getters.len() + + c.setters.len() + }) + .sum(); + hir.functions.len() + class_callables +} + +/// Decide whether a module is large enough to warrant full-outlining its +/// class-field IC diamonds (#5334 lever B). Oversized modules are forced to +/// `clang -O0` (#4880), where the inline diamond's ~15-line-per-site expansion +/// is never optimized away; collapsing each site to one +/// `call @js_class_field_set_ic(...)` keeps the IR small enough for clang to +/// compile at all. Gated on the module's total callable count (see +/// [`module_callable_count`]) — the defining trait of the pathological +/// minified-bundle case (tens of thousands of callables in one module); +/// ordinary per-file modules stay on the inline diamond and keep the hot fast +/// store. +/// +/// `PERRY_FULL_OUTLINE_IC=1`/`on`/`true` forces ON, `=0`/`off`/`false` forces +/// OFF; otherwise auto: `callable_count >= PERRY_FULL_OUTLINE_IC_MIN_FUNCS` +/// (default 4000). 
/// Decide whether a module is large enough to warrant full-outlining its
/// class-field IC diamonds (#5334 lever B). Oversized modules are forced to
/// `clang -O0` (#4880), where the inline diamond's ~15-line-per-site expansion
/// is never optimized away; collapsing each site to one
/// `call @js_class_field_set_ic(...)` keeps the IR small enough for clang to
/// compile at all. Gated on the module's total callable count — the defining
/// trait of the pathological minified-bundle case (tens of thousands of
/// callables in one module); ordinary per-file modules stay on the inline
/// diamond and keep the hot fast store.
///
/// Environment overrides (both values are trimmed of surrounding whitespace):
///
/// * `PERRY_FULL_OUTLINE_IC` = `1`/`on`/`true` forces ON, `0`/`off`/`false`
///   forces OFF (matched ASCII-case-insensitively). Any other value, or an
///   unset variable, falls through to the automatic gate.
/// * `PERRY_FULL_OUTLINE_IC_MIN_FUNCS` sets the automatic threshold; an unset
///   or unparsable value uses the default of 4000.
///
/// Returns `true` when forced ON, `false` when forced OFF, and otherwise
/// `callable_count >= threshold`.
pub(crate) fn decide_full_outline_ic(callable_count: usize) -> bool {
    const DEFAULT_MIN_FUNCS: usize = 4000;

    // An explicit, recognized override always wins over the size gate.
    if let Some(forced) = std::env::var("PERRY_FULL_OUTLINE_IC")
        .ok()
        .and_then(|raw| parse_full_outline_override(&raw))
    {
        return forced;
    }

    let threshold = std::env::var("PERRY_FULL_OUTLINE_IC_MIN_FUNCS")
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(DEFAULT_MIN_FUNCS);
    callable_count >= threshold
}

/// Maps a raw `PERRY_FULL_OUTLINE_IC` value to a forced decision, or `None`
/// when the value is not one of the recognized on/off spellings.
fn parse_full_outline_override(raw: &str) -> Option<bool> {
    match raw.trim().to_ascii_lowercase().as_str() {
        "1" | "on" | "true" => Some(true),
        "0" | "off" | "false" => Some(false),
        _ => None,
    }
}
+ set_full_outline_ic(decide_full_outline_ic(module_callable_count(hir))); + let mut llmod = LlModule::new_with_fp_flags(&triple, fp_flags); // Null guard global: a zeroed i32 used as a safe dereference target // when a NaN-unboxed pointer is null/invalid. Prevents segfaults from diff --git a/crates/perry-codegen/src/expr/property_set.rs b/crates/perry-codegen/src/expr/property_set.rs index 47cb71ea3..6d52001fc 100644 --- a/crates/perry-codegen/src/expr/property_set.rs +++ b/crates/perry-codegen/src/expr/property_set.rs @@ -311,6 +311,40 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { .as_ref() .is_some_and(crate::typed_shape::type_is_raw_f64_candidate); let requires_raw_f64_str = if requires_raw_f64 { "1" } else { "0" }; + // #5334 lever B: oversized modules full-outline the entire + // class-field-SET IC diamond (guard + fast store + + // fallback) to a single `js_class_field_set_ic(...)` call. + // This trades a call frame on the (cold, startup- + // dominated) field-set path for a large per-site IR + // reduction, so clang -O0 — which oversized modules are + // forced to (#4880) — can actually compile the module. + // Only the call's own operands are materialized (the key + // handle + expected-keys), not the inline-store scaffolding. 
+ if crate::codegen::full_outline_ic_enabled() { + let (key_raw, expected_keys) = { + let blk = ctx.block(); + let key_box = blk.load(DOUBLE, &key_handle_global); + let key_bits = blk.bitcast_double_to_i64(&key_box); + let key_raw = blk.and(I64, &key_bits, POINTER_MASK_I64); + let expected_keys = + blk.load(I64, &format!("@{}", keys_global_name)); + (key_raw, expected_keys) + }; + ctx.block().call_void( + "js_class_field_set_ic", + &[ + (I64, &site_id), + (DOUBLE, &recv_box), + (I32, &expected_class_id_str), + (I64, &expected_keys), + (I64, &key_raw), + (I32, &field_idx_str), + (DOUBLE, &val_double), + (I32, requires_raw_f64_str), + ], + ); + return Ok(val_double); + } // #5093: build the guard operands once, up front, so both // the inline shape pre-check and the guard-call fallback // can reference them. diff --git a/crates/perry-codegen/src/runtime_decls/objects.rs b/crates/perry-codegen/src/runtime_decls/objects.rs index e10916c27..24aaea2cf 100644 --- a/crates/perry-codegen/src/runtime_decls/objects.rs +++ b/crates/perry-codegen/src/runtime_decls/objects.rs @@ -120,6 +120,15 @@ pub fn declare_phase_b_objects(module: &mut LlModule) { VOID, &[I64, I64, I64, DOUBLE], ); + // #5334 lever B: class-field-SET inline cache, FULLY outlined. For oversized + // modules the whole diamond (guard + fast store + fallback) collapses to one + // call. Args: (site_id, recv, expected_class_id, expected_keys, key, + // field_index, value, require_raw_f64). Same signature as the set guard. 
+ module.declare_function( + "js_class_field_set_ic", + VOID, + &[I64, DOUBLE, I32, I64, I64, I32, DOUBLE, I32], + ); module.declare_function( "js_typed_feedback_class_field_get_guard", I32, diff --git a/crates/perry-codegen/tests/typed_feedback.rs b/crates/perry-codegen/tests/typed_feedback.rs index 70673f2d5..bda7faff2 100644 --- a/crates/perry-codegen/tests/typed_feedback.rs +++ b/crates/perry-codegen/tests/typed_feedback.rs @@ -266,6 +266,11 @@ fn typed_feedback_instruments_property_and_method_boundaries() { #[test] fn typed_feedback_guards_direct_class_field_specialization() { + // Serialize against the lever-B test (#5334), which sets the process-global + // PERRY_FULL_OUTLINE_IC in this same test binary; pin it off so this test + // always observes the inline diamond. + let _lock = ENV_LOCK.lock().unwrap(); + let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0")); let point = class(101, "Point", vec![field("x", Type::Number)]); let ir = ir_for(module_with_classes( "typed_feedback_class_field.ts", @@ -307,6 +312,103 @@ fn typed_feedback_guards_direct_class_field_specialization() { assert!(ir.contains("call double @js_object_get_field_by_name_f64")); } +#[test] +fn full_outline_ic_collapses_class_field_set_to_single_call() { + // #5334 lever B: when full-outline is enabled (oversized module, or forced + // via env), the entire class-field-SET diamond collapses to a single + // `js_class_field_set_ic` call — no guard call, no fast/fallback blocks. 
#[test]
fn full_outline_ic_collapses_class_field_set_to_single_call() {
    // #5334 lever B: with full-outline enabled (oversized module, or forced via
    // env) the whole class-field-SET diamond becomes one
    // `js_class_field_set_ic` call — no guard call, no fast/fallback blocks.
    let make_module = || {
        let point_class = class(101, "Point", vec![field("x", Type::Number)]);
        let store_x = Stmt::Expr(Expr::PropertySet {
            object: Box::new(Expr::LocalGet(1)),
            property: "x".to_string(),
            value: Box::new(Expr::Number(7.0)),
        });
        module_with_classes(
            "full_outline_field.ts",
            vec![point_class],
            vec![param(1, "p", Type::Named("Point".to_string()))],
            Type::Number,
            vec![store_x, Stmt::Return(Some(Expr::Number(0.0)))],
        )
    };

    let _lock = ENV_LOCK.lock().unwrap();

    // Forced ON: exactly the outlined call, none of the inline diamond.
    {
        let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("1"));
        let outlined_ir = ir_for(make_module());
        assert!(outlined_ir.contains("call void @js_class_field_set_ic"));
        for inline_marker in [
            "class_field_set.fast",
            "class_field_set.fallback",
            "call i32 @js_typed_feedback_class_field_set_guard",
        ] {
            assert!(!outlined_ir.contains(inline_marker));
        }
    }

    // Forced OFF (what normal-sized modules get): the inline diamond, and no
    // full-outline call.
    {
        let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0"));
        let inline_ir = ir_for(make_module());
        assert!(!inline_ir.contains("call void @js_class_field_set_ic"));
        for inline_marker in [
            "class_field_set.fast",
            "js_typed_feedback_class_field_set_guard",
        ] {
            assert!(inline_ir.contains(inline_marker));
        }
    }
}

#[test]
fn full_outline_ic_auto_gate_counts_class_methods() {
    // #5334 lever B: the automatic size gate counts class CALLABLES (methods,
    // accessors, ctor), not only top-level `hir.functions`. A class-heavy
    // module must trip the gate even with a single top-level function.
    let mut big = class(150, "Big", vec![field("x", Type::Number)]);
    big.methods.extend((0..6u32).map(|i| Function {
        id: 200 + i,
        name: format!("m{i}"),
        type_params: Vec::new(),
        params: Vec::new(),
        return_type: Type::Number,
        body: vec![Stmt::Return(Some(Expr::Number(0.0)))],
        is_async: false,
        is_generator: false,
        is_strict: false,
        is_exported: false,
        captures: Vec::new(),
        decorators: Vec::new(),
        was_plain_async: false,
        was_unrolled: false,
    }));

    let store_x = Stmt::Expr(Expr::PropertySet {
        object: Box::new(Expr::LocalGet(1)),
        property: "x".to_string(),
        value: Box::new(Expr::Number(7.0)),
    });
    let module = module_with_classes(
        "auto_gate.ts",
        vec![big],
        vec![param(1, "p", Type::Named("Big".to_string()))],
        Type::Number,
        vec![store_x, Stmt::Return(Some(Expr::Number(0.0)))],
    );

    let _lock = ENV_LOCK.lock().unwrap();
    // Auto path (override unset): 1 probe fn + 6 methods = 7 callables, which
    // clears MIN_FUNCS=5 although `hir.functions.len()` is only 1. Counting
    // top-level functions alone (1) would stay under the threshold.
    let _ic = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", None);
    let _min = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC_MIN_FUNCS", Some("5"));
    let ir = ir_for(module);
    assert!(ir.contains("call void @js_class_field_set_ic"));
    assert!(!ir.contains("class_field_set.fast"));
}
+ // + // Serialize against the lever-B full-outline test (#5334) and pin + // PERRY_FULL_OUTLINE_IC off, so this test always observes the inline diamond + // (`class_field_set.fast`) rather than the outlined call. + let _lock = ENV_LOCK.lock().unwrap(); + let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0")); let build = |val: Expr| { let c = class(140, "Bx", vec![field("s", Type::String)]); module_with_classes( @@ -348,6 +456,10 @@ fn class_field_set_elides_write_barrier_for_nonpointer_value() { #[test] fn typed_feedback_guards_direct_class_method_specialization() { + // Serialize against the lever-B test (#5334) and pin full-outline off so the + // class's synthesized field-set keeps its inline fallback (asserted below). + let _lock = ENV_LOCK.lock().unwrap(); + let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0")); let mut point = class(103, "Point", vec![field("x", Type::Number)]); point.methods.push(Function { id: 7, diff --git a/crates/perry-runtime/src/typed_feedback/guards.rs b/crates/perry-runtime/src/typed_feedback/guards.rs index 2fabad649..a6d56af3f 100644 --- a/crates/perry-runtime/src/typed_feedback/guards.rs +++ b/crates/perry-runtime/src/typed_feedback/guards.rs @@ -635,6 +635,84 @@ pub extern "C" fn js_class_field_set_fallback( ); } +/// Class-field-SET inline cache, FULLY OUTLINED (#5334, lever B). +/// +/// For pathologically-large modules (which are forced to `clang -O0`, where the +/// inline IC diamond's ~15-line-per-site expansion is never optimized away), +/// codegen replaces the ENTIRE diamond — guard call, fast slot store, and +/// fallback arm — with a single `call @js_class_field_set_ic(...)`. This trades +/// a function-call frame on the (cold, startup-dominated) field-set path for a +/// large reduction in emitted IR, so clang can actually compile the module. +/// +/// The body reproduces the diamond's exact semantics: +/// 1. run the same `js_typed_feedback_class_field_set_guard`; +/// 2. 
on a guard PASS, do the same slot store the inline fast block would — +/// a bare `f64` store for a `require_raw_f64` slot (pointer-free by typed +/// shape, no barrier), or `js_object_set_field` for a boxed slot (slot +/// write + layout note + write barrier); +/// 3. on a guard FAIL, record the fallback and route the write by name +/// (handles frozen / accessor / non-writable / setter-in-chain). +/// +/// Frozen/accessor/writable/setter handling all live behind the guard, so no +/// special-casing here. NB: the boxed store always emits the write barrier +/// (via `js_object_set_field`) — the compile-time non-pointer barrier elision +/// (#5334 lever D) does not apply on this path, an acceptable cost since the +/// full-outline path is gated to oversized, startup-dominated modules. +#[no_mangle] +pub extern "C" fn js_class_field_set_ic( + site_id: u64, + receiver: f64, + expected_class_id: u32, + expected_keys: *const ArrayHeader, + key: *const crate::StringHeader, + expected_field_index: u32, + value: f64, + require_raw_f64: i32, +) { + let guard_ok = js_typed_feedback_class_field_set_guard( + site_id, + receiver, + expected_class_id, + expected_keys, + key, + expected_field_index, + value, + require_raw_f64, + ); + + if guard_ok != 0 { + let object_addr = normalize_raw_object_addr(receiver.to_bits()); + if require_raw_f64 != 0 { + // Pointer-free raw-f64 slot: bare store, no GC barrier. + unsafe { + let fields_ptr = + (object_addr as *mut u8).add(std::mem::size_of::()) as *mut f64; + let slot = fields_ptr.add(expected_field_index as usize); + // GC_STORE_AUDIT(POINTER_FREE): a passing guard with + // require_raw_f64 proved the slot is pointer-free by typed-shape + // descriptor and the value is a plain number — identical to the + // inline `class_field_set.fast` raw-f64 store, which is barrier-free. + std::ptr::write(slot, value); + } + } else { + // Boxed slot: slot write + layout note + write barrier. 
+ crate::object::js_object_set_field( + object_addr as *mut ObjectHeader, + expected_field_index, + crate::value::JSValue::from_bits(value.to_bits()), + ); + } + return; + } + + // Guard FAIL → identical to the cold guard-miss arm. Delegate to the shared + // fallback helper so by-name routing (frozen / accessor / setter-in-chain) + // stays defined in exactly one place. + let obj_bits = receiver.to_bits(); + let key_raw = key as u64 & crate::value::POINTER_MASK; + js_class_field_set_fallback(site_id, obj_bits, key_raw, value); +} + #[no_mangle] pub unsafe extern "C" fn js_typed_feedback_native_call_method( site_id: u64, @@ -872,6 +950,7 @@ mod keep_guard_symbols { #[used] static G0: extern "C" fn(u64, f64, u32, *const ArrayHeader, *const crate::StringHeader, u32, i32) -> i32 = js_typed_feedback_class_field_get_guard; #[used] static G1: extern "C" fn(u64, f64, u32, *const ArrayHeader, *const crate::StringHeader, u32, f64, i32) -> i32 = js_typed_feedback_class_field_set_guard; #[used] static G1C: extern "C" fn(u64, u64, u64, f64) = js_class_field_set_fallback; + #[used] static G1D: extern "C" fn(u64, f64, u32, *const ArrayHeader, *const crate::StringHeader, u32, f64, i32) = js_class_field_set_ic; #[used] static G2: unsafe extern "C" fn(u64, f64, u32, *const ArrayHeader, *const i8, usize, *const u8) -> i32 = js_typed_feedback_method_direct_call_guard; #[used] static G3: extern "C" fn(u64, f64, *const u8, u32, u32) -> i32 = js_typed_feedback_closure_direct_call_guard; }