Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions crates/perry-codegen/src/codegen/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,77 @@ pub(crate) fn write_barriers_enabled() -> bool {
})
}

thread_local! {
    /// Per-thread lever B (#5334) switch: `true` while the module currently
    /// being compiled on this thread should full-outline its class-field IC
    /// diamonds. Starts out `false`.
    ///
    /// Deliberately a thread-local rather than a process-global `OnceLock`:
    /// codegen handles one module per `compile_module` call, and a
    /// process-global would pin the first module's decision for every later
    /// module of a multi-module build (and across tests). Codegen within a
    /// module is sequential, so a thread-local is safe and spares threading a
    /// flag through the `FnCtx` construction sites.
    static FULL_OUTLINE_IC: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
}

/// Returns the full-outline decision recorded for the current thread.
///
/// Written once per module at the top of `compile_module` (see
/// [`decide_full_outline_ic`]) and consulted at each class-field-set lowering.
pub(crate) fn full_outline_ic_enabled() -> bool {
    FULL_OUTLINE_IC.with(std::cell::Cell::get)
}

/// Records the full-outline decision for the current thread.
///
/// Callers must store the `false` case too: the flag is thread-local state, so
/// an unset value would leak the previous module's decision into the next one.
pub(crate) fn set_full_outline_ic(enabled: bool) {
    FULL_OUTLINE_IC.with(|flag| {
        flag.set(enabled);
    });
}

/// Total number of LLVM functions a module will emit — top-level functions
/// plus every class callable (constructor, instance/static methods, computed
/// members, accessor get/set bodies). Used as the lever-B size proxy: class
/// methods and closures
/// do NOT live in `hir.functions`, so a class-heavy minified bundle (the exact
/// pathology lever B targets) can have a small `functions.len()` yet emit tens
/// of thousands of LLVM functions. Counting class callables keeps the gate from
/// silently under-counting and never firing on those modules.
pub(crate) fn module_callable_count(hir: &perry_hir::Module) -> usize {
let class_callables: usize = hir
.classes
.iter()
.map(|c| {
usize::from(c.constructor.is_some())
+ c.methods.len()
+ c.static_methods.len()
+ c.computed_members.len()
+ c.getters.len()
+ c.setters.len()
})
Comment thread
coderabbitai[bot] marked this conversation as resolved.
.sum();
hir.functions.len() + class_callables
}

/// Decide whether a module is large enough to warrant full-outlining its
/// class-field IC diamonds (#5334 lever B). Oversized modules are forced to
/// `clang -O0` (#4880), where the inline diamond's ~15-line-per-site expansion
/// is never optimized away; collapsing each site to one
/// `call @js_class_field_set_ic(...)` keeps the IR small enough for clang to
/// compile at all. Gated on the module's total callable count (see
/// [`module_callable_count`]) — the defining trait of the pathological
/// minified-bundle case (tens of thousands of callables in one module);
/// ordinary per-file modules stay on the inline diamond and keep the hot fast
/// store.
///
/// # Environment
///
/// * `PERRY_FULL_OUTLINE_IC` — `1`/`on`/`true` forces ON, `0`/`off`/`false`
///   forces OFF. Matching ignores ASCII case and surrounding whitespace, so
///   `TRUE` or `" off "` are honoured rather than silently ignored. Any other
///   value (or an unset variable) selects the automatic gate.
/// * `PERRY_FULL_OUTLINE_IC_MIN_FUNCS` — threshold for the automatic gate
///   (surrounding whitespace is ignored). Defaults to 4000 when unset or not
///   parsable as a `usize`.
///
/// # Returns
///
/// `true` when forced on, `false` when forced off, otherwise
/// `callable_count >= threshold`.
pub(crate) fn decide_full_outline_ic(callable_count: usize) -> bool {
    // Auto-gate threshold used when the MIN_FUNCS variable is unset or invalid.
    const DEFAULT_MIN_FUNCS: usize = 4000;

    // An explicit override wins over the size heuristic. Normalise the value
    // first so a differently-cased or padded spelling is not mistaken for
    // "unset" and quietly routed to the automatic gate.
    if let Ok(raw) = std::env::var("PERRY_FULL_OUTLINE_IC") {
        match raw.trim().to_ascii_lowercase().as_str() {
            "1" | "on" | "true" => return true,
            "0" | "off" | "false" => return false,
            // Unrecognised value: fall through to the automatic gate.
            _ => {}
        }
    }

    let threshold = std::env::var("PERRY_FULL_OUTLINE_IC_MIN_FUNCS")
        .ok()
        .and_then(|v| v.trim().parse::<usize>().ok())
        .unwrap_or(DEFAULT_MIN_FUNCS);
    callable_count >= threshold
}

pub(super) fn scoped_fn_name(module_prefix: &str, hir_name: &str) -> String {
// Use the INJECTIVE sanitizer (same as scoped_static_method_name): plain
// `sanitize` maps every non-`[A-Za-z0-9_]` char to `_`, so distinct minified
Expand Down
12 changes: 11 additions & 1 deletion crates/perry-codegen/src/codegen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ mod opts;
mod string_pool;

pub use helpers::resolve_target_triple;
pub(crate) use helpers::{default_target_triple, write_barriers_enabled};
pub(crate) use helpers::{
decide_full_outline_ic, default_target_triple, full_outline_ic_enabled, module_callable_count,
set_full_outline_ic, write_barriers_enabled,
};
pub use opts::{
AppMetadata, CompileOptions, FpContractMode, ImportedClass, NamespaceEntry, NamespaceEntryKind,
};
Expand Down Expand Up @@ -92,6 +95,13 @@ pub fn compile_module(hir: &HirModule, opts: CompileOptions) -> Result<Vec<u8>>
let triple = opts.target.clone().unwrap_or_else(default_target_triple);
let fp_flags = crate::block::FpFlags::new(opts.fast_math, opts.fp_contract_mode);

// #5334 lever B: decide ONCE, up front, whether this module is large enough
// to full-outline its class-field IC diamonds (read per-site during
// lowering via `full_outline_ic_enabled()`). Thread-local, so it must be set
// afresh for every module — including the `false` case, to clear any prior
// module's decision on this thread.
set_full_outline_ic(decide_full_outline_ic(module_callable_count(hir)));

let mut llmod = LlModule::new_with_fp_flags(&triple, fp_flags);
// Null guard global: a zeroed i32 used as a safe dereference target
// when a NaN-unboxed pointer is null/invalid. Prevents segfaults from
Expand Down
34 changes: 34 additions & 0 deletions crates/perry-codegen/src/expr/property_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,40 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result<String> {
.as_ref()
.is_some_and(crate::typed_shape::type_is_raw_f64_candidate);
let requires_raw_f64_str = if requires_raw_f64 { "1" } else { "0" };
// #5334 lever B: oversized modules full-outline the entire
// class-field-SET IC diamond (guard + fast store +
// fallback) to a single `js_class_field_set_ic(...)` call.
// This trades a call frame on the (cold, startup-
// dominated) field-set path for a large per-site IR
// reduction, so clang -O0 — which oversized modules are
// forced to (#4880) — can actually compile the module.
// Only the call's own operands are materialized (the key
// handle + expected-keys), not the inline-store scaffolding.
if crate::codegen::full_outline_ic_enabled() {
let (key_raw, expected_keys) = {
let blk = ctx.block();
let key_box = blk.load(DOUBLE, &key_handle_global);
let key_bits = blk.bitcast_double_to_i64(&key_box);
let key_raw = blk.and(I64, &key_bits, POINTER_MASK_I64);
let expected_keys =
blk.load(I64, &format!("@{}", keys_global_name));
(key_raw, expected_keys)
};
ctx.block().call_void(
"js_class_field_set_ic",
&[
(I64, &site_id),
(DOUBLE, &recv_box),
(I32, &expected_class_id_str),
(I64, &expected_keys),
(I64, &key_raw),
(I32, &field_idx_str),
(DOUBLE, &val_double),
(I32, requires_raw_f64_str),
],
);
return Ok(val_double);
}
// #5093: build the guard operands once, up front, so both
// the inline shape pre-check and the guard-call fallback
// can reference them.
Expand Down
9 changes: 9 additions & 0 deletions crates/perry-codegen/src/runtime_decls/objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ pub fn declare_phase_b_objects(module: &mut LlModule) {
VOID,
&[I64, I64, I64, DOUBLE],
);
// #5334 lever B: class-field-SET inline cache, FULLY outlined. For oversized
// modules the whole diamond (guard + fast store + fallback) collapses to one
// call. Args: (site_id, recv, expected_class_id, expected_keys, key,
// field_index, value, require_raw_f64). Same parameter list as the set guard,
// but returns nothing (the guard returns an i32 verdict).
module.declare_function(
"js_class_field_set_ic",
VOID,
&[I64, DOUBLE, I32, I64, I64, I32, DOUBLE, I32],
);
module.declare_function(
"js_typed_feedback_class_field_get_guard",
I32,
Expand Down
112 changes: 112 additions & 0 deletions crates/perry-codegen/tests/typed_feedback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,11 @@ fn typed_feedback_instruments_property_and_method_boundaries() {

#[test]
fn typed_feedback_guards_direct_class_field_specialization() {
// Serialize against the lever-B test (#5334), which sets the process-global
// PERRY_FULL_OUTLINE_IC in this same test binary; pin it off so this test
// always observes the inline diamond.
let _lock = ENV_LOCK.lock().unwrap();
let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0"));
let point = class(101, "Point", vec![field("x", Type::Number)]);
let ir = ir_for(module_with_classes(
"typed_feedback_class_field.ts",
Expand Down Expand Up @@ -307,13 +312,116 @@ fn typed_feedback_guards_direct_class_field_specialization() {
assert!(ir.contains("call double @js_object_get_field_by_name_f64"));
}

#[test]
fn full_outline_ic_collapses_class_field_set_to_single_call() {
    // #5334 lever B: with full-outline active (oversized module, or forced via
    // the environment) a class-field SET lowers to exactly one
    // `js_class_field_set_ic` call — no guard call and no fast/fallback blocks.
    // With it forced off, the inline diamond is emitted instead.

    // PERRY_FULL_OUTLINE_IC is process-wide state shared with the other tests
    // in this binary, so hold the env lock for the whole test.
    let _env_lock = ENV_LOCK.lock().unwrap();

    // Fresh module per lowering: class `Point { x: number }` and a probe
    // function that performs `p.x = 7`.
    let point_module = || {
        let point = class(101, "Point", vec![field("x", Type::Number)]);
        let params = vec![param(1, "p", Type::Named("Point".to_string()))];
        let store_x = Expr::PropertySet {
            object: Box::new(Expr::LocalGet(1)),
            property: "x".to_string(),
            value: Box::new(Expr::Number(7.0)),
        };
        let body = vec![
            Stmt::Expr(store_x),
            Stmt::Return(Some(Expr::Number(0.0))),
        ];
        module_with_classes(
            "full_outline_field.ts",
            vec![point],
            params,
            Type::Number,
            body,
        )
    };

    // Override = "1": the outlined call is present and every trace of the
    // inline diamond is gone.
    {
        let _forced_on = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("1"));
        let outlined = ir_for(point_module());
        assert!(outlined.contains("call void @js_class_field_set_ic"));
        for inline_marker in [
            "class_field_set.fast",
            "class_field_set.fallback",
            "call i32 @js_typed_feedback_class_field_set_guard",
        ] {
            assert!(!outlined.contains(inline_marker));
        }
    }

    // Override = "0" (what normal-sized modules get): the inline diamond is
    // emitted and the outlined call is absent.
    {
        let _forced_off = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0"));
        let inline = ir_for(point_module());
        assert!(!inline.contains("call void @js_class_field_set_ic"));
        for diamond_marker in [
            "class_field_set.fast",
            "js_typed_feedback_class_field_set_guard",
        ] {
            assert!(inline.contains(diamond_marker));
        }
    }
}

#[test]
fn full_outline_ic_auto_gate_counts_class_methods() {
    // #5334 lever B: the automatic size gate must count class CALLABLES
    // (methods, accessors, ctor) and not only top-level `hir.functions`. A
    // class-heavy module — the minified-bundle pathology — has to trip the gate
    // even when it has very few top-level functions, because class methods do
    // not live in `hir.functions`.

    // One class carrying six trivial methods, each just `return 0`.
    let mut big = class(150, "Big", vec![field("x", Type::Number)]);
    big.methods.extend((0..6u32).map(|i| Function {
        id: 200 + i,
        name: format!("m{i}"),
        type_params: Vec::new(),
        params: Vec::new(),
        return_type: Type::Number,
        body: vec![Stmt::Return(Some(Expr::Number(0.0)))],
        is_async: false,
        is_generator: false,
        is_strict: false,
        is_exported: false,
        captures: Vec::new(),
        decorators: Vec::new(),
        was_plain_async: false,
        was_unrolled: false,
    }));

    // Probe function body: `p.x = 7; return 0;`.
    let store_x = Expr::PropertySet {
        object: Box::new(Expr::LocalGet(1)),
        property: "x".to_string(),
        value: Box::new(Expr::Number(7.0)),
    };
    let module = module_with_classes(
        "auto_gate.ts",
        vec![big],
        vec![param(1, "p", Type::Named("Big".to_string()))],
        Type::Number,
        vec![
            Stmt::Expr(store_x),
            Stmt::Return(Some(Expr::Number(0.0))),
        ],
    );

    let _env_lock = ENV_LOCK.lock().unwrap();
    // Automatic path: the override is cleared and the threshold lowered to 5.
    // The six class methods alone clear it, whereas a count restricted to the
    // top-level functions is expected to stay below it.
    let _ic = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", None);
    let _min = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC_MIN_FUNCS", Some("5"));

    let lowered = ir_for(module);
    assert!(lowered.contains("call void @js_class_field_set_ic"));
    assert!(!lowered.contains("class_field_set.fast"));
}

#[test]
fn class_field_set_elides_write_barrier_for_nonpointer_value() {
// #5334 lever D: storing a value that is a non-pointer by construction
// into a BOXED class field (a String slot — only Number is raw-f64) skips
// the generational write barrier, since the store creates no parent→child
// heap reference. The layout note still fires (it tracks the slot's
// pointer-ness). A value that may be a heap pointer keeps the barrier.
//
// Serialize against the lever-B full-outline test (#5334) and pin
// PERRY_FULL_OUTLINE_IC off, so this test always observes the inline diamond
// (`class_field_set.fast`) rather than the outlined call.
let _lock = ENV_LOCK.lock().unwrap();
let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0"));
let build = |val: Expr| {
let c = class(140, "Bx", vec![field("s", Type::String)]);
module_with_classes(
Expand Down Expand Up @@ -348,6 +456,10 @@ fn class_field_set_elides_write_barrier_for_nonpointer_value() {

#[test]
fn typed_feedback_guards_direct_class_method_specialization() {
// Serialize against the lever-B test (#5334) and pin full-outline off so the
// class's synthesized field-set keeps its inline fallback (asserted below).
let _lock = ENV_LOCK.lock().unwrap();
let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0"));
let mut point = class(103, "Point", vec![field("x", Type::Number)]);
point.methods.push(Function {
id: 7,
Expand Down
79 changes: 79 additions & 0 deletions crates/perry-runtime/src/typed_feedback/guards.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,84 @@ pub extern "C" fn js_class_field_set_fallback(
);
}

/// Fully outlined class-field-SET inline cache (#5334, lever B).
///
/// Pathologically large modules are forced to `clang -O0`, where the inline IC
/// diamond's ~15-line-per-site expansion is never optimized away. For those
/// modules codegen emits a single `call @js_class_field_set_ic(...)` in place
/// of the whole diamond (guard call, fast slot store, fallback arm), paying a
/// call frame on the (cold, startup-dominated) field-set path in exchange for
/// a much smaller IR that clang can actually compile.
///
/// What the body does, in order:
/// 1. calls `js_typed_feedback_class_field_set_guard` with all eight arguments
///    forwarded unchanged;
/// 2. if the guard returns zero (FAIL), hands the write to
///    `js_class_field_set_fallback`, which routes it by name (frozen /
///    accessor / non-writable / setter-in-chain handling lives there);
/// 3. if the guard returns non-zero (PASS), stores into slot
///    `expected_field_index` — through `js_object_set_field` for a boxed slot
///    (the helper responsible for slot write, layout note and write barrier),
///    or as a bare `f64` write when `require_raw_f64` is non-zero.
///
/// NB: the boxed store always goes through `js_object_set_field`, so the
/// compile-time non-pointer barrier elision (#5334 lever D) does not apply on
/// this path — an accepted cost, since full-outlining is gated to oversized,
/// startup-dominated modules.
#[no_mangle]
pub extern "C" fn js_class_field_set_ic(
    site_id: u64,
    receiver: f64,
    expected_class_id: u32,
    expected_keys: *const ArrayHeader,
    key: *const crate::StringHeader,
    expected_field_index: u32,
    value: f64,
    require_raw_f64: i32,
) {
    let guard_passed = js_typed_feedback_class_field_set_guard(
        site_id,
        receiver,
        expected_class_id,
        expected_keys,
        key,
        expected_field_index,
        value,
        require_raw_f64,
    ) != 0;

    if !guard_passed {
        // Guard FAIL: same as the cold guard-miss arm. Go through the shared
        // fallback helper so by-name routing stays defined in one place.
        let receiver_bits = receiver.to_bits();
        let key_bits = (key as u64) & crate::value::POINTER_MASK;
        js_class_field_set_fallback(site_id, receiver_bits, key_bits, value);
        return;
    }

    let object_addr = normalize_raw_object_addr(receiver.to_bits());

    if require_raw_f64 == 0 {
        // Boxed slot: slot write + layout note + write barrier, all delegated.
        crate::object::js_object_set_field(
            object_addr as *mut ObjectHeader,
            expected_field_index,
            crate::value::JSValue::from_bits(value.to_bits()),
        );
        return;
    }

    // Pointer-free raw-f64 slot: bare store, no GC barrier.
    // SAFETY: relies on the passing guard having validated `receiver` and
    // `expected_field_index` for this object, with fields laid out directly
    // after the `ObjectHeader` — NOTE(review): the guard is not visible here,
    // confirm it establishes the in-bounds invariant.
    unsafe {
        let first_field = (object_addr as *mut u8)
            .add(std::mem::size_of::<ObjectHeader>())
            .cast::<f64>();
        // GC_STORE_AUDIT(POINTER_FREE): a passing guard with
        // require_raw_f64 proved the slot is pointer-free by typed-shape
        // descriptor and the value is a plain number — identical to the
        // inline `class_field_set.fast` raw-f64 store, which is barrier-free.
        first_field.add(expected_field_index as usize).write(value);
    }
}

#[no_mangle]
pub unsafe extern "C" fn js_typed_feedback_native_call_method(
site_id: u64,
Expand Down Expand Up @@ -872,6 +950,7 @@ mod keep_guard_symbols {
#[used] static G0: extern "C" fn(u64, f64, u32, *const ArrayHeader, *const crate::StringHeader, u32, i32) -> i32 = js_typed_feedback_class_field_get_guard;
#[used] static G1: extern "C" fn(u64, f64, u32, *const ArrayHeader, *const crate::StringHeader, u32, f64, i32) -> i32 = js_typed_feedback_class_field_set_guard;
#[used] static G1C: extern "C" fn(u64, u64, u64, f64) = js_class_field_set_fallback;
#[used] static G1D: extern "C" fn(u64, f64, u32, *const ArrayHeader, *const crate::StringHeader, u32, f64, i32) = js_class_field_set_ic;
#[used] static G2: unsafe extern "C" fn(u64, f64, u32, *const ArrayHeader, *const i8, usize, *const u8) -> i32 = js_typed_feedback_method_direct_call_guard;
#[used] static G3: extern "C" fn(u64, f64, *const u8, u32, u32) -> i32 = js_typed_feedback_closure_direct_call_guard;
}
Loading