diff --git a/crates/perry-codegen/src/expr/array_literal.rs b/crates/perry-codegen/src/expr/array_literal.rs
index a4e8ddd9e..b1ca9e7e2 100644
--- a/crates/perry-codegen/src/expr/array_literal.rs
+++ b/crates/perry-codegen/src/expr/array_literal.rs
@@ -9,7 +9,7 @@ use super::{
     nanbox_pointer_inline, FnCtx,
 };
 use crate::type_analysis::is_numeric_expr;
-use crate::types::{I32, I64, I8, PTR};
+use crate::types::{DOUBLE, I32, I64, I8, PTR};
 
 /// Lower an array literal `[a, b, c, …]`.
 ///
@@ -58,6 +58,30 @@ pub(crate) fn lower_array_literal(ctx: &mut FnCtx<'_>, elements: &[Expr]) -> Res
         vals.push(lower_expr(ctx, value_expr)?);
     }
 
+    // #5391: oversized modules outline array-literal construction. The inline
+    // bump-alloc + N×(store + layout-note + barrier) sequence makes minified
+    // data-table builders huge (single 18MB functions clang -O0 can't compile
+    // in practical time). Instead spill the already-evaluated element values to
+    // a per-literal stack buffer and build the array in ONE runtime call. The
+    // buffer is hoisted to the entry block (fixed size per site; bounded total
+    // stack) and consumed immediately by the call, so no GC-visible window.
+    // NOTE(review): every literal site reserves its own buffer of `n` doubles,
+    // so a function's frame grows with the sum of `n` over all of its literal
+    // sites — confirm this stays within stack limits for the large data-table
+    // functions this path targets.
+    if crate::codegen::full_outline_ic_enabled() {
+        let buf = ctx.func.alloca_entry_array(DOUBLE, n);
+        for (i, v) in vals.iter().enumerate() {
+            let slot = ctx.block().gep(DOUBLE, &buf, &[(I64, &i.to_string())]);
+            ctx.block().store(DOUBLE, v, &slot);
+        }
+        let n_str = n.to_string();
+        let arr = ctx
+            .block()
+            .call(I64, "js_array_from_values", &[(PTR, &buf), (I32, &n_str)]);
+        return Ok(nanbox_pointer_inline(ctx.block(), &arr));
+    }
+
     // Inline bump-allocator path for small literals. Size threshold matches
     // `MAX_SCALAR_ARRAY_LEN` in collectors.rs so every candidate the escape
     // pass rejects can still benefit from the inline alloc.
diff --git a/crates/perry-codegen/src/runtime_decls/arrays.rs b/crates/perry-codegen/src/runtime_decls/arrays.rs
index db89ecf06..c5353f720 100644
--- a/crates/perry-codegen/src/runtime_decls/arrays.rs
+++ b/crates/perry-codegen/src/runtime_decls/arrays.rs
@@ -34,6 +34,9 @@ pub fn declare_phase_b_arrays(module: &mut LlModule) {
     // Exact-sized literal allocator — one call + N direct stores replaces
     // alloc + N×push_f64. See `js_array_alloc_literal` in perry-runtime/src/array.rs.
     module.declare_function("js_array_alloc_literal", I64, &[I32]);
+    // #5391: build an array literal from a stack buffer of N values in one call
+    // (outlines the inline alloc + per-element store/note/barrier). Args: (values_ptr, n).
+    module.declare_function("js_array_from_values", I64, &[PTR, I32]);
     module.declare_function("js_array_push_f64", I64, &[I64, DOUBLE]);
     module.declare_function("js_array_push_hole", I64, &[I64]);
     module.declare_function("js_array_numeric_push_f64_unboxed", I64, &[I64, DOUBLE]);
diff --git a/crates/perry-codegen/tests/typed_feedback.rs b/crates/perry-codegen/tests/typed_feedback.rs
index b4acebd2e..565e91d40 100644
--- a/crates/perry-codegen/tests/typed_feedback.rs
+++ b/crates/perry-codegen/tests/typed_feedback.rs
@@ -406,6 +406,46 @@ fn full_outline_ic_collapses_class_field_get_to_single_call() {
     }
 }
 
+#[test]
+fn full_outline_array_literal_uses_builder_call() {
+    // #5391: full-outline replaces the inline array-literal construction
+    // (bump-alloc diamond + per-element store/note/barrier) with one
+    // `js_array_from_values` call over a stack buffer.
+    // A param-based array (not all-const) so it reaches `lower_array_literal`
+    // rather than const-folding to a flat rodata global.
+    let build = || {
+        module(
+            "outline_arr.ts",
+            vec![param(1, "x", Type::Number)],
+            Type::Any,
+            vec![Stmt::Return(Some(Expr::Array(vec![
+                Expr::LocalGet(1),
+                Expr::Number(2.0),
+                Expr::LocalGet(1),
+            ])))],
+        )
+    };
+
+    let _lock = ENV_LOCK.lock().unwrap();
+
+    {
+        // ON: construction collapses to the single outlined builder call.
+        let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("1"));
+        let ir = ir_for(build());
+        assert!(ir.contains("call i64 @js_array_from_values"));
+        assert!(!ir.contains("arrlit.fast"));
+        // the inline bump-alloc CALL (not its always-present declare) is gone
+        assert!(!ir.contains("call ptr @js_inline_arena_slow_alloc"));
+    }
+    {
+        // OFF: the inline construction path (whatever it is for this literal) —
+        // crucially NOT the outlined builder.
+        let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0"));
+        let ir = ir_for(build());
+        assert!(!ir.contains("call i64 @js_array_from_values"));
+    }
+}
+
 #[test]
 fn full_outline_ic_auto_gate_counts_class_methods() {
     // #5334 lever B: the auto size-gate counts class CALLABLES (methods,
@@ -670,6 +710,11 @@ fn typed_feedback_guards_numeric_array_push_specialization() {
 
 #[test]
 fn typed_feedback_marks_numeric_array_literals() {
+    // Serialize against the array-literal full-outline test and pin it off, so
+    // this test always observes the inline numeric-array construction
+    // (js_array_mark_numeric_f64_layout) rather than the outlined builder.
+    let _lock = ENV_LOCK.lock().unwrap();
+    let _g = EnvVarGuard::set("PERRY_FULL_OUTLINE_IC", Some("0"));
     let numeric_ir = ir_for(module(
         "typed_feedback_numeric_array_literal.ts",
         Vec::new(),
diff --git a/crates/perry-runtime/src/array/alloc.rs b/crates/perry-runtime/src/array/alloc.rs
index d0af6f73e..ed20cd7af 100644
--- a/crates/perry-runtime/src/array/alloc.rs
+++ b/crates/perry-runtime/src/array/alloc.rs
@@ -326,6 +326,50 @@ pub extern "C" fn js_array_alloc_literal(capacity: u32) -> *mut ArrayHeader {
     ptr
 }
 
+/// #5391: build an array literal from a stack buffer of `n` pre-evaluated
+/// element values in ONE call, replacing the inline alloc + per-element
+/// store/layout-note/barrier sequence codegen otherwise emits at every literal
+/// site. For oversized modules that inline expansion makes individual functions
+/// enormous (a minified data-table builder reached 18MB of IR), which `clang
+/// -O0` compiles in superlinear time; outlining the construction keeps the call
+/// site to a buffer fill + one call.
+///
+/// Mirrors the inline `lower_array_literal` semantics: allocate via
+/// `js_array_alloc_literal` (length pre-set to `n`), copy each value, then note
+/// the slot layout and emit the write barrier per element. Both GC helpers
+/// no-op for non-pointer values, so the per-element calls are unconditional and
+/// correct for any mix of numbers and heap references.
+///
+/// Caller contract: `values` must be null or point to at least `n` readable
+/// `f64` values. When `values` is null or `n == 0` the freshly allocated array
+/// is returned without any element being written.
+#[no_mangle]
+pub extern "C" fn js_array_from_values(values: *const f64, n: u32) -> *mut ArrayHeader {
+    let arr = js_array_alloc_literal(n);
+    if values.is_null() || n == 0 {
+        return arr;
+    }
+    let parent = arr as u64;
+    // SAFETY: assumes the element slots start directly after the `ArrayHeader`
+    // and that `js_array_alloc_literal(n)` reserved at least `n` of them —
+    // TODO confirm against the allocator.
+    let elems = unsafe { (arr as *mut u8).add(std::mem::size_of::<ArrayHeader>()) as *mut f64 };
+    for i in 0..n as usize {
+        // SAFETY: `i < n`; `values` is non-null (checked above) and the caller
+        // provides `n` values, and `elems` is assumed to hold `n` slots.
+        let v = unsafe { *values.add(i) };
+        let slot = unsafe { elems.add(i) };
+        // GC_STORE_AUDIT(BARRIERED): element store immediately followed by the
+        // slot layout note + write barrier below, identical to the inline
+        // array-literal element store via emit_jsvalue_slot_store_on_block.
+        unsafe { core::ptr::write(slot, v) };
+        let vbits = v.to_bits();
+        crate::gc::js_gc_note_slot_layout(parent, i as u32, vbits);
+        crate::gc::js_write_barrier_slot(parent, slot as u64, vbits);
+    }
+    arr
+}
+
 /// Issue #179 Phase 2: if `arr` points at a `LazyArrayHeader`
 /// (`GcHeader::obj_type == GC_TYPE_LAZY_ARRAY`), force the lazy
 /// value to materialize and return the real `ArrayHeader` pointer.