Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions crates/perry-codegen/src/expr/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ use anyhow::{anyhow, Result};

use super::{
emit_array_numeric_write_note_on_block, emit_jsvalue_slot_store_on_block,
emit_write_barrier_slot_on_block, nanbox_pointer_inline, raw_f64_layout_fact, FnCtx,
emit_jsvalue_slot_store_scalar_aware_on_block, emit_write_barrier_slot_on_block,
nanbox_pointer_inline, raw_f64_layout_fact, FnCtx,
};
use crate::block::LlBlock;
use crate::nanbox::POINTER_MASK_I64;
Expand Down Expand Up @@ -218,7 +219,11 @@ pub(crate) fn lower_index_set_fast(
);
} else {
let (element_addr, element_ptr) = element_slot(blk, &arr_handle, &idx_i32);
let value_bits = emit_jsvalue_slot_store_on_block(
// In-place overwrite of a non-raw-layout (e.g. downgraded `any[]`)
// array element: the slot holds a valid value, so the scalar-aware
// note skips the GC layout hashmap on scalar-over-scalar stores
// (#5094 — ~9× on bench_numeric_array_downgrade).
let value_bits = emit_jsvalue_slot_store_scalar_aware_on_block(
blk,
&element_ptr,
val_double,
Expand Down
6 changes: 3 additions & 3 deletions crates/perry-codegen/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,9 @@ pub(crate) use v8_interop::{
};
pub(crate) use write_barrier::{
emit_array_numeric_write_note_on_block, emit_jsvalue_slot_store_on_block,
emit_layout_note_slot_on_block, emit_root_heap_word_store_on_block,
emit_root_nanbox_store_on_block, emit_write_barrier, emit_write_barrier_slot_on_block,
lower_node_stream_super_init, lower_stream_super_init,
emit_jsvalue_slot_store_scalar_aware_on_block, emit_layout_note_slot_on_block,
emit_root_heap_word_store_on_block, emit_root_nanbox_store_on_block, emit_write_barrier,
emit_write_barrier_slot_on_block, lower_node_stream_super_init, lower_stream_super_init,
};

/// One in-flight inline-constructor return target. See
Expand Down
110 changes: 109 additions & 1 deletion crates/perry-codegen/src/expr/write_barrier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,28 @@ pub(crate) fn emit_layout_note_slot_on_block(
);
}

/// Emits a call to the runtime's `js_gc_note_slot_layout_aware`, the
/// scalar-aware flavour of the slot layout note.
///
/// Besides the parent, the slot index and the freshly stored value, the call
/// carries `old_bits` — the value the slot held before the store — so the
/// runtime can skip the thread-local layout hashmap when the store leaves the
/// slot's pointer-ness untouched (scalar over scalar).
pub(crate) fn emit_layout_note_slot_aware_on_block(
    blk: &mut LlBlock,
    parent_bits: &str,
    slot_index: &str,
    value_bits: &str,
    old_bits: &str,
) {
    // Argument order mirrors the runtime entry point:
    // (parent: i64, slot_index: i32, value_bits: i64, old_bits: i64).
    let call_args = [
        (I64, parent_bits),
        (I32, slot_index),
        (I64, value_bits),
        (I64, old_bits),
    ];
    blk.call_void("js_gc_note_slot_layout_aware", &call_args);
}

pub(crate) fn emit_array_numeric_write_note_on_block(
blk: &mut LlBlock,
array_bits: &str,
Expand All @@ -94,14 +116,100 @@ pub(crate) fn emit_jsvalue_slot_store_on_block(
slot_addr: &str,
write_barrier_needed: bool,
) -> Option<String> {
emit_jsvalue_slot_store_on_block_inner(
blk,
slot_ptr,
value_double,
layout_parent_bits,
slot_index,
layout_note_needed,
barrier_parent_bits,
slot_addr,
write_barrier_needed,
false,
)
}

/// Scalar-aware sibling of [`emit_jsvalue_slot_store_on_block`], meant for an
/// **in-place element overwrite** of a slot that already holds a valid value.
///
/// The layout note is routed through `js_gc_note_slot_layout_aware`: the
/// previous slot value is loaded before the store, and the runtime skips the
/// thread-local layout hashmap when neither the old nor the new value is a
/// heap pointer.
///
/// Only use this where the slot is guaranteed to be initialized (array
/// `arr[i] = …` overwrites). Do not use it for fresh-slot appends/literals,
/// nor for object field writes, which are POINTER_FREE-dominated and would
/// only pay for the extra load.
///
/// The layout note is the dominant per-write cost on downgraded `any[]`
/// numeric loops (#5094); this path gives ~9× on
/// `bench_numeric_array_downgrade` without regressing `bench_object_property`.
pub(crate) fn emit_jsvalue_slot_store_scalar_aware_on_block(
    blk: &mut LlBlock,
    slot_ptr: &str,
    value_double: &str,
    layout_parent_bits: &str,
    slot_index: &str,
    layout_note_needed: bool,
    barrier_parent_bits: &str,
    slot_addr: &str,
    write_barrier_needed: bool,
) -> Option<String> {
    // Opt into the old-value-aware layout note; everything else is forwarded
    // to the shared emitter unchanged.
    const SCALAR_AWARE: bool = true;

    emit_jsvalue_slot_store_on_block_inner(
        blk,
        slot_ptr,
        value_double,
        layout_parent_bits,
        slot_index,
        layout_note_needed,
        barrier_parent_bits,
        slot_addr,
        write_barrier_needed,
        SCALAR_AWARE,
    )
}

#[allow(clippy::too_many_arguments)]
fn emit_jsvalue_slot_store_on_block_inner(
blk: &mut LlBlock,
slot_ptr: &str,
value_double: &str,
layout_parent_bits: &str,
slot_index: &str,
layout_note_needed: bool,
barrier_parent_bits: &str,
slot_addr: &str,
write_barrier_needed: bool,
scalar_aware: bool,
) -> Option<String> {
// The scalar-aware layout note needs the slot's PREVIOUS value to decide
// whether the slot's pointer-ness actually changed; load it before the
// store overwrites it. Only when both a note is needed and the caller opted
// into the scalar-aware path (the slot is a valid in-place overwrite).
let old_bits = if scalar_aware && layout_note_needed {
let old_double = blk.load(DOUBLE, slot_ptr);
Some(blk.bitcast_double_to_i64(&old_double))
} else {
None
};
// GC_STORE_AUDIT(BARRIERED): generated heap JSValue stores route through this shared emitter.
blk.store(DOUBLE, value_double, slot_ptr);
if !layout_note_needed && !write_barrier_needed {
return None;
}
let value_bits = blk.bitcast_double_to_i64(value_double);
if layout_note_needed {
emit_layout_note_slot_on_block(blk, layout_parent_bits, slot_index, &value_bits);
match old_bits.as_deref() {
// Scalar-over-scalar stores leave the GC slot layout unchanged — the
// aware note skips the thread-local layout hashmap when neither the
// new nor the old value is a heap pointer (#5094).
Some(old) => emit_layout_note_slot_aware_on_block(
blk,
layout_parent_bits,
slot_index,
&value_bits,
old,
),
None => {
emit_layout_note_slot_on_block(blk, layout_parent_bits, slot_index, &value_bits)
}
}
}
if write_barrier_needed {
emit_write_barrier_slot_on_block(blk, barrier_parent_bits, slot_addr, &value_bits);
Expand Down
2 changes: 2 additions & 0 deletions crates/perry-codegen/src/runtime_decls/arrays.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ pub fn declare_phase_b_arrays(module: &mut LlModule) {
module.declare_function("js_write_barrier_root_nanbox", VOID, &[I64]);
module.declare_function("js_write_barrier_root_heap_word", VOID, &[I64]);
module.declare_function("js_gc_note_slot_layout", VOID, &[I64, I32, I64]);
// js_gc_note_slot_layout_aware(parent, slot_index, value_bits, old_bits)
module.declare_function("js_gc_note_slot_layout_aware", VOID, &[I64, I32, I64, I64]);
module.declare_function(
"js_gc_init_typed_shape_layout",
VOID,
Expand Down
27 changes: 27 additions & 0 deletions crates/perry-runtime/src/gc/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,33 @@ pub extern "C" fn js_gc_note_slot_layout(parent: u64, slot_index: u32, value_bit
layout_note_slot(parent_user, slot_index as usize, value_bits);
}

/// Scalar-aware counterpart of [`js_gc_note_slot_layout`]; `old_bits` is the
/// value the slot held before the store.
///
/// If **neither** the incoming value nor the previous value is a heap pointer,
/// the slot's pointer-ness has not changed, so the per-slot GC layout mask
/// needs no update and the `SIDE_MASK`/typed path's thread-local hashmap touch
/// is skipped entirely.
///
/// The mask invariant ("bit set ⟺ slot holds a pointer") still holds: the
/// full path runs whenever a pointer is involved on either side (`new` is a
/// pointer → set; `old` was a pointer → clear), which is exactly when the mask
/// has to change.
///
/// That hashmap touch is the dominant per-write cost on heterogeneous `any[]`
/// numeric write loops (stubbing `layout_note_slot` makes
/// `bench_numeric_array_downgrade` 11× faster). Classification uses
/// `layout_pointer_bearing_bits`, the same predicate the layout machinery
/// uses internally, so raw-pointer array slots are classified correctly (not
/// just NaN-boxed tags).
#[no_mangle]
pub extern "C" fn js_gc_note_slot_layout_aware(
    parent: u64,
    slot_index: u32,
    value_bits: u64,
    old_bits: u64,
) {
    // The new value is tested first and short-circuits, so the old value is
    // only classified when the new one is a scalar.
    let pointer_involved =
        layout_pointer_bearing_bits(value_bits) || layout_pointer_bearing_bits(old_bits);

    if pointer_involved {
        let parent_user = strip_nanbox_user_ptr(parent);
        layout_note_slot(parent_user, slot_index as usize, value_bits);
    }
}

unsafe fn init_typed_shape_layout(
user_ptr: usize,
slot_count: usize,
Expand Down