From f5a3245969a7229b47655fb3e453a1deac9995b1 Mon Sep 17 00:00:00 2001 From: machineloop <3682072+machineloop@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:21:04 -0400 Subject: [PATCH] fix(runtime,codegen): demote unique strings stored into array elements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #5533, which demoted heap-stored unique strings to shared for object-field stores (`runtime_store_jsvalue_slot`) but left array-element stores uncovered. The same aliasing bug applies: a uniquely-owned (refcount==1) string written into an array element is aliased by the array, so a later in-place `+=` on the source local (`js_string_append`'s refcount==1 fast path) mutates the stored element and corrupts it. Array element stores reach the slot through several paths that don't share one choke point, so apply the same tag-checked demote (`js_string_addref_if_heap_string`, added in #5533 — a no-op for small-string-optimized / non-string values) at each. Codegen (the inline fast paths that bypass the runtime store helpers): - `emit_jsvalue_slot_store_on_block` (write_barrier.rs): the shared inline element-store emitter for array literals, `push`, and `arr[i] =`. Gated on the existing "value may be a heap pointer" flag, so numeric stores pay nothing. - the scalar-replaced array-literal init in let_stmt.rs (`const a = [s]`): demote the element where it is captured into its slot, before the deferred build — mirrors the object scalar-field demote. Runtime (the paths codegen hands off to a store helper): - `js_array_push_f64` (push realloc / forwarding / proxy paths; the grow path receives the already-demoted value). - `js_array_set_f64` / `js_array_set_f64_extend` (`arr[i] =`). - `js_array_from_values` (the outline array-literal construction path). - the splice inserted-items loop. Internal reshuffles (sort, splice tail shift, slice copy) only move values already stored in an array — already shared — so no demote is needed there. Tests: crates/perry/tests/string_append_heap_alias.rs gains 4 compile-run regression cases (array literal, `arr[i] =`, push, splice). Each stores a non-SSO refcount==1 string then grows the source; all fail without the demotes. --- .../perry-codegen/src/expr/write_barrier.rs | 11 ++ crates/perry-codegen/src/stmt/let_stmt.rs | 20 ++- crates/perry-runtime/src/array/alloc.rs | 5 + crates/perry-runtime/src/array/indexing.rs | 6 + crates/perry-runtime/src/array/push_pop.rs | 7 + .../perry-runtime/src/array/splice_slice.rs | 8 +- .../perry/tests/string_append_heap_alias.rs | 156 +++++++++++++++++- 7 files changed, 205 insertions(+), 8 deletions(-) diff --git a/crates/perry-codegen/src/expr/write_barrier.rs b/crates/perry-codegen/src/expr/write_barrier.rs index a9d60ce78..816384f67 100644 --- a/crates/perry-codegen/src/expr/write_barrier.rs +++ b/crates/perry-codegen/src/expr/write_barrier.rs @@ -204,6 +204,17 @@ fn emit_jsvalue_slot_store_on_block_inner( }; // GC_STORE_AUDIT(BARRIERED): generated heap JSValue stores route through this shared emitter. blk.store(DOUBLE, value_double, slot_ptr); + // A uniquely-owned (refcount==1) string written into this slot now aliases + // it — demote it to shared so a later in-place `+=` on the source local + // allocates fresh instead of mutating the stored element. This is the inline + // codegen choke point the runtime store functions (`js_array_push_f64`, …) + // are bypassed for on the fast paths. Tag-checked at runtime (a no-op for + // SSO / non-string), and only emitted when the value can be a heap pointer + // (`layout_note_needed`), so numeric stores pay nothing. Mirrors the + // object-field demote in `runtime_store_jsvalue_slot` (#5533). + if layout_note_needed { + blk.call_void("js_string_addref_if_heap_string", &[(DOUBLE, value_double)]); + } if !layout_note_needed && !write_barrier_needed { return None; } diff --git a/crates/perry-codegen/src/stmt/let_stmt.rs b/crates/perry-codegen/src/stmt/let_stmt.rs index c8f6c0454..4ddbfb8f0 100644 --- a/crates/perry-codegen/src/stmt/let_stmt.rs +++ b/crates/perry-codegen/src/stmt/let_stmt.rs @@ -2,7 +2,10 @@ use super::*; -use crate::expr::{emit_root_nanbox_store_on_block, lower_expr_with_expected_type}; +use crate::expr::{ + emit_root_nanbox_store_on_block, expr_produces_non_pointer_bits_by_construction, + lower_expr_with_expected_type, +}; use crate::native_value::{ AliasState, BufferAccessMode, BufferElem, BufferIndexUnit, BufferViewSlot, LengthSource, LoweredValue, MaterializationReason, NativeOwnedViewSlot, NativeRep, PodLayoutDecision, @@ -401,6 +404,21 @@ pub(crate) fn lower_let( } let v = lower_expr(ctx, elem)?; ctx.block().store(DOUBLE, &v, &slots[i]); + // A uniquely-owned string captured into this scalar-replaced + // array slot aliases its heap buffer; demote it to shared so a + // later in-place `+=` on the source local doesn't mutate the + // stored element. Only a `LocalGet` can be `+=`'d in place after + // the store; gate on "value may be a heap pointer" (not an exact + // `string` type) so `any`-typed and union locals are covered too. + // The runtime helper is tag-checked (a no-op for SSO / non-string), + // and proven-numeric locals are skipped to avoid the call. Mirrors + // the object scalar-field demote and the runtime array-store demotes. + let needs_string_demote = matches!(elem, perry_hir::Expr::LocalGet(_)) + && !expr_produces_non_pointer_bits_by_construction(ctx, elem); + if needs_string_demote { + ctx.block() + .call_void("js_string_addref_if_heap_string", &[(DOUBLE, &v)]); + } let lowered = LoweredValue { semantic: SemanticKind::JsValue, rep: NativeRep::JsValue, diff --git a/crates/perry-runtime/src/array/alloc.rs b/crates/perry-runtime/src/array/alloc.rs index ed20cd7af..b595ea9f5 100644 --- a/crates/perry-runtime/src/array/alloc.rs +++ b/crates/perry-runtime/src/array/alloc.rs @@ -350,6 +350,11 @@ pub extern "C" fn js_array_from_values(values: *const f64, n: u32) -> *mut Array for i in 0..n as usize { let v = unsafe { *values.add(i) }; let slot = unsafe { elems.add(i) }; + // A uniquely-owned string element now aliases this slot — demote it to + // shared so a later `s += x` allocates fresh instead of mutating the + // stored element. No-op for SSO / non-string. (This outline path doesn't + // funnel through `note_array_slot`.) + crate::string::js_string_addref_if_heap_string(v); // GC_STORE_AUDIT(BARRIERED): element store immediately followed by the // slot layout note + write barrier below, identical to the inline // array-literal element store via emit_jsvalue_slot_store_on_block. diff --git a/crates/perry-runtime/src/array/indexing.rs b/crates/perry-runtime/src/array/indexing.rs index 691234cb9..eebb4308d 100644 --- a/crates/perry-runtime/src/array/indexing.rs +++ b/crates/perry-runtime/src/array/indexing.rs @@ -757,6 +757,10 @@ pub extern "C" fn js_array_numeric_set_f64_unboxed( /// Note: This does NOT extend the array if index >= length #[no_mangle] pub extern "C" fn js_array_set_f64(arr: *mut ArrayHeader, index: u32, value: f64) { + // A uniquely-owned string assigned to an element (`arr[i] = s`) aliases this + // slot — demote it to shared so a later `s += x` doesn't mutate the stored + // element in place. No-op for SSO / non-string. + crate::string::js_string_addref_if_heap_string(value); let arr = clean_arr_ptr_mut(arr); if arr.is_null() { return; @@ -809,6 +813,8 @@ pub extern "C" fn js_array_set_f64_extend( index: u32, value: f64, ) -> *mut ArrayHeader { + // Demote a uniquely-owned string source — see `js_array_set_f64`. + crate::string::js_string_addref_if_heap_string(value); let arr = clean_arr_ptr_mut(arr); if arr.is_null() { return js_array_alloc(0); diff --git a/crates/perry-runtime/src/array/push_pop.rs b/crates/perry-runtime/src/array/push_pop.rs index f181adec9..b60eeba46 100644 --- a/crates/perry-runtime/src/array/push_pop.rs +++ b/crates/perry-runtime/src/array/push_pop.rs @@ -163,6 +163,13 @@ unsafe fn proxy_set_str_key(proxy: f64, key_bytes: &[u8], value: f64) { /// Returns a pointer to the (possibly reallocated) array #[no_mangle] pub extern "C" fn js_array_push_f64(arr: *mut ArrayHeader, value: f64) -> *mut ArrayHeader { + // A uniquely-owned (refcount==1) string pushed into the array aliases an + // element slot — demote it to shared so a later `s += x` on the source local + // allocates fresh instead of mutating the stored element in place. No-op for + // SSO / non-string. Done on the raw value, covering the inline, grow, and + // proxy paths below (mirrors the object-field demote in + // `runtime_store_jsvalue_slot`). + crate::string::js_string_addref_if_heap_string(value); // #5135: a Proxy whose static type is an array (immer drafts) reaches here // with the masked proxy id. Perform the spec `Array.prototype.push` for a // single element directly through the proxy's `get`/`set` traps: diff --git a/crates/perry-runtime/src/array/splice_slice.rs b/crates/perry-runtime/src/array/splice_slice.rs index abbe55e90..1874d24fd 100644 --- a/crates/perry-runtime/src/array/splice_slice.rs +++ b/crates/perry-runtime/src/array/splice_slice.rs @@ -138,8 +138,14 @@ pub extern "C" fn js_array_splice( // Insert new items if items_count > 0 && !items.is_null() { for i in 0..items_count as usize { + let item = *items.add(i); + // A uniquely-owned string spliced in now aliases the array slot — + // demote it to shared so a later `s += x` doesn't mutate it in + // place. No-op for SSO / non-string. (This insert path doesn't + // funnel through `note_array_slot`.) + crate::string::js_string_addref_if_heap_string(item); // GC_STORE_AUDIT(BARRIERED): splice inserted item writes are followed by layout/barrier rebuild. - ptr::write(elements_ptr.add(start_idx as usize + i), *items.add(i)); + ptr::write(elements_ptr.add(start_idx as usize + i), item); } } diff --git a/crates/perry/tests/string_append_heap_alias.rs b/crates/perry/tests/string_append_heap_alias.rs index 549fbd5a3..01fc3a2e3 100644 --- a/crates/perry/tests/string_append_heap_alias.rs +++ b/crates/perry/tests/string_append_heap_alias.rs @@ -114,12 +114,156 @@ console.log("o.v=" + o.v + " s=" + s); ); } -// NOTE: non-escaping ARRAY element stores (`const a = [s]`, `a.push(s)`, -// `a[i] = s`) have the same aliasing exposure as object fields, but the array -// store paths are numerous (small-literal `js_array_alloc` + `js_array_push_f64`, -// the inline bump-allocator, `js_array_from_values`, indexing/splice/…) and span -// ~9 runtime files. Covering them is tracked as a follow-up rather than bundled -// here, to keep this change focused on the object-field path it set out to fix. +// Array-element stores have the same aliasing exposure as object fields, across +// every store path: literal construction, `arr[i] = s`, `push`, and `splice`. +// Each stores a NON-SSO string made uniquely-owned (refcount==1) via a first +// append, then grows the source — the stored element must keep its value. These +// fail without the array-store demotes (`note_array_slot`, `js_array_from_values`, +// the splice insert, and the inline array-literal codegen). + +/// `const a = [s]` — small literal (lowers to `js_array_alloc` + `js_array_push_f64`). +#[test] +fn unique_string_stored_in_array_literal_is_not_corrupted() { + let dir = tempfile::tempdir().expect("tempdir"); + let entry = dir.path().join("main.ts"); + std::fs::write( + &entry, + r#" +let s = "prefix"; // non-SSO, shared literal +s += "_init"; // append on shared -> fresh heap string, refcount==1 +const a = [s]; // element store -> must demote s to shared +s += "_more"; // refcount==1 in-place append -> must NOT corrupt a[0] +console.log("a0=" + a[0] + " s=" + s); +"#, + ) + .expect("write entry"); + let out = compile_and_run(dir.path(), &entry); + assert!( + out.contains("a0=prefix_init s=prefix_init_more"), + "string stored in an array literal must not be corrupted by a later += (got: {out:?})" + ); +} + +/// `a[i] = s` — index assignment. +#[test] +fn unique_string_stored_via_index_set_is_not_corrupted() { + let dir = tempfile::tempdir().expect("tempdir"); + let entry = dir.path().join("main.ts"); + std::fs::write( + &entry, + r#" +const a = ["placeholder"]; +let s = "prefix"; +s += "_init"; +a[0] = s; // index-set -> must demote s to shared +s += "_more"; +console.log("a0=" + a[0] + " s=" + s); +"#, + ) + .expect("write entry"); + let out = compile_and_run(dir.path(), &entry); + assert!( + out.contains("a0=prefix_init s=prefix_init_more"), + "string stored via arr[i]= must not be corrupted by a later += (got: {out:?})" + ); +} + +/// `a.push(s)` — push. +#[test] +fn unique_string_pushed_into_array_is_not_corrupted() { + let dir = tempfile::tempdir().expect("tempdir"); + let entry = dir.path().join("main.ts"); + std::fs::write( + &entry, + r#" +const a = []; +let s = "prefix"; +s += "_init"; +a.push(s); // push -> must demote s to shared +s += "_more"; +console.log("a0=" + a[0] + " s=" + s); +"#, + ) + .expect("write entry"); + let out = compile_and_run(dir.path(), &entry); + assert!( + out.contains("a0=prefix_init s=prefix_init_more"), + "string pushed into an array must not be corrupted by a later += (got: {out:?})" + ); +} + +/// `a.splice(i, 0, s)` — splice insertion. +#[test] +fn unique_string_spliced_into_array_is_not_corrupted() { + let dir = tempfile::tempdir().expect("tempdir"); + let entry = dir.path().join("main.ts"); + std::fs::write( + &entry, + r#" +const a = ["head", "tail"]; +let s = "prefix"; +s += "_init"; +a.splice(1, 0, s); // splice insert -> must demote s to shared +s += "_more"; +console.log("a1=" + a[1] + " s=" + s); +"#, + ) + .expect("write entry"); + let out = compile_and_run(dir.path(), &entry); + assert!( + out.contains("a1=prefix_init s=prefix_init_more"), + "string spliced into an array must not be corrupted by a later += (got: {out:?})" + ); +} + +/// The OUTLINED array-literal construction path (`js_array_from_values`), forced +/// via `PERRY_FULL_OUTLINE_IC=1`. The array must escape so it isn't +/// scalar-replaced and routes through `lower_array_literal`'s outline branch. +#[test] +fn unique_string_in_outlined_array_literal_is_not_corrupted() { + let dir = tempfile::tempdir().expect("tempdir"); + let entry = dir.path().join("main.ts"); + std::fs::write( + &entry, + r#" +let s = "prefix"; +s += "_init"; +const a = [s]; // escapes below -> js_array_from_values (full-outline) +s += "_more"; +(globalThis as any).keep = a; // escape so the literal isn't scalar-replaced +console.log("a0=" + a[0] + " s=" + s); +"#, + ) + .expect("write entry"); + let output = dir.path().join("main_bin"); + let compile = Command::new(perry_bin()) + .current_dir(dir.path()) + .env("PERRY_FULL_OUTLINE_IC", "1") + .arg("compile") + .arg(&entry) + .arg("-o") + .arg(&output) + .output() + .expect("run perry compile"); + assert!( + compile.status.success(), + "perry compile failed\nstderr:\n{}", + String::from_utf8_lossy(&compile.stderr) + ); + let run = Command::new(&output).output().expect("run compiled binary"); + assert!( + run.status.success(), + "compiled binary failed\nstatus: {:?}\nstdout:\n{}\nstderr:\n{}", + run.status, + String::from_utf8_lossy(&run.stdout), + String::from_utf8_lossy(&run.stderr) + ); + let out = String::from_utf8_lossy(&run.stdout).to_string(); + assert!( + out.contains("a0=prefix_init s=prefix_init_more"), + "string in an outlined array literal must not be corrupted by a later += (got: {out:?})" + ); +} /// The snapshot-then-grow shape end to end: store the latest value into a heap /// field each step, then keep growing the source. Every stored snapshot must