From 8941249b670255edf848a81ca306ea55b1089281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 00:04:05 +0200 Subject: [PATCH] fix(string): replace/replaceAll detect a RegExp searchValue at runtime (#4871) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codegen's needle_is_regex detection covers RegExp literals and RegExp-typed locals only. A RegExp read from an object property (or destructured in a for...of) lowered as an opaque value and was ToString-coerced — str.replace(obj.regex, …) literally searched for "/foo/g" and silently returned the input unchanged (surfaced as a PII-redaction pass that did nothing in the native binary). Add js_string_replace_search_dyn / js_string_replace_all_search_dyn, which probe the registered-RegExp set before coercing and defer to the existing _dyn replacement-shape dispatchers; route every statically untyped searchValue through them. Statically known regex/string needles keep their existing direct paths. --- .../perry-codegen/src/lower_string_method.rs | 24 ++++++ .../src/runtime_decls/strings_part2.rs | 6 ++ crates/perry-runtime/src/regex/replace_fn.rs | 84 +++++++++++++++++++ 3 files changed, 114 insertions(+) diff --git a/crates/perry-codegen/src/lower_string_method.rs b/crates/perry-codegen/src/lower_string_method.rs index 5af3ba74bd..3ebf88f68d 100644 --- a/crates/perry-codegen/src/lower_string_method.rs +++ b/crates/perry-codegen/src/lower_string_method.rs @@ -457,6 +457,30 @@ pub(crate) fn lower_string_method( let repl_box = lower_expr(ctx, &args[1])?; let blk = ctx.block(); let recv_handle = unbox_str_handle(blk, &recv_box); + // #4871: a `searchValue` codegen can't type (an object-property + // read, a destructured loop binding, a call result) may still be + // a RegExp at runtime. ToString-coercing it here turned + // `str.replace(obj.regex, …)` into a literal search for "/foo/g" + // — a silent no-op. Route through the runtime search dispatcher, + // which checks the registered-RegExp set before coercing (and + // handles every replacement shape via the `_dyn` family). + if !needle_is_regex && !needle_is_str { + let runtime_fn = if property == "replaceAll" { + "js_string_replace_all_search_dyn" + } else { + "js_string_replace_search_dyn" + }; + let result = blk.call( + I64, + runtime_fn, + &[ + (I64, &recv_handle), + (DOUBLE, &needle_box), + (DOUBLE, &repl_box), + ], + ); + return Ok(nanbox_string_inline(blk, &result)); + } let needle_handle = if needle_is_regex || needle_is_str { unbox_str_handle(blk, &needle_box) } else { diff --git a/crates/perry-codegen/src/runtime_decls/strings_part2.rs b/crates/perry-codegen/src/runtime_decls/strings_part2.rs index d3f8c3114a..07ab7ff044 100644 --- a/crates/perry-codegen/src/runtime_decls/strings_part2.rs +++ b/crates/perry-codegen/src/runtime_decls/strings_part2.rs @@ -340,6 +340,12 @@ pub(crate) fn declare_phase_b_strings_part2(module: &mut LlModule) { module.declare_function("js_string_replace_all_string_dyn", I64, &[I64, I64, DOUBLE]); module.declare_function("js_string_replace_regex_dyn", I64, &[I64, I64, DOUBLE]); module.declare_function("js_string_replace_all_regex_dyn", I64, &[I64, I64, DOUBLE]); + module.declare_function("js_string_replace_search_dyn", I64, &[I64, DOUBLE, DOUBLE]); + module.declare_function( + "js_string_replace_all_search_dyn", + I64, + &[I64, DOUBLE, DOUBLE], + ); module.declare_function("js_string_replace_all_string_fn", I64, &[I64, I64, DOUBLE]); module.declare_function("js_string_replace_regex_fn", I64, &[I64, I64, DOUBLE]); module.declare_function("js_string_replace_all_regex_fn", I64, &[I64, I64, DOUBLE]); diff --git a/crates/perry-runtime/src/regex/replace_fn.rs b/crates/perry-runtime/src/regex/replace_fn.rs index 78ea7b47cc..a7c2366a98 100644 --- a/crates/perry-runtime/src/regex/replace_fn.rs +++ b/crates/perry-runtime/src/regex/replace_fn.rs @@ -298,6 +298,59 @@ pub extern "C" fn js_string_replace_all_string_dyn( js_string_replace_all_string(s, pattern, crate::builtins::js_string_coerce(replacement)) } +/// Resolve a runtime-dynamic `searchValue` (an object-property read, call +/// result, destructured loop binding, …) to a registered RegExp pointer, or +/// `None` when the value isn't a RegExp. +fn needle_regex_ptr(needle: f64) -> Option<*const crate::regex::RegExpHeader> { + let bits = needle.to_bits(); + let top16 = bits >> 48; + let addr = if top16 == 0x7FFD { + (bits & crate::value::POINTER_MASK) as usize + } else if top16 == 0 { + // Module-level slots store heap pointers as raw I64 bits. + bits as usize + } else { + return None; + }; + if crate::regex::is_regex_pointer(addr as *const u8) { + Some(addr as *const crate::regex::RegExpHeader) + } else { + None + } +} + +/// `searchValue` whose RegExp-ness is only knowable at RUNTIME (#4871): +/// codegen's static detection covers RegExp literals and RegExp-typed locals, +/// but a RegExp read back from an object property (or destructured in a +/// `for...of`) arrives as an opaque NaN-boxed value. Pre-fix it was +/// ToString-coerced to "/foo/g" and searched literally — replace silently +/// became a no-op. Dispatch on the registered-RegExp check, then defer to the +/// replacement-shape dispatchers. +#[no_mangle] +pub extern "C" fn js_string_replace_search_dyn( + s: *const StringHeader, + needle: f64, + replacement: f64, +) -> *mut StringHeader { + if let Some(re) = needle_regex_ptr(needle) { + return js_string_replace_regex_dyn(s, re, replacement); + } + js_string_replace_string_dyn(s, crate::builtins::js_string_coerce(needle), replacement) +} + +/// `replaceAll` twin of [`js_string_replace_search_dyn`]. +#[no_mangle] +pub extern "C" fn js_string_replace_all_search_dyn( + s: *const StringHeader, + needle: f64, + replacement: f64, +) -> *mut StringHeader { + if let Some(re) = needle_regex_ptr(needle) { + return js_string_replace_all_regex_dyn(s, re, replacement); + } + js_string_replace_all_string_dyn(s, crate::builtins::js_string_coerce(needle), replacement) +} + #[no_mangle] pub extern "C" fn js_string_replace_regex_dyn( s: *const StringHeader, @@ -330,3 +383,34 @@ pub extern "C" fn js_string_replace_all_regex_dyn( crate::builtins::js_string_coerce(replacement), ) } + +#[cfg(test)] +mod tests { + use super::*; + + /// #4871: a RegExp arriving as an opaque NaN-boxed value (object-property + /// read) must dispatch to the regex path, not be ToString-coerced into a + /// literal "/foo/g" search. + #[test] + fn search_dyn_dispatches_runtime_regex_and_coerces_non_regex() { + let s = js_string_from_str("foofoo"); + let pat = js_string_from_str("foo"); + let flags = js_string_from_str("g"); + let re = crate::regex::js_regexp_new(pat, flags); + let re_boxed = f64::from_bits(0x7FFD_0000_0000_0000u64 | (re as u64 & 0xFFFF_FFFF_FFFF)); + let repl = js_nanbox_string(js_string_from_str("X") as i64); + + // /foo/g: the g flag makes .replace substitute every match. + let out = js_string_replace_search_dyn(s, re_boxed, repl); + assert_eq!(string_as_str(out), "XX"); + + let out_all = js_string_replace_all_search_dyn(s, re_boxed, repl); + assert_eq!(string_as_str(out_all), "XX"); + + // Non-regex needle: ToString-coerce and search literally. + let needle_num = 12.0_f64; + let s2 = js_string_from_str("a12b"); + let out2 = js_string_replace_search_dyn(s2, needle_num, repl); + assert_eq!(string_as_str(out2), "aXb"); + } +}