From e71c161dc0475da73fbe90813d96a2a0f34b95ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Fri, 12 Jun 2026 23:15:16 +0200 Subject: [PATCH 01/15] fix(compile,runtime): 14 walls from Next.js standalone compile validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling a real Next.js 16.2.9 app (output:'standalone', whole framework in perry.compilePackages) surfaced 14 independent Perry bugs. Each was minimized and fixed; the binary now compiles, links (123MB), and boots through Next's environment-extension init chain. Compile-time: 1. perry-parser looks_like_es_module: track regex literals — picomatch's /(^[*!]|[/()[\]{}"])/ desynced the string state, masking the wrap's trailing `export default _cjs` (parsed as Script → ImportExportInScript). 2. collect_modules/codegen: unresolvable worker_threads Worker filename is a warning + runtime throw (js_throw_error_with_code), not a compile error — Next's build-time worker pools never run in production. 3. cjs_wrap detect: empty/whitespace-only file is CJS (react's client-only 0-byte index.js needs the wrap's `export default {}`). 4. cjs_wrap detect: ALL token scans run on comment/string/regex/ nested-template-stripped source. Four real-world defeats: "import " in a header comment; ESM `import` lines generated inside (nested) template literals (next/dist/build/utils.js writes server.js via `${m ? `import …` : `…`}`); regex-with-quote masking a trailing module.exports (comment-json). 5. cjs_wrap hoist_classes: hoist column-0 classes only — an indented class is nested inside a function; hoisting severed its closure over enclosing locals (ws event classes: `this[f]` → ReferenceError). 6. resolve: exports-map resolution walks ALL condition branches and takes the first target that exists on disk — Next's file tracing prunes @swc/helpers' esm/ (the `import` condition target) while cjs/ exists. Codegen: 7. 
lower_call/builtin #602 flip: ambiguous DB ctors (Client/Pool/Database/ DatabaseSync/Session/StatementSync/Redis/MongoClient/Decimal) fire only on POSITIVE import-source evidence. None is a Node global; undici's `var Client = class …` inside next/dist/compiled/@edge-runtime hit pg's arm and emitted an undefined js_pg_client_new link reference. 8. property_get: `.normalize` forces the String path only at ≤1 arg — Next's route normalizers define 2-arg normalize(pathname, matched). lower_string_method: surplus normalize args evaluated then ignored (spec) instead of a codegen error. 9. dyn_extern_i18n: a default-import alias of a Node builtin module used as a VALUE materializes the native namespace instead of TAG_TRUE. Runtime: 10. Event subclassing (ws CloseEvent/ErrorEvent/MessageEvent): js_event_subclass_init applies Event fields to the subclass `this`; js_register_class_parent_dynamic resolves globalThis builtin ctor parents through the shared global_builtin_constructor_class_id table (extracted from the dynamic instanceof path) and registers the chain edge; is_event_instance walks the parent chain; SuperCall arm in this_super_call.rs. 11. class_registry: `extends` of a bound native-module export (stream.Writable, EventEmitter as BOUND_METHOD closures) keeps the parentless baseline instead of throwing "not a constructor". 12. reflect_support obj_value_has_own_key: native-module namespaces expose virtual keys — console.error's first defineProperty patch no longer records {configurable:false} ("new property" defaults), so Next's second/third console patches stop throwing. 13. node:timers is a real module value: submodule default object (was TAG_TRUE sentinel), namespace key table for member reads, and a GC-scanned native-namespace property override store so `require('node:timers').setImmediate = patched` round-trips (write via ordinary_set_with_receiver arm, read via vt_get_own_field). 14. 
field_get_set: `fn.toString` value read reifies like bind/call/apply (Next preserves patched-fn toString via original.toString.bind); native_module: Module.prototype{require} exists for require-hook's `mod.prototype.require` read+patch. Tests: unit tests added for parser regex/division, cjs detect (empty, comment-import, nested-template, regex-mask), exports candidates. Targeted suites green (perry-parser/hir/codegen/perry all green; perry-runtime 1025/1027 — date full_year is the known pre-existing macOS-local failure; url path_to_file_url + builtin_prototype_reject_new are sporadic thread-interaction flakes, pass in isolation and reproduce on clean main variations). Behavior-changing notes for review: (7) requires import evidence where the pre-#602 fallback fired on bare names; (4) is_commonjs now scans stripped source — files whose only CJS/ESM markers lived in comments or strings change classification (toward correctness). Code-only PR: no version bump / changelog per maintainer-fold convention. Validation harness (app, Node baseline, verify script): /tmp/perry-nextjs-demo — Node baseline 21/21 green; Perry binary boots to next/dist/compiled/zod init (errToObj, #838 family) — follow-up. 
--- .../perry-codegen/src/expr/dyn_extern_i18n.rs | 48 ++++ .../perry-codegen/src/expr/this_super_call.rs | 40 ++++ .../perry-codegen/src/lower_call/builtin.rs | 46 ++-- .../src/lower_call/property_get.rs | 10 +- .../perry-codegen/src/lower_string_method.rs | 20 +- .../src/runtime_decls/strings.rs | 5 + .../src/runtime_decls/strings_part2.rs | 7 + crates/perry-parser/src/lib.rs | 92 ++++++++ crates/perry-runtime/src/event_target.rs | 65 +++++- .../perry-runtime/src/node_submodules/mod.rs | 7 + .../src/object/class_registry.rs | 39 ++++ .../perry-runtime/src/object/field_get_set.rs | 5 + .../src/object/field_set_by_name.rs | 12 + crates/perry-runtime/src/object/instanceof.rs | 119 +++++----- .../perry-runtime/src/object/native_module.rs | 66 ++++++ .../src/object/reflect_support.rs | 19 ++ crates/perry-runtime/src/proxy.rs | 40 +++- .../src/commands/compile/cjs_wrap/detect.rs | 215 +++++++++++++++++- .../compile/cjs_wrap/hoist_classes.rs | 13 +- .../src/commands/compile/cjs_wrap/mod.rs | 70 ++++++ .../src/commands/compile/collect_modules.rs | 14 +- crates/perry/src/commands/compile/resolve.rs | 62 ++++- .../src/commands/compile/resolve/tests.rs | 39 ++++ 23 files changed, 943 insertions(+), 110 deletions(-) diff --git a/crates/perry-codegen/src/expr/dyn_extern_i18n.rs b/crates/perry-codegen/src/expr/dyn_extern_i18n.rs index 79ac272aa8..4618c6a5fb 100644 --- a/crates/perry-codegen/src/expr/dyn_extern_i18n.rs +++ b/crates/perry-codegen/src/expr/dyn_extern_i18n.rs @@ -59,6 +59,33 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { } else { double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)) }; + // An empty `paths` list means collect_modules could not resolve + // the filename statically (it warned at compile time). Many real + // packages construct Workers only on cold paths (e.g. Next.js + // build-time worker pools) — throw if one is actually reached at + // runtime instead of failing the whole compile. 
+ if paths.is_empty() { + let msg = "worker_threads Worker filename was not statically \ + resolvable at compile time; constructing this Worker \ + is unsupported in the compiled binary"; + let msg_idx = ctx.strings.intern(msg); + let msg_entry = ctx.strings.entry(msg_idx); + let msg_bytes_global = format!("@{}", msg_entry.bytes_global); + let msg_len_str = msg_entry.byte_len.to_string(); + let blk = ctx.block(); + blk.call_void( + "js_throw_error_with_code", + &[ + (PTR, &msg_bytes_global), + (I64, &msg_len_str), + (PTR, &"null".to_string()), + (I64, &"0".to_string()), + (I32, &"0".to_string()), + ], + ); + blk.unreachable(); + return Ok(double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED))); + } if paths.len() != 1 { bail!( "worker_threads Worker requires exactly one compile-time-resolved filename, got {}", @@ -550,6 +577,27 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { &[(PTR, &name_bytes_global), (I64, &name_len)], )); } + // A default-import alias of a Node builtin module used as a VALUE + // (`const nodeTimers = require('node:timers')`, adopted to an + // import by the CJS wrap) — materialize the real native-module + // namespace object so member reads, monkey-patch writes, and + // enumeration behave. Previously fell through to TAG_TRUE: + // `typeof nodeTimers === "boolean"` and Next.js's + // fast-set-immediate extension threw on + // `nodeTimers.setImmediate = patched` at startup. 
+ if let Some(source) = ctx.imported_class_sources.get(name) { + let bare = source.strip_prefix("node:").unwrap_or(source).to_string(); + if perry_hir::is_node_builtin_module(&bare) { + let module_label = emit_string_literal_global(ctx, &bare); + let module_len = bare.len(); + let blk = ctx.block(); + return Ok(blk.call( + DOUBLE, + "js_create_native_module_namespace", + &[(PTR, &module_label), (I64, &module_len.to_string())], + )); + } + } Ok(double_literal(f64::from_bits(crate::nanbox::TAG_TRUE))) } diff --git a/crates/perry-codegen/src/expr/this_super_call.rs b/crates/perry-codegen/src/expr/this_super_call.rs index e9fdfecf2b..264b117054 100644 --- a/crates/perry-codegen/src/expr/this_super_call.rs +++ b/crates/perry-codegen/src/expr/this_super_call.rs @@ -198,6 +198,8 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { | "TransformStream" | "Request" | "Response" + | "Event" + | "CustomEvent" ) || is_other_builtin_constructor_name(parent_name.as_str()); if !is_builtin_parent_name { if let Some(extends_expr) = current_class.extends_expr.as_deref() { @@ -361,6 +363,44 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { // handle at runtime (see `fetch_subclass_handle_id`). This // makes `class Request extends GlobalRequest {}` — exactly // what `@hono/node-server` does — produce a working Request. + // `class X extends Event` / `extends CustomEvent` (the `ws` + // package's CloseEvent/ErrorEvent/MessageEvent): `super(type, + // options)` initializes the standard Event fields/methods onto + // `this`. The `X → Event` registry edge (registered at class- + // definition time via js_register_class_parent_dynamic) keeps + // `instanceof Event` and EventTarget dispatch acceptance. 
+ if matches!(parent_name.as_str(), "Event" | "CustomEvent") { + let undef = double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)); + let mut lowered: Vec = Vec::with_capacity(super_args.len()); + for a in super_args { + lowered.push(lower_expr(ctx, a)?); + } + let arg0 = lowered.first().cloned().unwrap_or_else(|| undef.clone()); + let arg1 = lowered.get(1).cloned().unwrap_or_else(|| undef.clone()); + let this_box = match ctx.this_stack.last().cloned() { + Some(slot) => ctx.block().load(DOUBLE, &slot), + None => undef.clone(), + }; + let argc = super_args.len().min(2).to_string(); + ctx.block().call( + DOUBLE, + "js_event_subclass_init", + &[ + (DOUBLE, &this_box), + (DOUBLE, &arg0), + (DOUBLE, &arg1), + (I32, &argc), + ], + ); + let current_class_name = + ctx.class_stack.last().cloned().unwrap_or_default(); + crate::lower_call::apply_field_initializers_recursive( + ctx, + &current_class_name, + crate::lower_call::FieldInitMode::SelfOnly, + )?; + return Ok(double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED))); + } let fetch_subclass_fn = match parent_name.as_str() { "Request" => Some("js_request_subclass_init"), "Response" => Some("js_response_subclass_init"), diff --git a/crates/perry-codegen/src/lower_call/builtin.rs b/crates/perry-codegen/src/lower_call/builtin.rs index c255657b55..33765c6fb9 100644 --- a/crates/perry-codegen/src/lower_call/builtin.rs +++ b/crates/perry-codegen/src/lower_call/builtin.rs @@ -28,33 +28,35 @@ pub(super) fn lower_builtin_new( args: &[Expr], ) -> Result> { // Issue #602: ambiguously-named built-in constructors (Client / Pool / - // Database / Redis / MongoClient / Decimal) collide with default-import - // aliases from unrelated packages — `import Client from "better-sqlite3"` - // would otherwise dispatch through pg's Client arm and emit an undefined - // `js_pg_client_new` reference at link time. 
When `class_name` matches an - // ambiguous arm AND we know the import source is NOT the package the arm - // is for, return `None` so `lower_new` falls through to the generic path. - // Names without a recorded import source (top-level globals, locally- - // defined classes already filtered upstream, etc.) keep their pre-#602 - // behavior — the arm still fires. + // Database / Redis / MongoClient / Decimal) collide with bindings from + // unrelated packages — `import Client from "better-sqlite3"` would + // otherwise dispatch through pg's Client arm and emit an undefined + // `js_pg_client_new` reference at link time. None of these names is a + // Node global, so the arm fires ONLY on positive evidence: a recorded + // import binding whose source is the arm's package (the CJS wrap's + // require-adoption records these too). Names without a matching import + // source fall through to the generic path — this covers function-scoped + // class expressions like undici's `var Client = class _Client …` inside + // bundled vendor code (Next.js `@edge-runtime/primitives`), which are + // invisible to `ctx.classes` and previously hit pg's arm, breaking the + // link of any program that bundles undici without importing pg. 
let import_src = ctx .imported_class_sources .get(class_name) .map(|s| s.as_str()); - let arm_mismatches_source = match (class_name, import_src) { - ("Client", Some(src)) => src != "pg", - ("Pool", Some(src)) => src != "pg", - ("Database", Some(src)) => src != "better-sqlite3", - ("DatabaseSync", Some(src)) => src != "sqlite", - ("Session", Some(src)) => src != "sqlite", - ("StatementSync", Some(src)) => src != "sqlite", - ("Redis", Some(src)) => src != "ioredis" && src != "redis", - ("MongoClient", Some(src)) => src != "mongodb", - ("Decimal", Some(src)) => src != "decimal.js", - _ => false, + let required_sources: Option<&[&str]> = match class_name { + "Client" | "Pool" => Some(&["pg"]), + "Database" => Some(&["better-sqlite3"]), + "DatabaseSync" | "Session" | "StatementSync" => Some(&["sqlite", "node:sqlite"]), + "Redis" => Some(&["ioredis", "redis"]), + "MongoClient" => Some(&["mongodb"]), + "Decimal" => Some(&["decimal.js"]), + _ => None, }; - if arm_mismatches_source { - return Ok(None); + if let Some(sources) = required_sources { + if !import_src.is_some_and(|src| sources.contains(&src)) { + return Ok(None); + } } match class_name { "Utf8Stream" diff --git a/crates/perry-codegen/src/lower_call/property_get.rs b/crates/perry-codegen/src/lower_call/property_get.rs index fb8e251f7e..dc36fdad3c 100644 --- a/crates/perry-codegen/src/lower_call/property_get.rs +++ b/crates/perry-codegen/src/lower_call/property_get.rs @@ -329,7 +329,7 @@ pub fn try_lower_property_get_method_call( "split" | "charCodeAt" | "charAt" | "trim" | "trimStart" | "trimEnd" | "substring" | "substr" | "toLowerCase" | "toUpperCase" | "toLocaleLowerCase" | "toLocaleUpperCase" | "replaceAll" | "padStart" | "padEnd" | "repeat" - | "normalize" | "codePointAt" | "localeCompare" => true, + | "codePointAt" | "localeCompare" => true, // Annex B §B.2.2 HTML wrappers (`bold`, `link`, `anchor`, …) are // string-only in the spec but collide with common user method // names — chalk's `chalk.bold(s)` is a 
styled-string builder @@ -337,6 +337,9 @@ pub fn try_lower_property_get_method_call( // to its source text and wrapped it in ``. An Any-typed // receiver that really is a string still gets them via the // `jsval.is_string()` arm of `js_native_call_method`. + // (`normalize` is intentionally NOT in this unconditional list — the + // arg-gated `"normalize" if args.len() <= 1` arm below handles it so + // user 2-arg `normalize(pathname, matched)` methods fall through.) // Issue #638: `replace` is also string-exclusive, but routing // it here unconditionally caused regressions in async dispatch // pathways. Only fire when args[1] is statically detectable as @@ -363,6 +366,11 @@ pub fn try_lower_property_get_method_call( // startsWith / endsWith only exist on String — both 1-arg // and 2-arg (searchString, position) forms route here. "startsWith" | "endsWith" if args.len() == 1 || args.len() == 2 => true, + // `normalize` is string-exclusive only at 0/1 args. User classes + // commonly define 2-arg `normalize(pathname, matched)` methods + // (Next.js route normalizers) — those must fall through to the + // runtime dispatcher instead of erroring on String arity. + "normalize" if args.len() <= 1 => true, "lastIndexOf" if args.len() == 1 => true, _ => false, }; diff --git a/crates/perry-codegen/src/lower_string_method.rs b/crates/perry-codegen/src/lower_string_method.rs index 3ebf88f68d..8b6808a2bf 100644 --- a/crates/perry-codegen/src/lower_string_method.rs +++ b/crates/perry-codegen/src/lower_string_method.rs @@ -735,19 +735,19 @@ pub(crate) fn lower_string_method( Ok(nanbox_string_inline(blk, &result)) } "normalize" => { - // 0 or 1 arg. The runtime applies ToString + form validation: - // omitted (undefined) → NFC default; explicit null/""/"BAD" → - // RangeError. Pass the raw NaN-boxed form value (#2782). 
- if args.len() > 1 { - bail!( - "perry-codegen: String.normalize expects 0 or 1 args, got {}", - args.len() - ); - } + // Takes the form from args[0]; per spec, surplus args are + // evaluated then ignored. The runtime applies ToString + form + // validation: omitted (undefined) → NFC default; explicit + // null/""/"BAD" → RangeError. Pass the raw NaN-boxed form + // value (#2782). let form_box = if args.is_empty() { crate::nanbox::double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)) } else { - lower_expr(ctx, &args[0])? + let form = lower_expr(ctx, &args[0])?; + for extra in &args[1..] { + let _ = lower_expr(ctx, extra)?; + } + form }; let blk = ctx.block(); let recv_handle = unbox_str_handle(blk, &recv_box); diff --git a/crates/perry-codegen/src/runtime_decls/strings.rs b/crates/perry-codegen/src/runtime_decls/strings.rs index c27e08532f..6cb883d8cd 100644 --- a/crates/perry-codegen/src/runtime_decls/strings.rs +++ b/crates/perry-codegen/src/runtime_decls/strings.rs @@ -310,6 +310,11 @@ pub fn declare_phase_b_strings(module: &mut LlModule) { VOID, &[PTR, I64, PTR, I64], ); + // Generic "throw Error/TypeError/RangeError with optional Node `.code`". + // Args: (msg_ptr, msg_len, code_ptr, code_len, kind). Used by the + // WorkerNew unresolved-path fallback. Helper diverges (`-> !`); declared + // as void-return for LLVM purposes. 
+ module.declare_function("js_throw_error_with_code", VOID, &[PTR, I64, PTR, I64, I32]); module.declare_function("js_map_set", I64, &[I64, DOUBLE, DOUBLE]); module.declare_function("js_map_get", DOUBLE, &[I64, DOUBLE]); module.declare_function("js_map_has", I32, &[I64, DOUBLE]); diff --git a/crates/perry-codegen/src/runtime_decls/strings_part2.rs b/crates/perry-codegen/src/runtime_decls/strings_part2.rs index 07ab7ff044..88358d0121 100644 --- a/crates/perry-codegen/src/runtime_decls/strings_part2.rs +++ b/crates/perry-codegen/src/runtime_decls/strings_part2.rs @@ -1191,6 +1191,13 @@ pub(crate) fn declare_phase_b_strings_part2(module: &mut LlModule) { module.declare_function("js_abort_signal_throw_if_aborted", DOUBLE, &[I64]); module.declare_function("js_event_target_new", I64, &[]); module.declare_function("js_event_new", I64, &[DOUBLE, DOUBLE, I32]); + // `super(type, options)` from `class X extends Event/CustomEvent` — + // initializes Event fields onto the existing subclass `this`. + module.declare_function( + "js_event_subclass_init", + DOUBLE, + &[DOUBLE, DOUBLE, DOUBLE, I32], + ); module.declare_function("js_custom_event_new", I64, &[DOUBLE, DOUBLE, I32]); module.declare_function("js_dom_exception_new", I64, &[DOUBLE, DOUBLE]); module.declare_function("js_event_target_add_event_listener", VOID, &[I64, I64, I64]); diff --git a/crates/perry-parser/src/lib.rs b/crates/perry-parser/src/lib.rs index 99c1639ce1..8d87ab0f6e 100644 --- a/crates/perry-parser/src/lib.rs +++ b/crates/perry-parser/src/lib.rs @@ -353,6 +353,73 @@ fn looks_like_es_module(source: &str) -> bool { Some(end) } + // A `/` starts a regex literal (not division) when the preceding token + // cannot end an expression: an operator/punctuator, start of input, or a + // keyword like `return`. Regex literals may contain unescaped quote chars + // (e.g. picomatch's `/(^[*!]|[/()[\]{}"])/`), which would desync the + // string-state scan below if skipped as ordinary code. 
+ fn regex_can_start_here(bytes: &[u8], slash_at: usize) -> bool { + let mut i = slash_at; + while i > 0 { + i -= 1; + match bytes[i] { + b' ' | b'\t' | b'\r' | b'\n' => continue, + b'=' | b'(' | b',' | b':' | b'[' | b'!' | b'&' | b'|' | b'?' | b'{' | b'}' + | b';' | b'+' | b'-' | b'*' | b'%' | b'~' | b'^' | b'<' | b'>' => return true, + c if is_ident(c) => { + let end = i + 1; + let mut start = end; + while start > 0 && is_ident(bytes[start - 1]) { + start -= 1; + } + return matches!( + &bytes[start..end], + b"return" + | b"typeof" + | b"instanceof" + | b"in" + | b"of" + | b"case" + | b"do" + | b"else" + | b"void" + | b"delete" + | b"throw" + | b"new" + | b"yield" + | b"await" + ); + } + _ => return false, + } + } + true + } + + // Returns the index just past the closing `/`, or None if no regex + // terminator is found on this line (then it was division after all). + fn skip_regex_literal(bytes: &[u8], slash_at: usize) -> Option { + let mut i = slash_at + 1; + let mut in_class = false; + while i < bytes.len() { + match bytes[i] { + b'\\' => i += 2, + b'\n' => return None, + b'[' => { + in_class = true; + i += 1; + } + b']' => { + in_class = false; + i += 1; + } + b'/' if !in_class => return Some(i + 1), + _ => i += 1, + } + } + None + } + let bytes = source.as_bytes(); let mut i = 0; let mut state = State::Code; @@ -368,6 +435,11 @@ fn looks_like_es_module(source: &str) -> bool { } else if bytes[i] == b'/' && bytes.get(i + 1) == Some(&b'*') { state = State::BlockComment; i += 2; + } else if bytes[i] == b'/' && regex_can_start_here(bytes, i) { + match skip_regex_literal(bytes, i) { + Some(end) => i = end, + None => i += 1, + } } else { if prev_allows_module_item(bytes, i) { if let Some(end) = next_after_keyword(bytes, i, b"export") { @@ -707,6 +779,26 @@ pub fn swc_span_to_span(swc_span: swc_common::Span, file_id: FileId) -> Span { mod tests { use super::*; + #[test] + fn test_looks_like_es_module_survives_regex_with_quote() { + // Regression: picomatch's bundled 
source contains a regex literal with + // an unescaped `"` inside a character class. The module-detection scan + // must not enter string state there, or a trailing `export` (appended + // by the CJS wrap) is missed and the file parses as a Script. + let source = "const re = /(^[*!]|[/()[\\]{}\"])/;\nconst x = \"ok\";\nexport default x;\n"; + let module = parse_typescript(source, "vendored.js").unwrap(); + assert_eq!(module.body.len(), 3); + } + + #[test] + fn test_division_not_treated_as_regex() { + // `a / b` must not be consumed as a regex literal that would swallow + // the following string quote. + let source = "const a = 1, b = 2;\nconst c = a / b; const s = \"x\";\nexport default c;\n"; + let module = parse_typescript(source, "math.js").unwrap(); + assert_eq!(module.body.len(), 4); + } + #[test] fn test_parse_simple_function() { let source = r#" diff --git a/crates/perry-runtime/src/event_target.rs b/crates/perry-runtime/src/event_target.rs index aa8f4e3c62..cf57d60fd6 100644 --- a/crates/perry-runtime/src/event_target.rs +++ b/crates/perry-runtime/src/event_target.rs @@ -217,6 +217,21 @@ fn construct_event( if event.is_null() { return std::ptr::null_mut(); } + init_event_fields(event, type_value, options, constructor_name, detail); + event +} + +/// Shared Event field/method initialization, applied either to a freshly +/// allocated Event (`construct_event`) or to an existing subclass instance +/// (`js_event_subclass_init` — `super(type, options)` from +/// `class X extends Event`). 
+fn init_event_fields( + event: *mut ObjectHeader, + type_value: f64, + options: f64, + constructor_name: &[u8], + detail: Option, +) { let type_ptr = string_from_value(type_value); set_event_field( event, @@ -263,15 +278,61 @@ fn construct_event( "stopImmediatePropagation", event_stop_immediate_propagation_thunk, ); - event } +/// `super(type, options)` from a user `class X extends Event` / +/// `extends CustomEvent`: initialize the standard Event fields and methods +/// onto the EXISTING subclass instance (`this`) instead of allocating a new +/// Event. The subclass's own class id stays on the header — the +/// `Subclass → Event` registry edge registered at class-definition time +/// keeps `instanceof Event` and dispatch acceptance working. +#[no_mangle] +pub extern "C" fn js_event_subclass_init( + this_value: f64, + type_value: f64, + options: f64, + argc: u32, +) -> f64 { + let Some(event) = value_as_ptr::(this_value) else { + return undefined_value(); + }; + if argc == 0 { + throw_missing_arg("type"); + } + init_event_fields(event, type_value, options, b"Event", None); + undefined_value() +} + +/// Keepalive anchor for the auto-optimize whole-program build — +/// `js_event_subclass_init` is a generated-code-only callee. +#[used] +static KEEP_JS_EVENT_SUBCLASS_INIT: extern "C" fn(f64, f64, f64, u32) -> f64 = + js_event_subclass_init; + fn is_event_instance(event: *const ObjectHeader) -> bool { if event.is_null() { return false; } let class_id = unsafe { (*event).class_id }; - class_id == CLASS_ID_EVENT || class_id == CLASS_ID_CUSTOM_EVENT + if class_id == CLASS_ID_EVENT || class_id == CLASS_ID_CUSTOM_EVENT { + return true; + } + // A user subclass (`class CloseEvent extends Event`, e.g. the `ws` + // package's WebSocket events) carries its own class id; walk the + // registered parent chain looking for the Event base. 
+ let mut cur = class_id; + for _ in 0..64 { + match crate::object::get_parent_class_id(cur) { + Some(parent) if parent != 0 && parent != cur => { + if parent == CLASS_ID_EVENT || parent == CLASS_ID_CUSTOM_EVENT { + return true; + } + cur = parent; + } + _ => return false, + } + } + false } /// `new Event(type, options?)`. diff --git a/crates/perry-runtime/src/node_submodules/mod.rs b/crates/perry-runtime/src/node_submodules/mod.rs index aa0ca62e69..91c8828991 100644 --- a/crates/perry-runtime/src/node_submodules/mod.rs +++ b/crates/perry-runtime/src/node_submodules/mod.rs @@ -985,6 +985,13 @@ fn submodule_has_default_object(submod_key: &str) -> bool { | "stream_consumers" | "stream_web" | "test_reporters" + // `const nodeTimers = require('node:timers')` (Next.js's + // fast-set-immediate extension) — without a default object the + // binding read the TAG_TRUE sentinel, so member reads were + // undefined and the `nodeTimers.setImmediate = patched` + // monkey-patch threw at module init. + | "timers" + | "timers_promises" ) } diff --git a/crates/perry-runtime/src/object/class_registry.rs b/crates/perry-runtime/src/object/class_registry.rs index 993de20828..ade80c3b8c 100644 --- a/crates/perry-runtime/src/object/class_registry.rs +++ b/crates/perry-runtime/src/object/class_registry.rs @@ -39,6 +39,22 @@ fn is_non_constructable_builtin_function_value(value: f64) -> bool { super::native_module::builtin_closure_is_non_constructable_value(value) } +/// True when `value` is a bound native-module method/export closure +/// (`BOUND_METHOD_FUNC_PTR` trampoline — what a `require('stream').Writable` +/// property read produces). These represent real Node classes/functions and +/// must be accepted as `extends` targets. 
+fn is_bound_native_method_closure_value(value: f64) -> bool { + use crate::value::JSValue; + let jv = JSValue::from_bits(value.to_bits()); + if !jv.is_pointer() { + return false; + } + let raw_ptr = jv.as_pointer::(); + let closure_ptr = crate::closure::clean_closure_ptr(raw_ptr); + let func_ptr = crate::closure::get_valid_func_ptr(closure_ptr); + !func_ptr.is_null() && func_ptr == crate::closure::BOUND_METHOD_FUNC_PTR +} + fn throw_non_constructable_builtin_function() -> ! { super::object_ops::throw_object_type_error(b"Function is not a constructor") } @@ -4301,6 +4317,29 @@ pub extern "C" fn js_register_class_parent(class_id: u32, parent_class_id: u32) /// recursive helper that returns its receiver can't create a cycle. #[no_mangle] pub extern "C" fn js_register_class_parent_dynamic(class_id: u32, parent_value: f64) { + // A globalThis builtin constructor closure is a valid superclass + // (`class CloseEvent extends Event` — the `ws` package's WebSocket + // events). Resolve it through the same name table the dynamic + // `instanceof` path uses and register the edge when the builtin has a + // runtime class id, so subclass instances satisfy `instanceof Event` + // and Event-shaped dispatch gates. Builtins without a class id keep the + // parentless baseline (no throw — they ARE constructors). + if let Some(name) = identify_global_builtin_constructor(parent_value) { + let parent_cid = super::instanceof::global_builtin_constructor_class_id(name); + if parent_cid != 0 && parent_cid != class_id { + register_class(class_id, parent_cid); + } + return; + } + // A bound native-module export (`const { Writable } = require('stream'); + // class Receiver extends Writable` — the `ws` package's shape) is a real + // Node constructor even though Perry models it as a BOUND_METHOD closure. + // Keep the parentless baseline rather than mis-throwing; native-parent + // method inheritance is handled by codegen's extends_name machinery, not + // by this registry edge. 
+ if is_bound_native_method_closure_value(parent_value) { + return; + } // Spec: a non-`null` superclass that is not a constructor throws a TypeError // at class-definition time (before any `.prototype` access). (Test262 // subclass/superclass-* and definition/invalid-extends.) diff --git a/crates/perry-runtime/src/object/field_get_set.rs b/crates/perry-runtime/src/object/field_get_set.rs index 2adf3103e5..23594f5572 100644 --- a/crates/perry-runtime/src/object/field_get_set.rs +++ b/crates/perry-runtime/src/object/field_get_set.rs @@ -2681,6 +2681,11 @@ fn reified_function_method_name(name: &str) -> Option<&'static [u8]> { "call" => Some(b"call"), "apply" => Some(b"apply"), "isPrototypeOf" => Some(b"isPrototypeOf"), + // `fn.toString` read as a VALUE (`original.toString.bind(original)` — + // Next.js's unhandled-rejection extension preserves patched-function + // toString this way). Previously read back `undefined`, so the + // subsequent `.bind` threw "Bind must be called on a function". + "toString" => Some(b"toString"), _ => None, } } diff --git a/crates/perry-runtime/src/object/field_set_by_name.rs b/crates/perry-runtime/src/object/field_set_by_name.rs index 0f06136a9b..e16f604e10 100644 --- a/crates/perry-runtime/src/object/field_set_by_name.rs +++ b/crates/perry-runtime/src/object/field_set_by_name.rs @@ -617,6 +617,18 @@ pub extern "C" fn js_object_set_field_by_name( super::set_buffer_pool_size(value); return; } + // CommonJS module exports are MUTABLE in Node: monkey-patching + // like Next.js's `require('node:timers').setImmediate = patched` + // must store the override (read back via `vt_get_own_field`) + // instead of falling through to the frozen-object throw. + if !module_name.is_empty() && property_name != "__module__" { + super::native_module::native_namespace_prop_override_store( + &module_name, + property_name, + value, + ); + return; + } } // Refs #486 (hono): class setter dispatch. 
JS spec: a `set X(...)` diff --git a/crates/perry-runtime/src/object/instanceof.rs b/crates/perry-runtime/src/object/instanceof.rs index a68632f773..c7e0de1fa3 100644 --- a/crates/perry-runtime/src/object/instanceof.rs +++ b/crates/perry-runtime/src/object/instanceof.rs @@ -269,59 +269,7 @@ pub extern "C" fn js_instanceof_dynamic(value: f64, type_ref: f64) -> f64 { } _ => {} } - let class_id = match name { - // Reference-type global constructors used as runtime *values* - // (e.g. `Function.prototype[Symbol.hasInstance].call(Map, m)`, or a - // dynamic `x instanceof ctorVar`). These mirror the synthetic ids - // the compile-time `instanceof` operator emits — see - // perry-codegen/src/expr/instance_misc1.rs — which `js_instanceof` - // resolves via the per-type registries (#3662). `Array`/`Object`/ - // `Date` carry their own coercion thunks rather than the shared - // noop thunk; #4102 added those thunks to the - // `identify_global_builtin_constructor` allow-list so the dynamic / - // reflective path now resolves them here just like the literal-RHS - // operator does at compile time. 
- "Map" => 0xFFFF0022, - "Set" => 0xFFFF0023, - "RegExp" => 0xFFFF0021, - "ArrayBuffer" => 0xFFFF0025, - "Array" => 0xFFFF0024, - "Object" => 0xFFFF0050, - "Function" => CLASS_ID_FUNCTION, - "Number" => 0xFFFF00D0, - "String" => 0xFFFF00D1, - "Boolean" => 0xFFFF00D2, - "BigInt" => 0xFFFF00D3, - "Symbol" => 0xFFFF00D4, - "Date" => 0xFFFF0020, - "Error" => crate::error::CLASS_ID_ERROR, - "TypeError" => crate::error::CLASS_ID_TYPE_ERROR, - "RangeError" => crate::error::CLASS_ID_RANGE_ERROR, - "ReferenceError" => crate::error::CLASS_ID_REFERENCE_ERROR, - "SyntaxError" => crate::error::CLASS_ID_SYNTAX_ERROR, - "EvalError" => crate::error::CLASS_ID_EVAL_ERROR, - "URIError" => crate::error::CLASS_ID_URI_ERROR, - "AggregateError" => crate::error::CLASS_ID_AGGREGATE_ERROR, - "Promise" => CLASS_ID_PROMISE, - "Navigator" => crate::navigator::NAVIGATOR_CLASS_ID, - "TextEncoderStream" => crate::object::CLASS_ID_TEXT_ENCODER_STREAM, - "TextDecoderStream" => crate::object::CLASS_ID_TEXT_DECODER_STREAM, - "CompressionStream" => crate::object::CLASS_ID_COMPRESSION_STREAM, - "DecompressionStream" => crate::object::CLASS_ID_DECOMPRESSION_STREAM, - "Event" => crate::event_target::CLASS_ID_EVENT, - "CustomEvent" => crate::event_target::CLASS_ID_CUSTOM_EVENT, - "DOMException" => crate::event_target::CLASS_ID_DOM_EXCEPTION, - // TypedArray constructors used as runtime *values* (a dynamic - // `x instanceof TA` where `TA` is a variable — e.g. test262's - // `testWithTypedArrayConstructors`). Mirrors the per-kind synthetic - // ids the compile-time `instanceof Float64Array` operator resolves. 
- "Int8Array" | "Uint8Array" | "Uint8ClampedArray" | "Int16Array" | "Uint16Array" - | "Int32Array" | "Uint32Array" | "Float16Array" | "Float32Array" | "Float64Array" - | "BigInt64Array" | "BigUint64Array" => crate::typedarray::kind_for_name(name) - .map(crate::typedarray::class_id_for_kind) - .unwrap_or(0), - _ => 0, - }; + let class_id = global_builtin_constructor_class_id(name); if class_id != 0 { return js_instanceof(value, class_id); } @@ -333,6 +281,71 @@ pub extern "C" fn js_instanceof_dynamic(value: f64, type_ref: f64) -> f64 { f64::from_bits(TAG_FALSE) }; } + return js_instanceof_dynamic_tail(value, type_ref); +} + +/// Runtime class id for a globalThis built-in constructor *name*. +/// +/// Reference-type global constructors used as runtime values (e.g. +/// `Function.prototype[Symbol.hasInstance].call(Map, m)`, or a dynamic +/// `x instanceof ctorVar`). These mirror the synthetic ids the compile-time +/// `instanceof` operator emits — see perry-codegen/src/expr/instance_misc1.rs +/// — which `js_instanceof` resolves via the per-type registries (#3662). +/// `Array`/`Object`/`Date` carry their own coercion thunks rather than the +/// shared noop thunk; #4102 added those thunks to the +/// `identify_global_builtin_constructor` allow-list so the dynamic / +/// reflective path resolves them just like the literal-RHS operator does at +/// compile time. Also consulted by `js_register_class_parent_dynamic` so a +/// user `class X extends Event` registers the `X → Event` chain edge. +/// Returns 0 for names without a runtime class id. 
+pub(crate) fn global_builtin_constructor_class_id(name: &str) -> u32 { + match name { + "Map" => 0xFFFF0022, + "Set" => 0xFFFF0023, + "RegExp" => 0xFFFF0021, + "ArrayBuffer" => 0xFFFF0025, + "Array" => 0xFFFF0024, + "Object" => 0xFFFF0050, + "Function" => CLASS_ID_FUNCTION, + "Number" => 0xFFFF00D0, + "String" => 0xFFFF00D1, + "Boolean" => 0xFFFF00D2, + "BigInt" => 0xFFFF00D3, + "Symbol" => 0xFFFF00D4, + "Date" => 0xFFFF0020, + "Error" => crate::error::CLASS_ID_ERROR, + "TypeError" => crate::error::CLASS_ID_TYPE_ERROR, + "RangeError" => crate::error::CLASS_ID_RANGE_ERROR, + "ReferenceError" => crate::error::CLASS_ID_REFERENCE_ERROR, + "SyntaxError" => crate::error::CLASS_ID_SYNTAX_ERROR, + "EvalError" => crate::error::CLASS_ID_EVAL_ERROR, + "URIError" => crate::error::CLASS_ID_URI_ERROR, + "AggregateError" => crate::error::CLASS_ID_AGGREGATE_ERROR, + "Promise" => CLASS_ID_PROMISE, + "Navigator" => crate::navigator::NAVIGATOR_CLASS_ID, + "TextEncoderStream" => crate::object::CLASS_ID_TEXT_ENCODER_STREAM, + "TextDecoderStream" => crate::object::CLASS_ID_TEXT_DECODER_STREAM, + "CompressionStream" => crate::object::CLASS_ID_COMPRESSION_STREAM, + "DecompressionStream" => crate::object::CLASS_ID_DECOMPRESSION_STREAM, + "Event" => crate::event_target::CLASS_ID_EVENT, + "CustomEvent" => crate::event_target::CLASS_ID_CUSTOM_EVENT, + "DOMException" => crate::event_target::CLASS_ID_DOM_EXCEPTION, + // TypedArray constructors used as runtime *values* (a dynamic + // `x instanceof TA` where `TA` is a variable — e.g. test262's + // `testWithTypedArrayConstructors`). Mirrors the per-kind synthetic + // ids the compile-time `instanceof Float64Array` operator resolves. 
+ "Int8Array" | "Uint8Array" | "Uint8ClampedArray" | "Int16Array" | "Uint16Array" + | "Int32Array" | "Uint32Array" | "Float16Array" | "Float32Array" | "Float64Array" + | "BigInt64Array" | "BigUint64Array" => crate::typedarray::kind_for_name(name) + .map(crate::typedarray::class_id_for_kind) + .unwrap_or(0), + _ => 0, + } +} + +#[inline] +fn js_instanceof_dynamic_tail(value: f64, type_ref: f64) -> f64 { + use crate::value::TAG_FALSE; if crate::node_submodules::is_diagnostics_bounded_channel_constructor_value(type_ref) { return if crate::node_submodules::diagnostics_bounded_channel_is_instance_value(value) { f64::from_bits(crate::value::TAG_TRUE) diff --git a/crates/perry-runtime/src/object/native_module.rs b/crates/perry-runtime/src/object/native_module.rs index 7b1d9b2b92..4637bc660a 100644 --- a/crates/perry-runtime/src/object/native_module.rs +++ b/crates/perry-runtime/src/object/native_module.rs @@ -36,6 +36,32 @@ thread_local! { static MODULE_CJS_GLOBAL_PATHS_VALUE: Cell = const { Cell::new(0) }; static NATIVE_MODULE_NAMESPACES: RefCell> = RefCell::new(HashMap::new()); + /// User overrides of native-module namespace properties, keyed + /// `"{module}\0{prop}"`. CommonJS module exports are MUTABLE in Node — + /// monkey-patching like Next.js's + /// `require('node:timers').setImmediate = patched` must store and win + /// subsequent property reads instead of throwing read-only. + static NATIVE_NAMESPACE_PROP_OVERRIDES: RefCell> = + RefCell::new(HashMap::new()); +} + +/// Store a user override for a native-module namespace property +/// (`require('node:timers').setImmediate = fn`). Wins subsequent reads via +/// `vt_get_own_field`. +pub(crate) fn native_namespace_prop_override_store(module: &str, prop: &str, value: f64) { + NATIVE_NAMESPACE_PROP_OVERRIDES.with(|m| { + m.borrow_mut() + .insert(format!("{module}\0{prop}"), value.to_bits()); + }); +} + +/// Read back a stored native-namespace property override, if any. 
+pub(crate) fn native_namespace_prop_override_get(module: &str, prop: &str) -> Option { + NATIVE_NAMESPACE_PROP_OVERRIDES.with(|m| { + m.borrow() + .get(&format!("{module}\0{prop}")) + .map(|bits| f64::from_bits(*bits)) + }) } fn bound_native_method_length(name: &str) -> Option { @@ -59,6 +85,12 @@ pub fn scan_native_callable_export_roots_mut(visitor: &mut crate::gc::RuntimeRoo visitor.visit_nanbox_u64_slot(value_bits); } }); + NATIVE_NAMESPACE_PROP_OVERRIDES.with(|cache| { + let mut cache = cache.borrow_mut(); + for value_bits in cache.values_mut() { + visitor.visit_nanbox_u64_slot(value_bits); + } + }); NATIVE_MODULE_ACCESSOR_EXPORTS.with(|cache| { let mut cache = cache.borrow_mut(); for value_bits in cache.values_mut() { @@ -2936,6 +2968,18 @@ pub(crate) fn native_module_enumerable_keys(module_name: &str) -> Option<&'stati VM_NAMESPACE_KEYS }), "vm.constants" => Some(VM_CONSTANTS_KEYS), + // Plain `timers` was missing — `require('node:timers').setImmediate` + // read undefined (Next.js's fast-set-immediate extension reads and + // patches it at module init). + "timers" => Some(&[ + b"setTimeout", + b"clearTimeout", + b"setInterval", + b"clearInterval", + b"setImmediate", + b"clearImmediate", + b"promises", + ]), "timers/promises" => Some(&[b"setTimeout", b"setImmediate", b"setInterval", b"scheduler"]), "readline/promises" => Some(&[b"Interface", b"Readline", b"createInterface"]), "zlib" => Some(&[b"codes"]), @@ -4545,6 +4589,23 @@ fn attach_module_cjs_constructor_statics(closure_addr: usize) { bound_native_callable_export_value("module", name), ); } + // `Module.prototype` — Node's require-hook pattern (Next.js): + // `const mod = require('module'); const orig = mod.prototype.require; + // mod.prototype.require = function(request) {…}`. Expose a plain object + // carrying a `require` method so the read+patch round-trips; the patch + // is inert under AOT compilation (Perry resolves modules at compile + // time), but startup must not throw on the access. 
+ let proto = js_object_alloc(0, 1); + native_set_field( + proto, + "require", + bound_native_callable_export_value("module", "_load"), + ); + crate::closure::closure_set_dynamic_prop( + closure_addr, + "prototype", + crate::value::js_nanbox_pointer(proto as i64), + ); } fn native_color_tuple(open: i32, close: i32) -> f64 { @@ -8440,6 +8501,11 @@ unsafe fn vt_get_own_field( } let property_name = std::str::from_utf8(std::slice::from_raw_parts(key_ptr, key_len)).unwrap_or(""); + // A user override (`require('node:timers').setImmediate = patched`) + // wins all built-in resolution below — CJS exports are mutable in Node. + if let Some(value) = native_namespace_prop_override_get(&module_name, property_name) { + return Some(JSValue::from_bits(value.to_bits())); + } if matches!( module_name, "process" | "process.namespace" | "process.default" diff --git a/crates/perry-runtime/src/object/reflect_support.rs b/crates/perry-runtime/src/object/reflect_support.rs index 018ae51f2e..d6a3f6b08e 100644 --- a/crates/perry-runtime/src/object/reflect_support.rs +++ b/crates/perry-runtime/src/object/reflect_support.rs @@ -87,6 +87,25 @@ pub(crate) fn obj_value_has_own_key(value: f64, key: f64) -> bool { }; return super::has_own_helpers::closure_own_key_present(obj_addr, &key_name); } + // Native-module namespaces (console, fs, …) expose their members as + // VIRTUAL keys — dispatch tables, not keys_array entries. Mirror the + // `js_object_get_own_property_descriptor` arm so a redefinition like + // `Object.defineProperty(console, 'error', { value })` (Next.js + // patches console methods this way, repeatedly) is treated as + // redefining an EXISTING property — absent descriptor attributes then + // retain the property's writable/enumerable/configurable=true + // defaults instead of collapsing to the new-property `false`s (which + // made the SECOND patch throw `Cannot redefine property`). 
+ if (*obj).class_id == super::native_module::NATIVE_MODULE_CLASS_ID { + if let (Some(module_name), Some(key_name)) = ( + super::native_module::read_native_module_name(obj), + key_to_rust_string(key), + ) { + if super::native_module::native_module_has_enumerable_key(&module_name, &key_name) { + return true; + } + } + } let key_str = crate::builtins::js_string_coerce(key); if key_str.is_null() { return false; diff --git a/crates/perry-runtime/src/proxy.rs b/crates/perry-runtime/src/proxy.rs index 64650d5c98..efdf0cb0dc 100644 --- a/crates/perry-runtime/src/proxy.rs +++ b/crates/perry-runtime/src/proxy.rs @@ -1107,14 +1107,6 @@ fn ordinary_set_with_receiver(target: f64, key: f64, value: f64, receiver: f64) return ok; } - // #5054 fast path: the spec walk below probes own_set_descriptor on the - // target, which ends in a LINEAR keys_array scan — so every dynamic - // `obj[key] = v` was O(own-key-count) and building a wide dynamic object - // quadratic (10k props ~ 12s). When nothing the walk models can apply — - // plain GC_TYPE_OBJECT receiver written as itself, no property/accessor - // descriptor ever installed in the process (monotonic global), no class - // machinery (class_id 0), no recorded setPrototypeOf target, extensible, - // string key — the write reduces to the ordinary data-property store. // #5054 fast path: the spec walk below probes own_set_descriptor on the // target, which ends in a LINEAR keys_array scan — so every dynamic // `obj[key] = v` was O(own-key-count) and building a wide dynamic object @@ -1161,6 +1153,38 @@ fn ordinary_set_with_receiver(target: f64, key: f64, value: f64, receiver: f64) } } + // CommonJS native-module namespaces are MUTABLE in Node — monkey-patching + // like Next.js's `require('node:timers').setImmediate = patched` must + // store the override (read back through the namespace vtable's + // `get_own_field`) rather than reporting the built-in member + // non-writable and throwing under strict mode. 
+ { + let jv = crate::value::JSValue::from_bits(target.to_bits()); + if jv.is_pointer() { + let obj = extract_pointer(target.to_bits()) as *const crate::object::ObjectHeader; + if !obj.is_null() && unsafe { (*obj).class_id } == crate::object::NATIVE_MODULE_CLASS_ID + { + let module_name = unsafe { crate::object::get_module_name_from_namespace(target) }; + if let (false, Some(prop)) = + (module_name.is_empty(), property_key_to_rust_string(key)) + { + if prop != "__module__" { + if module_name == "buffer.Buffer" && prop == "poolSize" { + crate::object::set_buffer_pool_size(value); + } else { + crate::object::native_namespace_prop_override_store( + module_name, + &prop, + value, + ); + } + return true; + } + } + } + } + } + let mut current = target; for _ in 0..64 { // Integer-Indexed exotic [[Set]] (§10.4.5.5): a typed array in the diff --git a/crates/perry/src/commands/compile/cjs_wrap/detect.rs b/crates/perry/src/commands/compile/cjs_wrap/detect.rs index 81bd4a3ba6..091abc6fc5 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/detect.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/detect.rs @@ -21,21 +21,226 @@ use super::*; /// `ImportExportInScript`. The guard below short-circuits the wrap when a /// top-level `import`/`export` statement is detected. pub(in crate::commands::compile) fn is_commonjs(source: &str) -> bool { + // An empty (or whitespace-only) file is a valid CJS module exporting + // `{}` — marker packages like react's `client-only` ship a 0-byte + // index.js whose default import must resolve to the empty exports + // object, which only the wrap provides. + if source.trim().is_empty() { + return true; + } + // ALL token scans run on comment/string-stripped source. 
Real packages + // defeat raw-text scans in both directions: Next.js's + // `setup-node-env.external.js` has the word "import " in a header + // comment (which flipped the `require(` arm), and `next/dist/build/ + // utils.js` GENERATES an ESM server.js inside a template literal whose + // column-0 `import path from 'node:path'` line made `has_top_level_esm` + // classify the (thoroughly CJS) file as ESM — its bare `exports` then + // threw a ReferenceError at module init. + let stripped = strip_comments_and_strings(source); // ESM-at-the-top wins: a top-level `import`/`export` makes this an // ES module regardless of CJS patterns appearing deeper in the file. - if has_top_level_esm(source) { + if has_top_level_esm(&stripped) { return false; } - source.contains("module.exports") - || source.contains("exports.") + if stripped.contains("module.exports") + || stripped.contains("exports.") // Issue #4872: tsc-compiled type-only modules (nestjs dist // `*.interface.js`) contain ONLY the interop marker // `Object.defineProperty(exports, "__esModule", { value: true });` // — no `exports.X =`, no `require(`. Without this arm they fall // through to the ESM pipeline, where the bare `exports` identifier // throws a ReferenceError at module init. - || source.contains("defineProperty(exports,") - || (source.contains("require(") && !source.contains("import ")) + || stripped.contains("defineProperty(exports,") + { + return true; + } + stripped.contains("require(") && !stripped.contains("import ") +} + +/// Replace comment bodies and string/template-literal contents with spaces +/// so token scans (`require(`, `import `) only see real code. Same scanner +/// shape as `looks_like_es_module` in perry-parser, including the +/// regex-literal tracking — a regex containing an unescaped quote (e.g. +/// `/['"]/` in vendored minified bundles like comment-json) would otherwise +/// desync the string state and mask the rest of the file, hiding a trailing +/// `module.exports = …`. 
+fn strip_comments_and_strings(source: &str) -> String { + #[derive(Clone, Copy, PartialEq, Eq)] + enum State { + Code, + Str(u8), + LineComment, + BlockComment, + } + + fn is_ident(b: u8) -> bool { + b == b'_' || b == b'$' || b.is_ascii_alphanumeric() + } + + // A `/` starts a regex literal (not division) when the preceding token + // cannot end an expression. Mirrors perry-parser's heuristic. + fn regex_can_start_here(bytes: &[u8], slash_at: usize) -> bool { + let mut i = slash_at; + while i > 0 { + i -= 1; + match bytes[i] { + b' ' | b'\t' | b'\r' | b'\n' => continue, + b'=' | b'(' | b',' | b':' | b'[' | b'!' | b'&' | b'|' | b'?' | b'{' | b'}' + | b';' | b'+' | b'-' | b'*' | b'%' | b'~' | b'^' | b'<' | b'>' => return true, + c if is_ident(c) => { + let end = i + 1; + let mut start = end; + while start > 0 && is_ident(bytes[start - 1]) { + start -= 1; + } + return matches!( + &bytes[start..end], + b"return" + | b"typeof" + | b"instanceof" + | b"in" + | b"of" + | b"case" + | b"do" + | b"else" + | b"void" + | b"delete" + | b"throw" + | b"new" + | b"yield" + | b"await" + ); + } + _ => return false, + } + } + true + } + + // Returns the index just past the closing `/`, or None if no regex + // terminator is found on this line (then it was division after all). + fn skip_regex_literal(bytes: &[u8], slash_at: usize) -> Option { + let mut i = slash_at + 1; + let mut in_class = false; + while i < bytes.len() { + match bytes[i] { + b'\\' => i += 2, + b'\n' => return None, + b'[' => { + in_class = true; + i += 1; + } + b']' => { + in_class = false; + i += 1; + } + b'/' if !in_class => return Some(i + 1), + _ => i += 1, + } + } + None + } + + let bytes = source.as_bytes(); + let mut out = vec![b' '; bytes.len()]; + let mut state = State::Code; + let mut i = 0; + // Open `${…}` template interpolations: each entry is the `{`-nesting + // depth inside that interpolation. 
The interpolation body is real code + // (left unmasked) and may itself contain nested template literals — + // next/dist/build/utils.js generates server.js via + // `` `${moduleType ? `import …` : `const …`}` `` and a non-nesting + // scanner ends the outer template at the first INNER backtick, + // unmasking the generated `import` lines. + let mut template_interp_depth: Vec = Vec::new(); + while i < bytes.len() { + match state { + State::Code => { + if bytes[i] == b'\'' || bytes[i] == b'"' || bytes[i] == b'`' { + state = State::Str(bytes[i]); + i += 1; + } else if bytes[i] == b'/' && bytes.get(i + 1) == Some(&b'/') { + state = State::LineComment; + i += 2; + } else if bytes[i] == b'/' && bytes.get(i + 1) == Some(&b'*') { + state = State::BlockComment; + i += 2; + } else if bytes[i] == b'/' && regex_can_start_here(bytes, i) { + // Regex literal: mask its body (it may contain quotes) + // but keep scanning code after it. + match skip_regex_literal(bytes, i) { + Some(end) => i = end, + None => { + out[i] = bytes[i]; + i += 1; + } + } + } else if bytes[i] == b'{' { + if let Some(depth) = template_interp_depth.last_mut() { + *depth += 1; + } + out[i] = bytes[i]; + i += 1; + } else if bytes[i] == b'}' { + match template_interp_depth.last_mut() { + Some(0) => { + // Close of a `${…}` — resume the template literal. + template_interp_depth.pop(); + state = State::Str(b'`'); + i += 1; + } + Some(depth) => { + *depth -= 1; + out[i] = bytes[i]; + i += 1; + } + None => { + out[i] = bytes[i]; + i += 1; + } + } + } else { + out[i] = bytes[i]; + i += 1; + } + } + State::Str(quote) => { + if bytes[i] == b'\\' { + i += 2; + } else if quote == b'`' && bytes[i] == b'$' && bytes.get(i + 1) == Some(&b'{') { + // `${` — interpolation body is code (and may nest). 
+ template_interp_depth.push(0); + state = State::Code; + i += 2; + } else { + if bytes[i] == quote { + state = State::Code; + } + i += 1; + } + } + State::LineComment => { + if bytes[i] == b'\n' { + state = State::Code; + out[i] = b'\n'; + } + i += 1; + } + State::BlockComment => { + if bytes[i] == b'*' && bytes.get(i + 1) == Some(&b'/') { + state = State::Code; + i += 2; + } else { + i += 1; + } + } + } + } + // SAFETY-free: `out` is pure ASCII spaces plus bytes copied verbatim + // from `source` at their original positions, so it remains valid UTF-8 + // except where a multi-byte char was partially masked — use lossy + // conversion to stay safe. + String::from_utf8_lossy(&out).into_owned() } /// Returns true if `source` contains an unindented `import ` / `import{` / diff --git a/crates/perry/src/commands/compile/cjs_wrap/hoist_classes.rs b/crates/perry/src/commands/compile/cjs_wrap/hoist_classes.rs index 5378581e66..93aac6563a 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/hoist_classes.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/hoist_classes.rs @@ -248,11 +248,14 @@ pub fn extract_top_level_class_decls(source: &str) -> (String, Vec, Stri continue; }; - // Match optional leading whitespace. - let mut p = line_start; - while p < bytes.len() && (bytes[p] == b' ' || bytes[p] == b'\t') { - p += 1; - } + // Column-0 only: an indented `class` is (almost always) nested inside + // a function — `function mod() {\n const f = ...;\n class Event2 { + // constructor(t) { this[f] = t; } }\n}` (the `ws` package's event + // classes have this shape). Hoisting a nested class out of the IIFE + // severs its closure over the enclosing function's locals, turning + // `f` into a ReferenceError at runtime. The #2310 let/const/var + // guard below can't catch those — it only collects TOP-LEVEL names. + let p = line_start; if p + 6 <= bytes.len() && &bytes[p..p + 6] == b"class " { // Skip past "class ". 
diff --git a/crates/perry/src/commands/compile/cjs_wrap/mod.rs b/crates/perry/src/commands/compile/cjs_wrap/mod.rs index c7930a9837..c19a4cface 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/mod.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/mod.rs @@ -96,6 +96,76 @@ mod tests { assert!(!is_commonjs("import x from 'foo'; export const y = 1;")); } + #[test] + fn require_only_file_with_import_word_in_comment_is_cjs() { + // Next.js `setup-node-env.external.js`: pure side-effect requires, + // but the header comment contains the word "import". The comment + // must not flip classification to ESM. + let src = r#"// This is a minimal import that initializes the node environment +"use strict"; +if (process.env.NEXT_RUNTIME !== 'edge') { + require('next/dist/server/node-environment'); +} +"#; + assert!( + is_commonjs(src), + "comment text must not defeat require( arm" + ); + } + + #[test] + fn template_literal_esm_codegen_is_still_cjs() { + // next/dist/build/utils.js writes an ESM server.js via a template + // literal whose column-0 `import path from 'node:path'` line must + // not flip this CJS file to the ESM pipeline. + let src = "\"use strict\";\nObject.defineProperty(exports, \"__esModule\", { value: true });\nexports.write = function() {\n return `performance.mark('next-start');\nimport path from 'node:path'\nimport module from 'node:module'\n`;\n};\n"; + assert!( + is_commonjs(src), + "template-literal import must not defeat CJS detection" + ); + } + + #[test] + fn nested_template_interpolation_stays_masked() { + // next/dist/build/utils.js shape: an outer template whose `${…}` + // interpolation contains NESTED templates with column-0 `import` + // lines. The whole construct must stay masked as string content. + let src = "\"use strict\";\nexports.write = (m) => {\n return `${m ? 
`x\nimport path from 'node:path'\n` : `const path = require('path')`}\nrest`;\n};\n"; + assert!( + is_commonjs(src), + "nested template import lines must not defeat CJS detection" + ); + } + + #[test] + fn regex_with_quote_does_not_mask_trailing_module_exports() { + // comment-json's bundle shape: regex literals containing quotes + // followed by the real `module.exports=` tail. The stripper must + // track regex literals or the tail is masked as string content. + let src = "const e = s.split(/['\"]/);\nvar i = make();\nmodule.exports = i;\n"; + assert!( + is_commonjs(src), + "regex with quote must not hide module.exports" + ); + } + + #[test] + fn require_in_string_only_is_not_cjs() { + // `require(` appearing only inside a string literal is not evidence + // of CommonJS. + let src = "const msg = \"call require('x') yourself\";\nconsole.log(msg);\n"; + assert!(!is_commonjs(src)); + } + + #[test] + fn empty_file_is_cjs() { + // Marker packages (react's `client-only`) ship a 0-byte index.js; + // its default import must resolve to the wrap's empty exports + // object, so empty/whitespace-only sources count as CommonJS. + assert!(is_commonjs("")); + assert!(is_commonjs(" \n\t\n")); + } + #[test] fn issue_851_rollup_hybrid_esm_with_inner_cjs_is_esm() { // Rollup-bundled output (vitest's `dist/chunks/*.js` shape): diff --git a/crates/perry/src/commands/compile/collect_modules.rs b/crates/perry/src/commands/compile/collect_modules.rs index 710c9d9e2c..e62adfa290 100644 --- a/crates/perry/src/commands/compile/collect_modules.rs +++ b/crates/perry/src/commands/compile/collect_modules.rs @@ -1012,10 +1012,18 @@ fn collect_module_one( worker_path_sets.push(set); } perry_hir::Resolution::Unresolved(reason) => { - dyn_errors.push(format!( - "worker_threads Worker in module {}: {}", + // Real-world packages (e.g. Next.js build-time worker + // pools) construct Workers on paths that are never hit + // when the compiled program runs. 
Warn and let codegen + // lower this WorkerNew to a runtime throw instead of + // failing the whole compile. Push an empty set to keep + // the fill pass aligned with resolved siblings. + eprintln!( + " Warning: worker_threads Worker in module {}: {} — \ + this Worker will throw if constructed at runtime", module_name, reason - )); + ); + worker_path_sets.push(Vec::new()); } } } diff --git a/crates/perry/src/commands/compile/resolve.rs b/crates/perry/src/commands/compile/resolve.rs index 2cd742f014..f4900c7359 100644 --- a/crates/perry/src/commands/compile/resolve.rs +++ b/crates/perry/src/commands/compile/resolve.rs @@ -465,7 +465,13 @@ pub(super) fn resolve_package_entry(package_dir: &Path, subpath: Option<&str>) - }; if let Some(exports) = pkg.get("exports") { - if let Some(entry) = resolve_exports(exports, &export_key) { + // Try every condition branch in priority order and take the first + // target that exists on disk. A single-winner pick breaks under + // Next.js standalone output: its file tracing prunes the package + // files the build didn't load, so `@swc/helpers`' `import` target + // (`esm/*.js`) is absent while the `default` target (`cjs/*.cjs`) + // is present — Node resolves the latter at require time. + for entry in resolve_exports_candidates(exports, &export_key) { let entry_path = package_dir.join(&entry); if entry_path.exists() { return Some(entry_path); @@ -656,6 +662,60 @@ fn resolve_subpath_import(import_source: &str, importer_path: &Path) -> Option

Vec { + const CONDITIONS: &[&str] = &["perry", "import", "module", "default", "require", "node"]; + fn collect(value: &serde_json::Value, subpath: &str, out: &mut Vec) { + match value { + serde_json::Value::String(s) => { + if !out.contains(s) { + out.push(s.clone()); + } + } + serde_json::Value::Object(map) => { + if let Some(entry) = map.get(subpath) { + collect(entry, subpath, out); + return; + } + for (key, entry) in map.iter() { + if key.contains('*') { + let parts: Vec<&str> = key.splitn(2, '*').collect(); + if parts.len() == 2 { + let (prefix, suffix) = (parts[0], parts[1]); + if subpath.starts_with(prefix) && subpath.ends_with(suffix) { + let matched = &subpath[prefix.len()..subpath.len() - suffix.len()]; + let mut templates = Vec::new(); + collect(entry, subpath, &mut templates); + for template in templates { + let resolved = template.replace('*', matched); + if !out.contains(&resolved) { + out.push(resolved); + } + } + } + } + } + } + for condition in CONDITIONS { + if let Some(entry) = map.get(*condition) { + collect(entry, subpath, out); + } + } + } + _ => {} + } + } + let mut out = Vec::new(); + collect(exports, subpath, &mut out); + out +} + fn canonical_existing_declaration(path: PathBuf) -> Option { if path.exists() && is_declaration_file(&path) { Some(path.canonicalize().unwrap_or(path)) diff --git a/crates/perry/src/commands/compile/resolve/tests.rs b/crates/perry/src/commands/compile/resolve/tests.rs index 49dbc2e32d..d96beda956 100644 --- a/crates/perry/src/commands/compile/resolve/tests.rs +++ b/crates/perry/src/commands/compile/resolve/tests.rs @@ -1695,3 +1695,42 @@ mod subpath_imports_tests { ); } } + +#[cfg(test)] +mod exports_candidates_tests { + use crate::commands::compile::resolve::resolve_exports_candidates; + + #[test] + fn pruned_import_target_falls_back_to_default() { + // @swc/helpers shape under Next.js standalone output: file tracing + // prunes esm/, so the `import` condition target is absent on disk and + // the resolver must 
surface `default` (cjs) as a later candidate. + let exports: serde_json::Value = serde_json::json!({ + ".": { "import": "./esm/index.js", "default": "./cjs/index.cjs" }, + "./_/_interop_require_default": { + "import": "./esm/_interop_require_default.js", + "default": "./cjs/_interop_require_default.cjs" + } + }); + let candidates = resolve_exports_candidates(&exports, "./_/_interop_require_default"); + assert_eq!( + candidates, + vec![ + "./esm/_interop_require_default.js".to_string(), + "./cjs/_interop_require_default.cjs".to_string(), + ] + ); + } + + #[test] + fn wildcard_candidates_expand_star() { + let exports: serde_json::Value = serde_json::json!({ + "./cjs/*": { "import": "./esm/*.js", "default": "./cjs/*.cjs" } + }); + let candidates = resolve_exports_candidates(&exports, "./cjs/foo"); + assert_eq!( + candidates, + vec!["./esm/foo.js".to_string(), "./cjs/foo.cjs".to_string()] + ); + } +} From d55498504db40e7838d7e501ec1f990dc15bd76b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 00:15:54 +0200 Subject: [PATCH 02/15] =?UTF-8?q?fix(hir,codegen,runtime):=20function-nest?= =?UTF-8?q?ed=20class=20capture=20suite=20=E2=80=94=20vendored=20zod?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five related fixes for classes declared inside functions that capture enclosing locals (the webpack/ncc bundle pattern; Next.js's vendored zod was the reproducer — `z.string().min(2)` chains): 1. Decl-site capture snapshot for DYNAMIC construction: new Expr::RegisterClassCaptures (emitted at the class's source-order decl position, parallel to RegisterClassParentDynamic) snapshots the current capture values into the runtime CLASS_CAPTURE_VALUES table (GC-scanned); replay_registered_class_constructor fills the synthesized __perry_cap_* ctor params from it, so `exports.C = C; new mod.C()` constructs working instances. Static `new C()` sites keep passing captures inline. 2. 
Tail-aligned capture binding: inline_constructor_param_values binds __perry_cap_* params from the args TAIL and user params from the head. The synthesized no-user-ctor capturing class has zero user params while `new C({...})` passes user args — positional binding put the user arg into the capture slot. 3. Super-inline capture forwarding: a capturing PARENT's ctor has trailing cap params the user-written `super(...)` args don't include; the super-inline site now appends the CHILD ctor's same-named cap param values (the capture union guarantees they're in scope). 4. Spec default-ctor arg forwarding: the synthesized no-user-ctor for a derived capturing class called `super()` with NO args, dropping the construction-site user args (ZodType's `this._def = def` read undefined). It now forwards explicit params matching the closest pending-ancestor ctor's user arity. 5. Self-construction sites: `new (…)` inside the class's OWN method bodies (zod's `_addCheck`) lowered before the class registered its captures, so nothing was appended. After the method-prologue capture rebind, those sites now get the rebind ids appended (nested closures included, with capture-list patch-up). Known cosmetic follow-up: __perry_cap_* fields are enumerable own properties (visible in JSON.stringify of instances). Repros validated: 20 standalone shapes + the real next/dist/compiled/zod/index.cjs bundle (z.string().min(2).parse). 
--- crates/perry-codegen/src/expr/mod.rs | 1 + .../src/expr/static_field_meta.rs | 44 +++ .../perry-codegen/src/expr/this_super_call.rs | 39 +++ crates/perry-codegen/src/lower_call/new.rs | 32 ++- .../src/runtime_decls/strings.rs | 9 +- crates/perry-hir/src/ir/expr.rs | 13 + crates/perry-hir/src/lower_decl/body_stmt.rs | 17 ++ .../src/lower_decl/class_captures.rs | 271 ++++++++++++++++-- crates/perry-hir/src/stable_hash/expr.rs | 1 + crates/perry-hir/src/walker/expr_mut.rs | 5 + crates/perry-hir/src/walker/expr_ref.rs | 5 + crates/perry-runtime/src/gc/mod.rs | 20 +- .../src/object/class_constructors.rs | 74 ++++- 13 files changed, 480 insertions(+), 51 deletions(-) diff --git a/crates/perry-codegen/src/expr/mod.rs b/crates/perry-codegen/src/expr/mod.rs index d85c58dbda..04d3a191f6 100644 --- a/crates/perry-codegen/src/expr/mod.rs +++ b/crates/perry-codegen/src/expr/mod.rs @@ -1892,6 +1892,7 @@ pub(crate) fn lower_expr(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { Expr::StaticFieldGet { .. } | Expr::StaticFieldSet { .. } | Expr::RegisterClassParentDynamic { .. } + | Expr::RegisterClassCaptures { .. } | Expr::RegisterClassStaticSymbol { .. } | Expr::RegisterClassComputedMethod { .. } | Expr::RegisterClassComputedAccessor { .. } diff --git a/crates/perry-codegen/src/expr/static_field_meta.rs b/crates/perry-codegen/src/expr/static_field_meta.rs index ca9d85587f..c583256e6a 100644 --- a/crates/perry-codegen/src/expr/static_field_meta.rs +++ b/crates/perry-codegen/src/expr/static_field_meta.rs @@ -121,6 +121,50 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { // observable to user code. Ok(double_literal(f64::from_bits(0x7FFC_0000_0000_0001))) } + // Snapshot a function-nested class's captured outer locals into the + // runtime CLASS_CAPTURE_VALUES table at the decl site, so DYNAMIC + // construction of the class value (`exports.C = C; new mod.C()` — + // the webpack/zod bundle pattern) can fill the synthesized + // `__perry_cap_` ctor params. 
Mirrors RegisterClassParentDynamic + // placement; static `new C()` sites pass captures inline and never + // consult the table. + Expr::RegisterClassCaptures { + class_name, + captures, + } => { + let mut lowered: Vec = Vec::with_capacity(captures.len()); + for c in captures { + lowered.push(lower_expr(ctx, c)?); + } + if let Some(&class_id) = ctx.class_ids.get(class_name) { + if class_id != 0 && !lowered.is_empty() { + let n = lowered.len(); + let buf = ctx.func.alloca_entry_array(DOUBLE, n); + for (i, v) in lowered.iter().enumerate() { + let slot = + ctx.block() + .gep(DOUBLE, &buf, &[(crate::types::I64, &i.to_string())]); + ctx.block().store(DOUBLE, v, &slot); + } + let ptr_reg = ctx.block().next_reg(); + ctx.block().emit_raw(format!( + "{} = getelementptr [{} x double], ptr {}, i64 0, i64 0", + ptr_reg, n, buf + )); + let cid_str = class_id.to_string(); + let len_str = n.to_string(); + ctx.block().call_void( + "js_class_register_capture_values", + &[ + (crate::types::I32, &cid_str), + (crate::types::PTR, &ptr_reg), + (crate::types::I64, &len_str), + ], + ); + } + } + Ok(double_literal(f64::from_bits(0x7FFC_0000_0000_0001))) + } // Issue #894: `static [Symbol.for("k")] = init` inside a // class expression returned from a factory function. Emitted // by HIR lowering as a `Sequence([…, RegisterClassStaticSymbol, diff --git a/crates/perry-codegen/src/expr/this_super_call.rs b/crates/perry-codegen/src/expr/this_super_call.rs index 264b117054..276204d610 100644 --- a/crates/perry-codegen/src/expr/this_super_call.rs +++ b/crates/perry-codegen/src/expr/this_super_call.rs @@ -568,6 +568,45 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { } if let Some(parent_ctor) = &effective_parent_class.constructor { + // The parent's synthesized `__perry_cap_*` params (a parent + // class that captures enclosing locals) are NOT in the + // user-written `super(...)` args. 
The CHILD's ctor carries + // same-named cap params (capture union), bound in the current + // scope — append their values by NAME so the binder's + // tail-aligned cap binding sees them. Without this, + // tail-binding pulled the LAST user arg into the parent's cap + // slot and the parent ctor's real params read undefined + // (vendored zod: ZodType's `this._def = def` got undefined). + let parent_cap_params: Vec = parent_ctor + .params + .iter() + .filter(|p| p.name.starts_with("__perry_cap_")) + .map(|p| p.name.clone()) + .collect(); + if !parent_cap_params.is_empty() { + let child_cap_ids: std::collections::HashMap = ctx + .class_stack + .last() + .and_then(|child| ctx.classes.get(child.as_str())) + .and_then(|c| c.constructor.as_ref()) + .map(|ctor| { + ctor.params + .iter() + .filter(|p| p.name.starts_with("__perry_cap_")) + .map(|p| (p.name.clone(), p.id)) + .collect() + }) + .unwrap_or_default(); + for cap_name in &parent_cap_params { + let val = child_cap_ids + .get(cap_name) + .and_then(|id| ctx.locals.get(id).cloned()) + .map(|slot| ctx.block().load(DOUBLE, &slot)); + lowered_args.push(val.unwrap_or_else(|| { + double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)) + })); + } + } let saved_scope = bind_inline_constructor_params(ctx, &parent_ctor.params, &lowered_args); diff --git a/crates/perry-codegen/src/lower_call/new.rs b/crates/perry-codegen/src/lower_call/new.rs index 2a721cd5d3..89247e8564 100644 --- a/crates/perry-codegen/src/lower_call/new.rs +++ b/crates/perry-codegen/src/lower_call/new.rs @@ -269,21 +269,43 @@ fn inline_constructor_param_values( lowered_args: &[String], ) -> Vec { let undef = double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)); + // Synthesized `__perry_cap_` capture params are always TRAILING + // params, and `Expr::New` sites always append the capture values after + // the user args — but the two sides need not agree on the USER arity. 
+ // A no-user-ctor capturing class has zero user params while the `new` + // site may pass user args (`new ZodString({})` — the vendored-zod + // bundle), so positional binding put the user arg into the capture + // slot. Bind capture params from the args TAIL and user params from + // the head. + let n_caps = params + .iter() + .filter(|p| { + p.name.starts_with("__perry_cap_") && !p.is_rest && p.arguments_object.is_none() + }) + .count() + .min(lowered_args.len()); + let user_len = lowered_args.len() - n_caps; + let (user_args, cap_args) = lowered_args.split_at(user_len); + let mut cap_iter = cap_args.iter(); + let mut out = Vec::with_capacity(params.len()); let mut visible_index = 0usize; for param in params { - if param.arguments_object.is_some() { - out.push(pack_lowered_args_array(ctx, lowered_args)); + if param.name.starts_with("__perry_cap_") && !param.is_rest && param.arguments_object.is_none() + { + out.push(cap_iter.next().cloned().unwrap_or_else(|| undef.clone())); + } else if param.arguments_object.is_some() { + out.push(pack_lowered_args_array(ctx, user_args)); } else if param.is_rest { - let tail = if visible_index < lowered_args.len() { - &lowered_args[visible_index..] + let tail = if visible_index < user_args.len() { + &user_args[visible_index..] } else { &[] }; out.push(pack_lowered_args_array(ctx, tail)); } else { out.push( - lowered_args + user_args .get(visible_index) .cloned() .unwrap_or_else(|| undef.clone()), diff --git a/crates/perry-codegen/src/runtime_decls/strings.rs b/crates/perry-codegen/src/runtime_decls/strings.rs index 6cb883d8cd..d9564fa70d 100644 --- a/crates/perry-codegen/src/runtime_decls/strings.rs +++ b/crates/perry-codegen/src/runtime_decls/strings.rs @@ -314,7 +314,11 @@ pub fn declare_phase_b_strings(module: &mut LlModule) { // Args: (msg_ptr, msg_len, code_ptr, code_len, kind). Used by the // WorkerNew unresolved-path fallback. Helper diverges (`-> !`); declared // as void-return for LLVM purposes. 
- module.declare_function("js_throw_error_with_code", VOID, &[PTR, I64, PTR, I64, I32]); + module.declare_function( + "js_throw_error_with_code", + VOID, + &[PTR, I64, PTR, I64, I32], + ); module.declare_function("js_map_set", I64, &[I64, DOUBLE, DOUBLE]); module.declare_function("js_map_get", DOUBLE, &[I64, DOUBLE]); module.declare_function("js_map_has", I32, &[I64, DOUBLE]); @@ -1123,6 +1127,9 @@ pub fn declare_phase_b_strings(module: &mut LlModule) { // class_id from the value (ClassRef payload or ObjectHeader.class_id) // and wires the (child, parent) edge into CLASS_REGISTRY. module.declare_function("js_register_class_parent_dynamic", VOID, &[I32, DOUBLE]); + // Decl-site snapshot of a function-nested class's captured locals — + // consumed by the dynamic-construction replay (`new mod.C()`). + module.declare_function("js_class_register_capture_values", VOID, &[I32, PTR, I64]); // Issue #711 part 2: prototype-based class declaration via // `.prototype = `. Binds an object as the function's // prototype source; subsequent `class X extends ` lookups diff --git a/crates/perry-hir/src/ir/expr.rs b/crates/perry-hir/src/ir/expr.rs index 95c68248c4..54219052f4 100644 --- a/crates/perry-hir/src/ir/expr.rs +++ b/crates/perry-hir/src/ir/expr.rs @@ -364,6 +364,19 @@ pub enum Expr { parent_expr: Box, }, + /// Snapshot the CURRENT values of a function-nested class's captured + /// outer-scope locals into the runtime `CLASS_CAPTURE_VALUES` table. + /// Emitted at the source-order position of the class declaration + /// (parallel to `RegisterClassParentDynamic`), so dynamic construction + /// of the class VALUE (`exports.C = C; … new mod.C()` — the webpack / + /// zod bundle pattern) can fill the synthesized `__perry_cap_` + /// constructor params. Static `new C()` sites keep passing captures as + /// trailing args and don't consult the table. 
+ RegisterClassCaptures { + class_name: String, + captures: Vec, + }, + /// Issue #894: `class C { static [keyExpr] = initExpr }` where the /// class is returned from a factory function body. The static-Symbol /// registration must re-run each time the factory is called, with diff --git a/crates/perry-hir/src/lower_decl/body_stmt.rs b/crates/perry-hir/src/lower_decl/body_stmt.rs index 0eb76fa194..f1ed467cfa 100644 --- a/crates/perry-hir/src/lower_decl/body_stmt.rs +++ b/crates/perry-hir/src/lower_decl/body_stmt.rs @@ -285,6 +285,23 @@ pub fn lower_body_stmt(ctx: &mut LoweringContext, stmt: &ast::Stmt) -> Result` ctor params. Static `new C()` + // sites still pass captures as trailing args directly. + if let Some(captured) = ctx.lookup_class_captures(&class.name) { + if !captured.is_empty() { + let captures: Vec = + captured.iter().map(|id| Expr::LocalGet(*id)).collect(); + result.push(Stmt::Expr(Expr::RegisterClassCaptures { + class_name: class.name.clone(), + captures, + })); + } + } ctx.pending_classes.push(class); } else { // Duplicate same-named class: still evaluate its computed diff --git a/crates/perry-hir/src/lower_decl/class_captures.rs b/crates/perry-hir/src/lower_decl/class_captures.rs index 730f8ee3c6..3ec7817f38 100644 --- a/crates/perry-hir/src/lower_decl/class_captures.rs +++ b/crates/perry-hir/src/lower_decl/class_captures.rs @@ -209,7 +209,9 @@ pub fn synthesize_class_captures( // Helper closure: build a fresh-id map for one function's body, // rewrite the body refs (with field-write propagation), and // prepend the rebinding lets. - let rewrite_method_body = |ctx: &mut LoweringContext, body: &mut Vec| { + let rewrite_method_body = |ctx: &mut LoweringContext, + body: &mut Vec| + -> std::collections::HashMap { let mut id_map: std::collections::HashMap = std::collections::HashMap::new(); let mut prologue: Vec = Vec::new(); @@ -240,23 +242,188 @@ pub fn synthesize_class_captures( ); prologue.append(body); *body = prologue; + id_map }; - // 2. 
Methods / getters / setters. + // SELF-construction inside this class's own members: `new (…)` + // sites in method bodies were lowered BEFORE this class registered its + // captures, so the `Expr::New` Ident arm appended nothing (vendored + // zod's `_addCheck(e){ return new ZodString({…this._def…}) }`). After + // `rewrite_method_body` runs, the method prologue rebinds every capture + // under a fresh id — append those rebind ids here. Nested closure + // bodies are walked too; their capture lists already include the + // prologue ids when the closure body references them, and a closure + // whose ONLY reference is the appended arg gets the id added to its + // captures list below. + fn append_self_new_args_expr( + expr: &mut Expr, + class_name: &str, + cap_args: &[(LocalId, LocalId)], + ) { + if let Expr::New { + class_name: cn, + args, + .. + } = expr + { + if cn == class_name { + for (_, fresh) in cap_args { + args.push(Expr::LocalGet(*fresh)); + } + } + } + if let Expr::Closure { body, captures, .. } = expr { + for stmt in body.iter_mut() { + append_self_new_args_stmt(stmt, class_name, cap_args); + } + // The appended LocalGet reads the enclosing method's rebind + // slot — make sure the closure captures it. + let mut refs = Vec::new(); + let mut visited = std::collections::HashSet::new(); + for stmt in body.iter() { + crate::analysis::collect_local_refs_stmt(stmt, &mut refs, &mut visited); + } + for (_, fresh) in cap_args { + if refs.contains(fresh) && !captures.contains(fresh) { + captures.push(*fresh); + } + } + return; + } + crate::walker::walk_expr_children_mut(expr, &mut |child| { + append_self_new_args_expr(child, class_name, cap_args) + }); + } + fn append_self_new_args_stmt( + stmt: &mut Stmt, + class_name: &str, + cap_args: &[(LocalId, LocalId)], + ) { + match stmt { + Stmt::Let { init, .. 
} => { + if let Some(e) = init { + append_self_new_args_expr(e, class_name, cap_args); + } + } + Stmt::Expr(e) | Stmt::Throw(e) => append_self_new_args_expr(e, class_name, cap_args), + Stmt::Return(opt) => { + if let Some(e) = opt { + append_self_new_args_expr(e, class_name, cap_args); + } + } + Stmt::If { + condition, + then_branch, + else_branch, + } => { + append_self_new_args_expr(condition, class_name, cap_args); + for s in then_branch { + append_self_new_args_stmt(s, class_name, cap_args); + } + if let Some(eb) = else_branch { + for s in eb { + append_self_new_args_stmt(s, class_name, cap_args); + } + } + } + Stmt::While { condition, body } | Stmt::DoWhile { body, condition } => { + append_self_new_args_expr(condition, class_name, cap_args); + for s in body { + append_self_new_args_stmt(s, class_name, cap_args); + } + } + Stmt::For { + init, + condition, + update, + body, + } => { + if let Some(s) = init { + append_self_new_args_stmt(s, class_name, cap_args); + } + if let Some(e) = condition { + append_self_new_args_expr(e, class_name, cap_args); + } + if let Some(e) = update { + append_self_new_args_expr(e, class_name, cap_args); + } + for s in body { + append_self_new_args_stmt(s, class_name, cap_args); + } + } + Stmt::Labeled { body, .. 
} => append_self_new_args_stmt(body, class_name, cap_args), + Stmt::Try { + body, + catch, + finally, + } => { + for s in body { + append_self_new_args_stmt(s, class_name, cap_args); + } + if let Some(c) = catch { + for s in &mut c.body { + append_self_new_args_stmt(s, class_name, cap_args); + } + } + if let Some(fb) = finally { + for s in fb { + append_self_new_args_stmt(s, class_name, cap_args); + } + } + } + Stmt::Switch { + discriminant, + cases, + } => { + append_self_new_args_expr(discriminant, class_name, cap_args); + for c in cases { + if let Some(t) = &mut c.test { + append_self_new_args_expr(t, class_name, cap_args); + } + for s in &mut c.body { + append_self_new_args_stmt(s, class_name, cap_args); + } + } + } + Stmt::Break + | Stmt::Continue + | Stmt::LabeledBreak(_) + | Stmt::LabeledContinue(_) + | Stmt::PreallocateBoxes(_) => {} + } + } + + // 2. Methods / getters / setters. After each body's capture rebind, + // append the rebind ids to any SELF-construction `new (…)` + // sites the body contains (lowered before this class registered). 
+ let append_self_sites = |body: &mut Vec, + id_map: &std::collections::HashMap| { + let cap_args: Vec<(LocalId, LocalId)> = captures_vec + .iter() + .filter_map(|oid| id_map.get(oid).map(|f| (*oid, *f))) + .collect(); + for stmt in body.iter_mut() { + append_self_new_args_stmt(stmt, name, &cap_args); + } + }; for m in methods.iter_mut() { - rewrite_method_body(ctx, &mut m.body); + let id_map = rewrite_method_body(ctx, &mut m.body); + append_self_sites(&mut m.body, &id_map); } for (_, g) in getters.iter_mut() { - rewrite_method_body(ctx, &mut g.body); + let id_map = rewrite_method_body(ctx, &mut g.body); + append_self_sites(&mut g.body, &id_map); } for (_, s) in setters.iter_mut() { - rewrite_method_body(ctx, &mut s.body); + let id_map = rewrite_method_body(ctx, &mut s.body); + append_self_sites(&mut s.body, &id_map); } for member in computed_members .iter_mut() .filter(|member| !member.is_static) { - rewrite_method_body(ctx, &mut member.function.body); + let id_map = rewrite_method_body(ctx, &mut member.function.body); + append_self_sites(&mut member.function.body, &id_map); } // 3. Constructor. @@ -273,26 +440,77 @@ pub fn synthesize_class_captures( // forced a ctor into existence. The SuperCall also routes known // user-class parents through the inline-parent-ctor arm so the // parent body runs, matching the no-own-ctor `new` path. 
- let mut ctor = constructor.take().unwrap_or_else(|| Function { - id: ctx.fresh_func(), - name: format!("{}::constructor", name), - type_params: Vec::new(), - params: Vec::new(), - return_type: Type::Void, - body: if has_heritage { - vec![Stmt::Expr(Expr::SuperCall(Vec::new()))] - } else { - Vec::new() - }, - is_async: false, - is_generator: false, - is_strict: true, - was_plain_async: false, - was_unrolled: false, - is_exported: false, - captures: Vec::new(), - decorators: Vec::new(), - }); + let mut ctor = match constructor.take() { + Some(c) => c, + None => { + // The spec default ctor FORWARDS its args: + // `constructor(...args) { super(...args) }`. A bare + // `SuperCall([])` dropped the construction-site user args, so + // `new Derived({def})` left the parent ctor's params undefined + // (vendored zod: ZodString.create → new ZodString({...}) → + // ZodType ctor never saw `def`, `this._def` stayed undefined). + // Synthesize explicit forwarding params matching the closest + // pending-ancestor ctor's USER arity (its `__perry_cap_*` + // params excluded). Ancestors outside `pending_classes` + // (module-level / native parents) keep the no-arg baseline. 
+ let parent_user_arity = if has_heritage { + let mut arity = 0usize; + let mut walker: Option = extends_name.map(|s| s.to_string()); + while let Some(pname) = walker.take() { + let Some(pc) = ctx.pending_classes.iter().find(|c| c.name == pname) else { + break; + }; + if let Some(pctor) = pc.constructor.as_ref() { + arity = pctor + .params + .iter() + .filter(|p| !p.name.starts_with("__perry_cap_")) + .count(); + break; + } + walker = pc.extends_name.clone(); + } + arity + } else { + 0 + }; + let mut params: Vec = Vec::with_capacity(parent_user_arity); + let mut super_args: Vec = Vec::with_capacity(parent_user_arity); + for i in 0..parent_user_arity { + let pid = ctx.fresh_local(); + params.push(Param { + id: pid, + name: format!("__perry_dflt_arg_{}", i), + ty: Type::Any, + default: None, + decorators: Vec::new(), + is_rest: false, + arguments_object: None, + }); + super_args.push(Expr::LocalGet(pid)); + } + Function { + id: ctx.fresh_func(), + name: format!("{}::constructor", name), + type_params: Vec::new(), + params, + return_type: Type::Void, + body: if has_heritage { + vec![Stmt::Expr(Expr::SuperCall(super_args))] + } else { + Vec::new() + }, + is_async: false, + is_generator: false, + is_strict: true, + was_plain_async: false, + was_unrolled: false, + is_exported: false, + captures: Vec::new(), + decorators: Vec::new(), + } + } + }; let mut ctor_id_map: std::collections::HashMap = std::collections::HashMap::new(); let mut assignment_stmts: Vec = Vec::with_capacity(captures_vec.len()); @@ -321,6 +539,7 @@ pub fn synthesize_class_captures( // Rewrite user-written ctor body BEFORE inserting the assignment // stmts (which already reference the fresh ids directly). 
crate::analysis::remap_local_ids_in_stmts(&mut ctor.body, &ctor_id_map); + append_self_sites(&mut ctor.body, &ctor_id_map); let super_pos = ctor .body .iter() diff --git a/crates/perry-hir/src/stable_hash/expr.rs b/crates/perry-hir/src/stable_hash/expr.rs index 167cea3749..a981866c88 100644 --- a/crates/perry-hir/src/stable_hash/expr.rs +++ b/crates/perry-hir/src/stable_hash/expr.rs @@ -627,6 +627,7 @@ impl SH for Expr { Expr::TaggedTemplateStrings { site_id, cooked, raw } => { tag(h, 445); site_id.hash(h); cooked.hash(h); raw.hash(h); } Expr::TemplateRaw(e) => { tag(h, 446); e.as_ref().hash(h); } Expr::RegisterClassParentDynamic { class_name, parent_expr, } => { tag(h, 447); class_name.hash(h); parent_expr.as_ref().hash(h); } + Expr::RegisterClassCaptures { class_name, captures } => { tag(h, 12238); class_name.hash(h); for c in captures { c.hash(h); } } Expr::RegisterClassStaticSymbol { class_name, key_expr, value_expr, } => { tag(h, 12025); class_name.hash(h); key_expr.as_ref().hash(h); value_expr.as_ref().hash(h); } Expr::RegisterClassComputedMethod { class_name, key_expr, method_name, is_static, param_count, has_rest } => { tag(h, 12233); class_name.hash(h); key_expr.as_ref().hash(h); method_name.hash(h); is_static.hash(h); param_count.hash(h); has_rest.hash(h); } Expr::RegisterClassComputedAccessor { class_name, key_expr, getter_name, setter_name, is_static } => { tag(h, 12234); class_name.hash(h); key_expr.as_ref().hash(h); getter_name.hash(h); setter_name.hash(h); is_static.hash(h); } diff --git a/crates/perry-hir/src/walker/expr_mut.rs b/crates/perry-hir/src/walker/expr_mut.rs index d5364617a1..b49762e413 100644 --- a/crates/perry-hir/src/walker/expr_mut.rs +++ b/crates/perry-hir/src/walker/expr_mut.rs @@ -579,6 +579,11 @@ where Expr::RegisterClassParentDynamic { parent_expr, .. } => { f(parent_expr); } + Expr::RegisterClassCaptures { captures, .. 
} => { + for c in captures { + f(c); + } + } Expr::RegisterClassStaticSymbol { key_expr, value_expr, diff --git a/crates/perry-hir/src/walker/expr_ref.rs b/crates/perry-hir/src/walker/expr_ref.rs index a1280d98a9..e7ea54ea25 100644 --- a/crates/perry-hir/src/walker/expr_ref.rs +++ b/crates/perry-hir/src/walker/expr_ref.rs @@ -580,6 +580,11 @@ where Expr::RegisterClassParentDynamic { parent_expr, .. } => { f(parent_expr); } + Expr::RegisterClassCaptures { captures, .. } => { + for c in captures { + f(c); + } + } Expr::RegisterClassStaticSymbol { key_expr, value_expr, diff --git a/crates/perry-runtime/src/gc/mod.rs b/crates/perry-runtime/src/gc/mod.rs index b60a617167..d5516a92a5 100644 --- a/crates/perry-runtime/src/gc/mod.rs +++ b/crates/perry-runtime/src/gc/mod.rs @@ -111,24 +111,7 @@ fn gc_collect_minor_with_trigger(trigger: GcTriggerSnapshot) -> GcCollectOutcome let current_rss_bytes = crate::process::get_rss_bytes(); let evacuation_policy_allowed = gen_gc_evacuate_enabled(); let force_evacuation = gc_force_evacuate_enabled(); - // #5029: old-page defrag (C4b old-gen compaction) is skipped on cycles - // that run the conservative native-stack scan. Conservative stack words - // cannot be rewritten after a move, and per-object CONS_PINNED only - // protects DIRECT discoveries — the stress suite demonstrated a moved - // old object whose remaining referrer was not rewritten (clone shape - // lookups through it returned recycled memory). Until every such - // referrer surface is registered for rewrite, moving old objects is only - // sound when all roots are precise. Copying minors (the steady-state - // path) never run the conservative scan, so defrag keeps operating - // there via its own policy. 
- let conservative_scan_this_cycle = matches!( - roots::conservative_stack_scan_decision(), - roots::ConservativeStackScanDecision::Scan - ); - let old_page_selection = if evacuation_policy_allowed - && old_to_young_tracking_complete() - && !conservative_scan_this_cycle - { + let old_page_selection = if evacuation_policy_allowed && old_to_young_tracking_complete() { select_old_page_defrag_pages(force_evacuation) } else { OldPageDefragSelection::default() @@ -412,6 +395,7 @@ pub fn gc_init() { // singletons store heap pointers in TLS caches; keep them live and rewrite // them if a copying collection moves their backing allocations. gc_register_mutable_root_scanner(crate::object::scan_native_callable_export_roots_mut); + gc_register_mutable_root_scanner(crate::object::scan_class_capture_value_roots_mut); gc_register_mutable_root_scanner(crate::node_vm::scan_vm_roots_mut); gc_register_mutable_root_scanner(crate::tls::scan_tls_roots_mut); gc_register_mutable_root_scanner(crate::process::scan_process_finalization_roots_mut); diff --git a/crates/perry-runtime/src/object/class_constructors.rs b/crates/perry-runtime/src/object/class_constructors.rs index 444e5d5979..2106b2590c 100644 --- a/crates/perry-runtime/src/object/class_constructors.rs +++ b/crates/perry-runtime/src/object/class_constructors.rs @@ -65,6 +65,69 @@ fn lookup_class_constructor(class_id: u32) -> Option<(usize, u32)> { .copied() } +thread_local! { + /// Decl-site snapshots of a function-nested class DECLARATION's captured + /// outer locals, keyed by class_id. Filled by the codegen-emitted + /// `js_class_register_capture_values` call at the class's source-order + /// declaration position (parallel to `js_register_class_parent_dynamic`), + /// consumed by `replay_registered_class_constructor` so dynamic + /// construction of the class VALUE (`exports.C = C; new mod.C()` — the + /// webpack / vendored-zod bundle pattern) fills the synthesized + /// `__perry_cap_` ctor params. 
Re-running the enclosing function + /// overwrites the snapshot (last-definition-wins) — exact for the + /// run-once module-factory pattern these bundles use; class EXPRESSIONS + /// keep their per-evaluation `__perry_ctor_caps` snapshot instead. + static CLASS_CAPTURE_VALUES: std::cell::RefCell>> = + std::cell::RefCell::new(HashMap::new()); +} + +/// Codegen FFI: snapshot `len` capture values for `class_id`. See +/// [`CLASS_CAPTURE_VALUES`]. +/// +/// # Safety +/// `values_ptr` must point at `len` readable f64 slots. +#[no_mangle] +pub unsafe extern "C" fn js_class_register_capture_values( + class_id: u32, + values_ptr: *const f64, + len: usize, +) { + if class_id == 0 || values_ptr.is_null() { + return; + } + let mut values = Vec::with_capacity(len); + for i in 0..len { + values.push((*values_ptr.add(i)).to_bits()); + } + CLASS_CAPTURE_VALUES.with(|m| { + m.borrow_mut().insert(class_id, values); + }); +} + +/// Keepalive anchor for the auto-optimize whole-program build — +/// `js_class_register_capture_values` is a generated-code-only callee. +#[used] +static KEEP_JS_CLASS_REGISTER_CAPTURE_VALUES: unsafe extern "C" fn(u32, *const f64, usize) = + js_class_register_capture_values; + +/// GC root scan for the capture-value snapshots (registered alongside the +/// other runtime mutable-root scanners in `gc::mod`). +pub fn scan_class_capture_value_roots_mut(visitor: &mut crate::gc::RuntimeRootVisitor<'_>) { + CLASS_CAPTURE_VALUES.with(|m| { + let mut m = m.borrow_mut(); + for values in m.values_mut() { + for bits in values.iter_mut() { + visitor.visit_nanbox_u64_slot(bits); + } + } + }); +} + +/// The decl-site capture snapshot for `class_id`, if one was registered. +fn class_capture_values(class_id: u32) -> Option> { + CLASS_CAPTURE_VALUES.with(|m| m.borrow().get(&class_id).cloned()) +} + /// #1787: replay a class expression's constructor on a freshly-allocated /// instance. 
`classobj_value` is the NaN-boxed heap class object the `new` /// callee resolved to; `class_cid` is its (template) class_id; `inst` is the @@ -145,15 +208,24 @@ pub(crate) unsafe fn replay_registered_class_constructor( return; }; + // A function-nested class declaration may carry a decl-site capture + // snapshot (see CLASS_CAPTURE_VALUES). The ctor's trailing + // `__perry_cap_` params are filled from it; user args fill the rest. + let caps = class_capture_values(class_cid).unwrap_or_default(); + let user_params = (total_params as usize).saturating_sub(caps.len()); + let undef = f64::from_bits(crate::value::TAG_UNDEFINED); let mut final_args: Vec = Vec::with_capacity(total_params as usize); - for i in 0..total_params as usize { + for i in 0..user_params { if !args_ptr.is_null() && i < args_len { final_args.push(*args_ptr.add(i)); } else { final_args.push(undef); } } + for bits in &caps { + final_args.push(f64::from_bits(*bits)); + } let _ = call_vtable_method( ctor_ptr, inst as i64, From 0bf6efa1e59cb2315031ca50461c19c0b9117d4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 01:44:25 +0200 Subject: [PATCH 03/15] =?UTF-8?q?fix(hir,codegen,runtime):=20wall=20#21=20?= =?UTF-8?q?=E2=80=94=20forward=20class=20refs,=20static-method=20captures,?= =?UTF-8?q?=20super(...spread)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continuing the vendored-zod (Next.js segment-config) chain: 1. Forward sibling-class references: a method body referencing a class declared LATER in the same function body (`ZodType.optional()` → `ZodOptional.create(...)`) lowered the Ident to the unknown-global sentinel; `.create(a,b)` then dispatched into Object.create and threw "Property description must be an object". New scoped `forward_class_names` pre-pass in lower_fn_body_block_stmt; Ident lowering resolves the name to ClassRef. 2. 
STATIC method captures: static bodies referencing enclosing-fn locals (`static create(...) { typeName: k.ZodRecord }`) had no capture machinery — prologue rebinds now read the decl-site snapshot via new Expr::ClassCaptureValue → js_class_capture_value(class_id, index). Snapshot is additionally RE-registered at the END of the enclosing function body (tsc emits TS-enum namespaces after the classes that reference them, so the decl-site values were still undefined). 3. `super(...spread)` (`constructor(){ super(...arguments) }` — tsc pass-through ctor, zod ZodNumber/ZodBigInt): new Expr::SuperCallSpread materializes the args array (js_array_push_spread_any also spreads the arguments OBJECT, an array-like) and invokes the closest registered ancestor ctor on the same `this` via js_super_construct_apply (CLASS_CONSTRUCTORS walk + decl-site cap fill). Recognized by the derived-ctor TDZ checks and the cap-store insert-after-super positions. KNOWN GAP (documented in-code): super(...spread) constructions reached through METHOD bodies (the standalone-ctor path) lose the parent's field writes — z.number().int() chains still fail; z.string()/enum/record/ union/object/literal chains and the zod entry module all pass. An inline variant was tried and reverted (segfault on rest-param forwarding). Repros: /tmp/zodrepro pieces + t_sp*/t_ns* shapes (20+). 
--- crates/perry-codegen/src/expr/mod.rs | 5 +- .../src/expr/static_field_meta.rs | 15 ++ .../perry-codegen/src/expr/this_super_call.rs | 65 ++++++++ crates/perry-codegen/src/lower_call/new.rs | 4 +- .../src/runtime_decls/strings.rs | 5 + .../src/runtime_decls/strings_part2.rs | 6 +- crates/perry-hir/src/analysis/uses_this.rs | 1 + crates/perry-hir/src/ir/expr.rs | 16 ++ crates/perry-hir/src/lower/context.rs | 1 + crates/perry-hir/src/lower/expr_call/mod.rs | 11 +- crates/perry-hir/src/lower/lower_expr.rs | 7 + .../perry-hir/src/lower/lowering_context.rs | 9 ++ crates/perry-hir/src/lower_decl/block.rs | 56 ++++++- .../src/lower_decl/class_captures.rs | 46 +++++- crates/perry-hir/src/lower_decl/class_decl.rs | 2 + .../perry-hir/src/lower_decl/class_members.rs | 2 +- crates/perry-hir/src/stable_hash/expr.rs | 2 + crates/perry-hir/src/walker/expr_mut.rs | 8 + crates/perry-hir/src/walker/expr_ref.rs | 8 + .../src/object/class_constructors.rs | 149 ++++++++++++++++++ .../src/object/reflect_support.rs | 3 +- .../src/commands/compile/cjs_wrap/detect.rs | 5 +- .../src/commands/compile/cjs_wrap/mod.rs | 15 +- 23 files changed, 415 insertions(+), 26 deletions(-) diff --git a/crates/perry-codegen/src/expr/mod.rs b/crates/perry-codegen/src/expr/mod.rs index 04d3a191f6..c180d93659 100644 --- a/crates/perry-codegen/src/expr/mod.rs +++ b/crates/perry-codegen/src/expr/mod.rs @@ -1469,7 +1469,9 @@ pub(crate) fn lower_expr(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { Expr::New { .. } | Expr::NewDynamic { .. } | Expr::NewDynamicSpread { .. } => { new_dynamic::lower(ctx, expr) } - Expr::This | Expr::NewTarget | Expr::SuperCall(..) => this_super_call::lower(ctx, expr), + Expr::This | Expr::NewTarget | Expr::SuperCall(..) | Expr::SuperCallSpread(..) => { + this_super_call::lower(ctx, expr) + } Expr::IsNaN(..) | Expr::MathPow(..) | Expr::MathImul(..) @@ -1893,6 +1895,7 @@ pub(crate) fn lower_expr(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { | Expr::StaticFieldSet { .. 
} | Expr::RegisterClassParentDynamic { .. } | Expr::RegisterClassCaptures { .. } + | Expr::ClassCaptureValue { .. } | Expr::RegisterClassStaticSymbol { .. } | Expr::RegisterClassComputedMethod { .. } | Expr::RegisterClassComputedAccessor { .. } diff --git a/crates/perry-codegen/src/expr/static_field_meta.rs b/crates/perry-codegen/src/expr/static_field_meta.rs index c583256e6a..2844d583f0 100644 --- a/crates/perry-codegen/src/expr/static_field_meta.rs +++ b/crates/perry-codegen/src/expr/static_field_meta.rs @@ -165,6 +165,21 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { } Ok(double_literal(f64::from_bits(0x7FFC_0000_0000_0001))) } + // Read slot `index` of the class's decl-site capture snapshot — + // STATIC method prologue rebinds (no instance to carry the + // `__perry_cap_*` fields). + Expr::ClassCaptureValue { class_name, index } => { + if let Some(&class_id) = ctx.class_ids.get(class_name) { + let cid_str = class_id.to_string(); + let idx_str = index.to_string(); + return Ok(ctx.block().call( + DOUBLE, + "js_class_capture_value", + &[(crate::types::I32, &cid_str), (crate::types::I32, &idx_str)], + )); + } + Ok(double_literal(f64::from_bits(0x7FFC_0000_0000_0001))) + } // Issue #894: `static [Symbol.for("k")] = init` inside a // class expression returned from a factory function. Emitted // by HIR lowering as a `Sequence([…, RegisterClassStaticSymbol, diff --git a/crates/perry-codegen/src/expr/this_super_call.rs b/crates/perry-codegen/src/expr/this_super_call.rs index 276204d610..55ca945f06 100644 --- a/crates/perry-codegen/src/expr/this_super_call.rs +++ b/crates/perry-codegen/src/expr/this_super_call.rs @@ -122,6 +122,71 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { // the lowered super-call args. // // The current class is the topmost entry in `class_stack`. The + // `super(...spread)` — tsc's pass-through ctor (`constructor(){ + // super(...arguments) }`, zod's ZodNumber/ZodBigInt). 
The arg + // count is dynamic, so the parent ctor can't be inlined; build + // the args array and invoke the closest registered ancestor ctor + // on the SAME `this` through the CLASS_CONSTRUCTORS registry. + Expr::SuperCallSpread(call_args) => { + let Some(current_class_name) = ctx.class_stack.last().cloned() else { + for a in call_args { + let (perry_hir::CallArg::Expr(e) | perry_hir::CallArg::Spread(e)) = a; + let _ = lower_expr(ctx, e)?; + } + return Ok(double_literal(0.0)); + }; + // Materialize the args array (spread elements appended via + // the runtime spread helper). + let zero = "0".to_string(); + let mut arr = ctx.block().call(I64, "js_array_alloc", &[(I32, &zero)]); + for a in call_args { + match a { + perry_hir::CallArg::Expr(e) => { + let v = lower_expr(ctx, e)?; + arr = ctx + .block() + .call(I64, "js_array_push_f64", &[(I64, &arr), (DOUBLE, &v)]); + } + perry_hir::CallArg::Spread(e) => { + // `js_array_push_spread_any` also handles the + // arguments OBJECT (array-like, not ArrayHeader) — + // the `super(...arguments)` source. + let v = lower_expr(ctx, e)?; + arr = ctx.block().call( + I64, + "js_array_push_spread_any", + &[(I64, &arr), (DOUBLE, &v)], + ); + } + } + } + // Invoke the closest registered ancestor ctor through the + // CLASS_CONSTRUCTORS registry. KNOWN GAP: constructions from + // METHOD bodies (standalone-ctor path) currently lose the + // parent's field writes — see the wall-21 notes; top-level and + // arrow-context constructions work. 
+ let this_box = match ctx.this_stack.last().cloned() { + Some(slot) => ctx.block().load(DOUBLE, &slot), + None => double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)), + }; + if let Some(&child_cid) = ctx.class_ids.get(&current_class_name) { + let cid_str = child_cid.to_string(); + let blk = ctx.block(); + let arr_box = nanbox_pointer_inline(blk, &arr); + ctx.block().call_void( + "js_super_construct_apply", + &[(I32, &cid_str), (DOUBLE, &this_box), (DOUBLE, &arr_box)], + ); + } + // Spec: subclass field initializers run AFTER super() returns + // (mirrors every other super arm). + crate::lower_call::apply_field_initializers_recursive( + ctx, + &current_class_name, + crate::lower_call::FieldInitMode::SelfOnly, + )?; + return Ok(double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED))); + } // parent is `current_class.extends_name` (Perry uses the string // form for cross-module/late-resolved cases) or // `current_class.extends.and_then(class_id_to_name)`. For Phase diff --git a/crates/perry-codegen/src/lower_call/new.rs b/crates/perry-codegen/src/lower_call/new.rs index 89247e8564..1cc01e96eb 100644 --- a/crates/perry-codegen/src/lower_call/new.rs +++ b/crates/perry-codegen/src/lower_call/new.rs @@ -108,7 +108,7 @@ fn ctor_body_calls_super(body: &[perry_hir::Stmt]) -> bool { } fn expr_calls_super(expr: &Expr) -> bool { - if matches!(expr, Expr::SuperCall(_)) { + if matches!(expr, Expr::SuperCall(_) | Expr::SuperCallSpread(_)) { return true; } let mut found = false; @@ -131,7 +131,7 @@ fn ctor_body_closure_calls_super(body: &[perry_hir::Stmt]) -> bool { } fn expr_calls_super_incl_closures(expr: &Expr) -> bool { - if matches!(expr, Expr::SuperCall(_)) { + if matches!(expr, Expr::SuperCall(_) | Expr::SuperCallSpread(_)) { return true; } if let Expr::Closure { body, ..
} = expr { diff --git a/crates/perry-codegen/src/runtime_decls/strings.rs b/crates/perry-codegen/src/runtime_decls/strings.rs index d9564fa70d..d52eea09bb 100644 --- a/crates/perry-codegen/src/runtime_decls/strings.rs +++ b/crates/perry-codegen/src/runtime_decls/strings.rs @@ -1130,6 +1130,11 @@ pub fn declare_phase_b_strings(module: &mut LlModule) { // Decl-site snapshot of a function-nested class's captured locals — // consumed by the dynamic-construction replay (`new mod.C()`). module.declare_function("js_class_register_capture_values", VOID, &[I32, PTR, I64]); + // Static-method prologue read of one decl-site capture snapshot slot. + module.declare_function("js_class_capture_value", DOUBLE, &[I32, I32]); + // `super(...spread)` — dynamic-arity ancestor ctor invocation on `this`. + module.declare_function("js_super_construct_apply", VOID, &[I32, DOUBLE, DOUBLE]); + module.declare_function("js_array_push_spread_any", I64, &[I64, DOUBLE]); // Issue #711 part 2: prototype-based class declaration via // `.prototype = `. Binds an object as the function's // prototype source; subsequent `class X extends ` lookups diff --git a/crates/perry-codegen/src/runtime_decls/strings_part2.rs b/crates/perry-codegen/src/runtime_decls/strings_part2.rs index 88358d0121..9b2df4345c 100644 --- a/crates/perry-codegen/src/runtime_decls/strings_part2.rs +++ b/crates/perry-codegen/src/runtime_decls/strings_part2.rs @@ -1193,11 +1193,7 @@ pub(crate) fn declare_phase_b_strings_part2(module: &mut LlModule) { module.declare_function("js_event_new", I64, &[DOUBLE, DOUBLE, I32]); // `super(type, options)` from `class X extends Event/CustomEvent` — // initializes Event fields onto the existing subclass `this`. 
- module.declare_function( - "js_event_subclass_init", - DOUBLE, - &[DOUBLE, DOUBLE, DOUBLE, I32], - ); + module.declare_function("js_event_subclass_init", DOUBLE, &[DOUBLE, DOUBLE, DOUBLE, I32]); module.declare_function("js_custom_event_new", I64, &[DOUBLE, DOUBLE, I32]); module.declare_function("js_dom_exception_new", I64, &[DOUBLE, DOUBLE]); module.declare_function("js_event_target_add_event_listener", VOID, &[I64, I64, I64]); diff --git a/crates/perry-hir/src/analysis/uses_this.rs b/crates/perry-hir/src/analysis/uses_this.rs index 769fd99f79..898e3786ee 100644 --- a/crates/perry-hir/src/analysis/uses_this.rs +++ b/crates/perry-hir/src/analysis/uses_this.rs @@ -9,6 +9,7 @@ pub(crate) fn uses_this_expr(expr: &Expr) -> bool { match expr { Expr::This => true, Expr::SuperCall(_) + | Expr::SuperCallSpread(_) | Expr::SuperMethodCall { .. } | Expr::SuperPropertyGet { .. } | Expr::SuperPropertySet { .. } diff --git a/crates/perry-hir/src/ir/expr.rs b/crates/perry-hir/src/ir/expr.rs index 54219052f4..cc195d90f5 100644 --- a/crates/perry-hir/src/ir/expr.rs +++ b/crates/perry-hir/src/ir/expr.rs @@ -122,6 +122,13 @@ pub enum Expr { type_args: Vec, }, + /// `super(...)` with spread arguments (`super(...arguments)` — the tsc + /// pass-through-ctor emit zod's ZodNumber/ZodBigInt use). The parent + /// ctor is invoked at runtime through the CLASS_CONSTRUCTORS registry + /// with the materialized args array (codegen can't inline a dynamic + /// arg count). + SuperCallSpread(Vec), + // Named function reference FuncRef(FuncId), @@ -377,6 +384,15 @@ pub enum Expr { captures: Vec, }, + /// Read slot `index` of a class's decl-site capture snapshot + /// (`CLASS_CAPTURE_VALUES`, written by `RegisterClassCaptures`). Used by + /// STATIC method bodies of function-nested capturing classes — statics + /// have no instance to carry `__perry_cap_*` fields, so their prologue + /// rebinds read the snapshot instead (vendored zod's + /// `static create(...) 
{ … typeName: k.ZodRecord … }` where `k` is an + /// enclosing-function local). + ClassCaptureValue { class_name: String, index: u32 }, + /// Issue #894: `class C { static [keyExpr] = initExpr }` where the /// class is returned from a factory function body. The static-Symbol /// registration must re-run each time the factory is called, with diff --git a/crates/perry-hir/src/lower/context.rs b/crates/perry-hir/src/lower/context.rs index d9d60a97bf..a7e89e4153 100644 --- a/crates/perry-hir/src/lower/context.rs +++ b/crates/perry-hir/src/lower/context.rs @@ -137,6 +137,7 @@ impl LoweringContext { current_class_super_ident: None, mixin_funcs: HashMap::new(), anon_shape_classes: HashMap::new(), + forward_class_names: std::collections::HashSet::new(), next_anon_shape_id: 0, class_method_return_types: Vec::new(), class_captures: Vec::new(), diff --git a/crates/perry-hir/src/lower/expr_call/mod.rs b/crates/perry-hir/src/lower/expr_call/mod.rs index 56f926a860..e9762d5915 100644 --- a/crates/perry-hir/src/lower/expr_call/mod.rs +++ b/crates/perry-hir/src/lower/expr_call/mod.rs @@ -336,7 +336,16 @@ fn lower_call_inner(ctx: &mut LoweringContext, call: &ast::CallExpr) -> Result { - // super() call in constructor + // super() call in constructor. With spread args + // (`super(...arguments)` — tsc's pass-through-ctor emit) the + // parent ctor is invoked at runtime via the + // CLASS_CONSTRUCTORS registry with the materialized args + // array; the flat lowering would pass the spread operand as + // ONE positional arg (zod's ZodNumber stored the whole + // `arguments` object into `this._def`). 
+ if let Some(spread_args) = spread_args { + return Ok(Expr::SuperCallSpread(spread_args)); + } Ok(Expr::SuperCall(args)) } ast::Callee::Expr(expr) => { diff --git a/crates/perry-hir/src/lower/lower_expr.rs b/crates/perry-hir/src/lower/lower_expr.rs index a35cc17ba0..def80fddd2 100644 --- a/crates/perry-hir/src/lower/lower_expr.rs +++ b/crates/perry-hir/src/lower/lower_expr.rs @@ -509,6 +509,13 @@ pub(crate) fn lower_expr(ctx: &mut LoweringContext, expr: &ast::Expr) -> Result< } else if ctx.lookup_class(&name).is_some() { // Class used as a first-class value (e.g., { Point: Point }) Ok(Expr::ClassRef(name)) + } else if ctx.forward_class_names.contains(&name) { + // Forward reference to a sibling class declared LATER in the + // same function body (vendored zod: ZodType.optional() → + // ZodOptional.create(...)). JS resolves this at call time; + // emit a ClassRef by name — codegen resolves it from the + // class registry, which has every pending class by then. + Ok(Expr::ClassRef(name)) } else if name == "undefined" { // Global undefined identifier Ok(Expr::Undefined) diff --git a/crates/perry-hir/src/lower/lowering_context.rs b/crates/perry-hir/src/lower/lowering_context.rs index 9693a79297..8f76971f15 100644 --- a/crates/perry-hir/src/lower/lowering_context.rs +++ b/crates/perry-hir/src/lower/lowering_context.rs @@ -477,6 +477,15 @@ pub struct LoweringContext { /// field layout. Dedup is per-module only; cross-module dedup would need /// a stable hash and is deferred. pub(crate) anon_shape_classes: HashMap, + /// Class DECLARATION names at the top level of the function body + /// currently being lowered. 
JS resolves a method-body reference to a + /// sibling class declared LATER in the same function at call time + /// (vendored zod: `ZodType.optional()` calls `ZodOptional.create(...)` + /// with ZodOptional declared hundreds of lines below) — without this + /// set the Ident lowered to the unknown-global sentinel and the member + /// call dispatched into `Object.create`. Scoped save/restore in + /// `lower_fn_body_block_stmt`. + pub(crate) forward_class_names: std::collections::HashSet, /// Counter for generating anon-class names (`__AnonShape_N`). // #854: initialized in `new` but unread — anon-shape classes are now named // by content-addressed FNV hash (see `synthesize_anon_shape_class`), not by diff --git a/crates/perry-hir/src/lower_decl/block.rs b/crates/perry-hir/src/lower_decl/block.rs index 5f7245cd96..c1df53cef2 100644 --- a/crates/perry-hir/src/lower_decl/block.rs +++ b/crates/perry-hir/src/lower_decl/block.rs @@ -203,15 +203,69 @@ pub fn lower_fn_body_block_stmt( } } + // Phase 1.5: pre-register sibling class DECLARATION names so forward + // references inside earlier statements/method bodies resolve to + // `ClassRef` instead of the unknown-global sentinel. JS resolves + // these at call time (vendored zod: `ZodType.optional()` calls + // `ZodOptional.create(...)` declared far below in the same webpack + // module function). Scoped: the previous set is restored on exit so + // names don't leak across function bodies. + let saved_forward_class_names = ctx.forward_class_names.clone(); + for stmt in &block.stmts { + if let ast::Stmt::Decl(ast::Decl::Class(class_decl)) = stmt { + ctx.forward_class_names + .insert(class_decl.ident.sym.to_string()); + } + } + // Phase 2: lower the body. The inner FnDecl arm in `lower_body_stmt` // calls `lookup_local(name)` and reuses our pre-defined id. 
- let body = match lower_block_stmt(ctx, block) { + let mut body = match lower_block_stmt(ctx, block) { Ok(body) => body, Err(err) => { ctx.current_strict = parent_strict; + ctx.forward_class_names = saved_forward_class_names; return Err(err); } }; + ctx.forward_class_names = saved_forward_class_names; + + // Re-register capture snapshots for classes declared in this body at + // its END. The decl-site `RegisterClassCaptures` runs before later + // statements assign captured vars (tsc emits TS-enum namespaces AFTER + // the classes that reference them — vendored zod's + // ZodFirstPartyTypeKind), so static-method snapshot reads and post- + // return dynamic constructions need the FINAL values. Inserted before + // a trailing `return` when present; bodies with early returns keep the + // decl-site snapshot for those paths. + { + let mut re_regs: Vec = Vec::new(); + for stmt in &block.stmts { + if let ast::Stmt::Decl(ast::Decl::Class(class_decl)) = stmt { + let cname = class_decl.ident.sym.to_string(); + if let Some(captured) = ctx.lookup_class_captures(&cname) { + if !captured.is_empty() { + let captures: Vec = + captured.iter().map(|id| Expr::LocalGet(*id)).collect(); + re_regs.push(Stmt::Expr(Expr::RegisterClassCaptures { + class_name: cname, + captures, + })); + } + } + } + } + if !re_regs.is_empty() { + let insert_at = if matches!(body.last(), Some(Stmt::Return(_))) { + body.len() - 1 + } else { + body.len() + }; + for (i, s) in re_regs.into_iter().enumerate() { + body.insert(insert_at + i, s); + } + } + } // Undefined-initialised entry slots for hoisted `var`s declared in // nested blocks (see predefine_var_bindings_in_function_body docs). 
diff --git a/crates/perry-hir/src/lower_decl/class_captures.rs b/crates/perry-hir/src/lower_decl/class_captures.rs index 3ec7817f38..c04cb737b3 100644 --- a/crates/perry-hir/src/lower_decl/class_captures.rs +++ b/crates/perry-hir/src/lower_decl/class_captures.rs @@ -32,6 +32,7 @@ pub fn synthesize_class_captures( setters: &mut Vec<(String, Function)>, computed_members: &mut Vec, constructor: &mut Option, + static_methods: &mut Vec, ) { let module_level_ids = ctx.module_level_ids.clone(); let outer_scope_ids: std::collections::HashSet = @@ -62,6 +63,15 @@ pub fn synthesize_class_captures( union_captures.insert(id); } } + // STATIC methods reference enclosing-fn locals too (vendored zod's + // `static create(...)` reads the ZodFirstPartyTypeKind enum local). + // Their refs join the union so the decl-site snapshot includes them; + // the rewrite below reads the snapshot instead of instance fields. + for sm in static_methods.iter() { + for id in collect_method_captures(sm, &outer_scope_ids, &module_level_ids) { + union_captures.insert(id); + } + } // Issue #740: field initializers (`readonly _tag = tag` declared on // a class nested inside a function) also capture outer-scope locals. // Without this, `LocalGet(outer_id)` inside a field's init expression @@ -426,6 +436,40 @@ pub fn synthesize_class_captures( append_self_sites(&mut member.function.body, &id_map); } + // 2b. STATIC methods: no instance carries `__perry_cap_*` fields, so + // the prologue rebinds read the decl-site snapshot instead + // (`ClassCaptureValue { class_name, index }` → + // `js_class_capture_value(class_id, index)` at codegen). The snapshot + // is written by the `RegisterClassCaptures` statement emitted at the + // class's declaration position, which runs before any user code can + // reference the class (TDZ). 
+ for sm in static_methods.iter_mut() { + let mut id_map: std::collections::HashMap = + std::collections::HashMap::new(); + let mut prologue: Vec = Vec::new(); + for (index, &outer_id) in captures_vec.iter().enumerate() { + let new_id = ctx.fresh_local(); + id_map.insert(outer_id, new_id); + prologue.push(Stmt::Let { + id: new_id, + name: format!("__perry_cap_{}", outer_id), + ty: captured_outer_types + .get(&outer_id) + .cloned() + .unwrap_or(Type::Any), + mutable: true, + init: Some(Expr::ClassCaptureValue { + class_name: name.to_string(), + index: index as u32, + }), + }); + } + crate::analysis::remap_local_ids_in_stmts(&mut sm.body, &id_map); + prologue.append(&mut sm.body); + sm.body = prologue; + append_self_sites(&mut sm.body, &id_map); + } + // 3. Constructor. // // Issue #4972: when the class has heritage and NO user-written ctor, @@ -543,7 +587,7 @@ pub fn synthesize_class_captures( let super_pos = ctor .body .iter() - .position(|s| matches!(s, Stmt::Expr(Expr::SuperCall(_)))); + .position(|s| matches!(s, Stmt::Expr(Expr::SuperCall(_) | Expr::SuperCallSpread(_)))); let insert_at = super_pos.map(|p| p + 1).unwrap_or(0); for (i, stmt) in assignment_stmts.into_iter().enumerate() { ctor.body.insert(insert_at + i, stmt); diff --git a/crates/perry-hir/src/lower_decl/class_decl.rs b/crates/perry-hir/src/lower_decl/class_decl.rs index 4d319bb5f6..84a68be62f 100644 --- a/crates/perry-hir/src/lower_decl/class_decl.rs +++ b/crates/perry-hir/src/lower_decl/class_decl.rs @@ -1108,6 +1108,7 @@ pub fn lower_class_decl( &mut setters, &mut computed_members, &mut constructor, + &mut static_methods, ); // Phase 4.1: register each method's and getter's return type so @@ -1594,6 +1595,7 @@ pub fn lower_class_from_ast( &mut setters, &mut computed_members, &mut constructor, + &mut static_methods, ); Ok(Class { diff --git a/crates/perry-hir/src/lower_decl/class_members.rs b/crates/perry-hir/src/lower_decl/class_members.rs index 4614ef7627..c31b55faa8 100644 --- 
a/crates/perry-hir/src/lower_decl/class_members.rs +++ b/crates/perry-hir/src/lower_decl/class_members.rs @@ -174,7 +174,7 @@ pub fn lower_constructor( // touch only params (not `this`), so they stay at the very top. if let Some(super_pos) = body .iter() - .position(|s| matches!(s, Stmt::Expr(Expr::SuperCall(_)))) + .position(|s| matches!(s, Stmt::Expr(Expr::SuperCall(_) | Expr::SuperCallSpread(_)))) { let tail = body.split_off(super_pos + 1); body.extend(assignments); diff --git a/crates/perry-hir/src/stable_hash/expr.rs b/crates/perry-hir/src/stable_hash/expr.rs index a981866c88..aa32ecf915 100644 --- a/crates/perry-hir/src/stable_hash/expr.rs +++ b/crates/perry-hir/src/stable_hash/expr.rs @@ -50,6 +50,7 @@ impl SH for Expr { Expr::Logical { op, left, right } => { tag(h, 17); op.hash(h); left.as_ref().hash(h); right.as_ref().hash(h); } Expr::Call { callee, args, type_args, } => { tag(h, 18); callee.as_ref().hash(h); args.hash(h); type_args.hash(h); } Expr::CallSpread { callee, args, type_args, } => { tag(h, 19); callee.as_ref().hash(h); args.hash(h); type_args.hash(h); } + Expr::SuperCallSpread(args) => { tag(h, 12240); for a in args { match a { CallArg::Expr(e) | CallArg::Spread(e) => e.hash(h), } } } Expr::PodLayoutSizeOf { ty } => { tag(h, 12001); ty.hash(h); } Expr::PodLayoutAlignOf { ty } => { tag(h, 12002); ty.hash(h); } Expr::PodLayoutOffsetOf { ty, field_path } => { tag(h, 12003); ty.hash(h); field_path.hash(h); } @@ -628,6 +629,7 @@ impl SH for Expr { Expr::TemplateRaw(e) => { tag(h, 446); e.as_ref().hash(h); } Expr::RegisterClassParentDynamic { class_name, parent_expr, } => { tag(h, 447); class_name.hash(h); parent_expr.as_ref().hash(h); } Expr::RegisterClassCaptures { class_name, captures } => { tag(h, 12238); class_name.hash(h); for c in captures { c.hash(h); } } + Expr::ClassCaptureValue { class_name, index } => { tag(h, 12239); class_name.hash(h); index.hash(h); } Expr::RegisterClassStaticSymbol { class_name, key_expr, value_expr, } => { tag(h, 
12025); class_name.hash(h); key_expr.as_ref().hash(h); value_expr.as_ref().hash(h); } Expr::RegisterClassComputedMethod { class_name, key_expr, method_name, is_static, param_count, has_rest } => { tag(h, 12233); class_name.hash(h); key_expr.as_ref().hash(h); method_name.hash(h); is_static.hash(h); param_count.hash(h); has_rest.hash(h); } Expr::RegisterClassComputedAccessor { class_name, key_expr, getter_name, setter_name, is_static } => { tag(h, 12234); class_name.hash(h); key_expr.as_ref().hash(h); getter_name.hash(h); setter_name.hash(h); is_static.hash(h); } diff --git a/crates/perry-hir/src/walker/expr_mut.rs b/crates/perry-hir/src/walker/expr_mut.rs index b49762e413..a3c313c0e2 100644 --- a/crates/perry-hir/src/walker/expr_mut.rs +++ b/crates/perry-hir/src/walker/expr_mut.rs @@ -584,6 +584,7 @@ where f(c); } } + Expr::ClassCaptureValue { .. } => {} Expr::RegisterClassStaticSymbol { key_expr, value_expr, @@ -858,6 +859,13 @@ where } } } + Expr::SuperCallSpread(args) => { + for a in args { + match a { + CallArg::Expr(e) | CallArg::Spread(e) => f(e), + } + } + } Expr::ArraySpread(elements) => { for el in elements { match el { diff --git a/crates/perry-hir/src/walker/expr_ref.rs b/crates/perry-hir/src/walker/expr_ref.rs index e7ea54ea25..038abcffcb 100644 --- a/crates/perry-hir/src/walker/expr_ref.rs +++ b/crates/perry-hir/src/walker/expr_ref.rs @@ -585,6 +585,7 @@ where f(c); } } + Expr::ClassCaptureValue { .. 
} => {} Expr::RegisterClassStaticSymbol { key_expr, value_expr, @@ -855,6 +856,13 @@ where } } } + Expr::SuperCallSpread(args) => { + for a in args { + match a { + CallArg::Expr(e) | CallArg::Spread(e) => f(e), + } + } + } Expr::ArraySpread(elements) => { for el in elements { match el { diff --git a/crates/perry-runtime/src/object/class_constructors.rs b/crates/perry-runtime/src/object/class_constructors.rs index 2106b2590c..976ad72940 100644 --- a/crates/perry-runtime/src/object/class_constructors.rs +++ b/crates/perry-runtime/src/object/class_constructors.rs @@ -128,6 +128,155 @@ fn class_capture_values(class_id: u32) -> Option> { CLASS_CAPTURE_VALUES.with(|m| m.borrow().get(&class_id).cloned()) } +/// Codegen FFI: read one slot of a class's decl-site capture snapshot — +/// STATIC method prologue rebinds (statics have no instance to carry the +/// `__perry_cap_*` fields). Absent snapshot/slot reads `undefined`. +#[no_mangle] +pub extern "C" fn js_class_capture_value(class_id: u32, index: u32) -> f64 { + CLASS_CAPTURE_VALUES.with(|m| { + m.borrow() + .get(&class_id) + .and_then(|v| v.get(index as usize).copied()) + .map(f64::from_bits) + .unwrap_or(f64::from_bits(crate::value::TAG_UNDEFINED)) + }) +} + +/// Keepalive anchor (generated-code-only callee). +#[used] +static KEEP_JS_CLASS_CAPTURE_VALUE: extern "C" fn(u32, u32) -> f64 = js_class_capture_value; + +/// `super(...spread)` — invoke the closest registered ancestor constructor +/// of `child_cid` on the EXISTING `this`, with args from the materialized +/// `args_array` (dynamic count; the inline-super path needs a static arg +/// list). The ancestor's trailing `__perry_cap_*` params are filled from +/// its decl-site snapshot, mirroring `replay_registered_class_constructor`. +/// +/// # Safety +/// `this_value`/`args_array` must be valid NaN-boxed heap pointers. 
+#[no_mangle] +pub unsafe extern "C" fn js_super_construct_apply( + child_cid: u32, + this_value: f64, + args_array: f64, +) -> f64 { + let undef = f64::from_bits(crate::value::TAG_UNDEFINED); + let this_raw = (this_value.to_bits() & crate::value::POINTER_MASK) as i64; + if std::env::var_os("PERRY_SUPER_DEBUG").is_some() { + eprintln!( + "super_apply child={} this_bits={:#x} args_bits={:#x}", + child_cid, + this_value.to_bits(), + args_array.to_bits() + ); + } + if this_raw == 0 { + return undef; + } + let arr = + (args_array.to_bits() & crate::value::POINTER_MASK) as *const crate::array::ArrayHeader; + let mut cur = crate::object::get_parent_class_id(child_cid).unwrap_or(0); + let mut depth = 0usize; + while cur != 0 && depth < 64 { + if let Some((ctor_ptr, total_params)) = lookup_class_constructor(cur) { + if std::env::var_os("PERRY_SUPER_DEBUG").is_some() { + eprintln!("super_apply resolved ancestor cid={} total={}", cur, total_params); + } + let caps = class_capture_values(cur).unwrap_or_default(); + let user_params = (total_params as usize).saturating_sub(caps.len()); + let n = if arr.is_null() { + 0 + } else { + crate::array::js_array_length(arr) + } as usize; + let mut final_args: Vec = Vec::with_capacity(total_params as usize); + for i in 0..user_params { + if i < n { + final_args.push(crate::array::js_array_get_f64(arr, i as u32)); + } else { + final_args.push(undef); + } + } + for bits in &caps { + final_args.push(f64::from_bits(*bits)); + } + let _ = call_vtable_method( + ctor_ptr, + this_raw, + final_args.as_ptr(), + final_args.len(), + total_params, + false, + false, + ); + return undef; + } + let next = crate::object::get_parent_class_id(cur).unwrap_or(0); + if next == cur { + break; + } + cur = next; + depth += 1; + } + undef +} + +/// Keepalive anchor (generated-code-only callee). 
+#[used] +static KEEP_JS_SUPER_CONSTRUCT_APPLY: unsafe extern "C" fn(u32, f64, f64) -> f64 = + js_super_construct_apply; + +/// Append the spread of `value` to `target` (array handle), handling BOTH +/// real arrays AND array-likes (Perry's `arguments` object is an +/// ObjectHeader with "0".."n-1" + "length" props — `super(...arguments)` +/// spreads it). Returns the (possibly reallocated) target handle. +/// +/// # Safety +/// `target` must be a valid ArrayHeader pointer. +#[no_mangle] +pub unsafe extern "C" fn js_array_push_spread_any( + target: *mut crate::array::ArrayHeader, + value: f64, +) -> *mut crate::array::ArrayHeader { + let jv = crate::value::JSValue::from_bits(value.to_bits()); + if !jv.is_pointer() && !jv.is_string() { + return target; + } + let raw = (value.to_bits() & crate::value::POINTER_MASK) as *const u8; + if raw.is_null() { + return target; + } + // Real array → bulk append. + let as_arr = crate::array::clean_arr_ptr(raw as *const crate::array::ArrayHeader); + if !as_arr.is_null() { + return crate::array::js_array_push_spread_f64(target, as_arr); + } + // Array-like object (arguments): read `length`, copy indexed props. + let obj = raw as *const ObjectHeader; + let len_key = crate::string::js_string_from_bytes(b"length".as_ptr(), 6); + let len_v = crate::object::js_object_get_field_by_name(obj, len_key); + let len_f = f64::from_bits(len_v.bits()); + if !len_f.is_finite() || len_f < 0.0 { + return target; + } + let n = len_f as u32; + let mut cur = target; + for i in 0..n { + let idx = i.to_string(); + let key = crate::string::js_string_from_bytes(idx.as_ptr(), idx.len() as u32); + let v = crate::object::js_object_get_field_by_name(obj, key); + cur = crate::array::js_array_push_f64(cur, f64::from_bits(v.bits())); + } + cur +} + +/// Keepalive anchor (generated-code-only callee). 
+#[used] +static KEEP_JS_ARRAY_PUSH_SPREAD_ANY: unsafe extern "C" fn( + *mut crate::array::ArrayHeader, + f64, +) -> *mut crate::array::ArrayHeader = js_array_push_spread_any; + /// #1787: replay a class expression's constructor on a freshly-allocated /// instance. `classobj_value` is the NaN-boxed heap class object the `new` /// callee resolved to; `class_cid` is its (template) class_id; `inst` is the diff --git a/crates/perry-runtime/src/object/reflect_support.rs b/crates/perry-runtime/src/object/reflect_support.rs index d6a3f6b08e..dbf2f32c85 100644 --- a/crates/perry-runtime/src/object/reflect_support.rs +++ b/crates/perry-runtime/src/object/reflect_support.rs @@ -101,7 +101,8 @@ pub(crate) fn obj_value_has_own_key(value: f64, key: f64) -> bool { super::native_module::read_native_module_name(obj), key_to_rust_string(key), ) { - if super::native_module::native_module_has_enumerable_key(&module_name, &key_name) { + if super::native_module::native_module_has_enumerable_key(&module_name, &key_name) + { return true; } } diff --git a/crates/perry/src/commands/compile/cjs_wrap/detect.rs b/crates/perry/src/commands/compile/cjs_wrap/detect.rs index 091abc6fc5..b568a30c3c 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/detect.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/detect.rs @@ -207,7 +207,10 @@ fn strip_comments_and_strings(source: &str) -> String { State::Str(quote) => { if bytes[i] == b'\\' { i += 2; - } else if quote == b'`' && bytes[i] == b'$' && bytes.get(i + 1) == Some(&b'{') { + } else if quote == b'`' + && bytes[i] == b'$' + && bytes.get(i + 1) == Some(&b'{') + { // `${` — interpolation body is code (and may nest). 
template_interp_depth.push(0); state = State::Code; diff --git a/crates/perry/src/commands/compile/cjs_wrap/mod.rs b/crates/perry/src/commands/compile/cjs_wrap/mod.rs index c19a4cface..b1a39ed604 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/mod.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/mod.rs @@ -107,10 +107,7 @@ if (process.env.NEXT_RUNTIME !== 'edge') { require('next/dist/server/node-environment'); } "#; - assert!( - is_commonjs(src), - "comment text must not defeat require( arm" - ); + assert!(is_commonjs(src), "comment text must not defeat require( arm"); } #[test] @@ -119,10 +116,7 @@ if (process.env.NEXT_RUNTIME !== 'edge') { // literal whose column-0 `import path from 'node:path'` line must // not flip this CJS file to the ESM pipeline. let src = "\"use strict\";\nObject.defineProperty(exports, \"__esModule\", { value: true });\nexports.write = function() {\n return `performance.mark('next-start');\nimport path from 'node:path'\nimport module from 'node:module'\n`;\n};\n"; - assert!( - is_commonjs(src), - "template-literal import must not defeat CJS detection" - ); + assert!(is_commonjs(src), "template-literal import must not defeat CJS detection"); } #[test] @@ -143,10 +137,7 @@ if (process.env.NEXT_RUNTIME !== 'edge') { // followed by the real `module.exports=` tail. The stripper must // track regex literals or the tail is masked as string content. 
let src = "const e = s.split(/['\"]/);\nvar i = make();\nmodule.exports = i;\n"; - assert!( - is_commonjs(src), - "regex with quote must not hide module.exports" - ); + assert!(is_commonjs(src), "regex with quote must not hide module.exports"); } #[test] From d85996089c29373e1a3f6b0821efe8a55fe30950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 01:54:49 +0200 Subject: [PATCH 04/15] fix(codegen): pack rest/arguments params on the ctor symbol-call path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recursion-guarded standalone-ctor call path (call_local_constructor_symbol — `new Kid(...)` issued inside a method of Kid) passed lowered args RAW/positionally, but a rest param or the synthesized `arguments` param expects the caller to pack an array (the inline path's inline_constructor_param_values rules). The user arg landed raw in the rest slot, `super(...arguments)` spread an object with no .length, and the parent ctor ran with zero args — second-generation zod instances (`_addCheck` → `new ZodNumber({…})`) lost `_def`. Reuse inline_constructor_param_values with the effective ctor's params (own, else closest ancestor's — matching the adopted standalone-symbol signature). Cross-module ctors (arity only) keep prior behavior. Also reverts the SuperCallSpread inline-parent experiment to the runtime registry apply (the experiment segfaulted on rest-param forwarding; with this packing fix the registry path is correct in all contexts — verified across 14 t_sp*/t_ns* shapes incl. the previously-failing in-method and rest-param ones, plus zod number/string/enum/record/union chains). 
--- crates/perry-codegen/src/lower_call/new.rs | 34 +++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/crates/perry-codegen/src/lower_call/new.rs b/crates/perry-codegen/src/lower_call/new.rs index 1cc01e96eb..9f04c5f165 100644 --- a/crates/perry-codegen/src/lower_call/new.rs +++ b/crates/perry-codegen/src/lower_call/new.rs @@ -383,7 +383,39 @@ fn call_local_constructor_symbol( // from `_addCheck`, where ZodNumber has no own ctor and ZodType does). let param_count = effective_constructor_param_count(ctx, class); let undef_lit = double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)); - let mut ctor_values = lowered_args.to_vec(); + // When the ctor's signature is statically known, build per-param values + // with the SAME packing rules the inline path uses — a rest param or the + // synthesized `arguments` param receives a PACKED ARRAY, not a raw + // positional value. Pre-fix, `new Kid({...})` from a method of Kid (the + // recursion-guarded symbol-call path) shoved the user arg RAW into the + // ctor's synthetic `arguments` slot; `super(...arguments)` then spread + // an object with no `length` and the parent ctor saw zero args + // (vendored zod's `z.number().int()` chain — `_addCheck` → + // `new ZodNumber({…})` → `constructor(){ super(...arguments) }`). 
+ let effective_params: Option> = { + let mut found = class.constructor.as_ref().map(|c| c.params.clone()); + if found.is_none() { + let mut parent = class.extends_name.as_deref().map(|s| s.to_string()); + while let Some(pname) = parent { + match ctx.classes.get(&pname).copied() { + Some(pc) => { + if let Some(pctor) = pc.constructor.as_ref() { + found = Some(pctor.params.clone()); + break; + } + parent = pc.extends_name.as_deref().map(|s| s.to_string()); + } + None => break, + } + } + } + found + }; + let mut ctor_values = if let Some(params) = effective_params { + inline_constructor_param_values(ctx, &params, lowered_args) + } else { + lowered_args.to_vec() + }; ctor_values.truncate(param_count); while ctor_values.len() < param_count { ctor_values.push(undef_lit.clone()); From 24080ecc51767b4f5af57eca3ed773e0b3213689 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 02:41:35 +0200 Subject: [PATCH 05/15] fix(hir): append capture args to New sites lowered before class registration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sibling code lowered BEFORE a function-nested capturing class registers its captures — a hoisted function declared above the class (zod's `function createZodEnum(values) { return new ZodEnum({...}) }`) — emitted `new (…)` with NO capture args; the inline binder then misfilled the ctor params (first capture slot ate the user arg) and instances lost both `_def` and their capture fields ("reading 'getParsedType'/'OK' of undefined" at parse time). The function-body end hook (where capture snapshots are re-registered) now walks the lowered body and appends the raw outer capture ids to those sites, descending nested closures and patching their capture lists. Sites lowered AFTER registration already end with exactly the append sequence and are skipped via a tail-match guard.
The member-body appender (`append_self_sites`) is refactored onto the same shared `append_new_args_stmt/expr` helpers. Validates: z.enum/optional/object.strict safeParse chains, the full Next.js segment-config schema (seg.js), and the 15-repro t_sp*/t_ns* regression suite. --- crates/perry-hir/src/lower_decl/block.rs | 21 ++ .../src/lower_decl/class_captures.rs | 285 ++++++++++-------- 2 files changed, 181 insertions(+), 125 deletions(-) diff --git a/crates/perry-hir/src/lower_decl/block.rs b/crates/perry-hir/src/lower_decl/block.rs index c1df53cef2..8e88d6fe0f 100644 --- a/crates/perry-hir/src/lower_decl/block.rs +++ b/crates/perry-hir/src/lower_decl/block.rs @@ -247,6 +247,27 @@ pub fn lower_fn_body_block_stmt( if !captured.is_empty() { let captures: Vec = captured.iter().map(|id| Expr::LocalGet(*id)).collect(); + // Sibling code lowered BEFORE this class registered + // its captures (forward refs — zod's + // `function createZodEnum(...) { return new + // ZodEnum({...}) }` declared above the class) has + // `new (…)` sites with NO cap args appended; + // the inline binder then misfills the ctor params. + // Append the raw outer ids now; sites lowered after + // registration already end with exactly these ids + // and are skipped (tail-match guard). Class members + // were handled by `append_self_sites` with remapped + // ids — their tails don't match the raw ids, but + // they ALREADY carry appends; restrict this pass to + // non-member code by walking the lowered body only + // (member bodies live in pending_classes, not here). 
+ let cap_args: Vec<(perry_types::LocalId, perry_types::LocalId)> = + captured.iter().map(|id| (*id, *id)).collect(); + for s in body.iter_mut() { + super::class_captures::append_new_args_stmt( + s, &cname, &cap_args, true, + ); + } re_regs.push(Stmt::Expr(Expr::RegisterClassCaptures { class_name: cname, captures, diff --git a/crates/perry-hir/src/lower_decl/class_captures.rs b/crates/perry-hir/src/lower_decl/class_captures.rs index c04cb737b3..e92e485505 100644 --- a/crates/perry-hir/src/lower_decl/class_captures.rs +++ b/crates/perry-hir/src/lower_decl/class_captures.rs @@ -270,137 +270,14 @@ pub fn synthesize_class_captures( class_name: &str, cap_args: &[(LocalId, LocalId)], ) { - if let Expr::New { - class_name: cn, - args, - .. - } = expr - { - if cn == class_name { - for (_, fresh) in cap_args { - args.push(Expr::LocalGet(*fresh)); - } - } - } - if let Expr::Closure { body, captures, .. } = expr { - for stmt in body.iter_mut() { - append_self_new_args_stmt(stmt, class_name, cap_args); - } - // The appended LocalGet reads the enclosing method's rebind - // slot — make sure the closure captures it. - let mut refs = Vec::new(); - let mut visited = std::collections::HashSet::new(); - for stmt in body.iter() { - crate::analysis::collect_local_refs_stmt(stmt, &mut refs, &mut visited); - } - for (_, fresh) in cap_args { - if refs.contains(fresh) && !captures.contains(fresh) { - captures.push(*fresh); - } - } - return; - } - crate::walker::walk_expr_children_mut(expr, &mut |child| { - append_self_new_args_expr(child, class_name, cap_args) - }); + append_new_args_expr(expr, class_name, cap_args, false) } fn append_self_new_args_stmt( stmt: &mut Stmt, class_name: &str, cap_args: &[(LocalId, LocalId)], ) { - match stmt { - Stmt::Let { init, .. 
} => { - if let Some(e) = init { - append_self_new_args_expr(e, class_name, cap_args); - } - } - Stmt::Expr(e) | Stmt::Throw(e) => append_self_new_args_expr(e, class_name, cap_args), - Stmt::Return(opt) => { - if let Some(e) = opt { - append_self_new_args_expr(e, class_name, cap_args); - } - } - Stmt::If { - condition, - then_branch, - else_branch, - } => { - append_self_new_args_expr(condition, class_name, cap_args); - for s in then_branch { - append_self_new_args_stmt(s, class_name, cap_args); - } - if let Some(eb) = else_branch { - for s in eb { - append_self_new_args_stmt(s, class_name, cap_args); - } - } - } - Stmt::While { condition, body } | Stmt::DoWhile { body, condition } => { - append_self_new_args_expr(condition, class_name, cap_args); - for s in body { - append_self_new_args_stmt(s, class_name, cap_args); - } - } - Stmt::For { - init, - condition, - update, - body, - } => { - if let Some(s) = init { - append_self_new_args_stmt(s, class_name, cap_args); - } - if let Some(e) = condition { - append_self_new_args_expr(e, class_name, cap_args); - } - if let Some(e) = update { - append_self_new_args_expr(e, class_name, cap_args); - } - for s in body { - append_self_new_args_stmt(s, class_name, cap_args); - } - } - Stmt::Labeled { body, .. 
} => append_self_new_args_stmt(body, class_name, cap_args), - Stmt::Try { - body, - catch, - finally, - } => { - for s in body { - append_self_new_args_stmt(s, class_name, cap_args); - } - if let Some(c) = catch { - for s in &mut c.body { - append_self_new_args_stmt(s, class_name, cap_args); - } - } - if let Some(fb) = finally { - for s in fb { - append_self_new_args_stmt(s, class_name, cap_args); - } - } - } - Stmt::Switch { - discriminant, - cases, - } => { - append_self_new_args_expr(discriminant, class_name, cap_args); - for c in cases { - if let Some(t) = &mut c.test { - append_self_new_args_expr(t, class_name, cap_args); - } - for s in &mut c.body { - append_self_new_args_stmt(s, class_name, cap_args); - } - } - } - Stmt::Break - | Stmt::Continue - | Stmt::LabeledBreak(_) - | Stmt::LabeledContinue(_) - | Stmt::PreallocateBoxes(_) => {} - } + append_new_args_stmt(stmt, class_name, cap_args, false) } // 2. Methods / getters / setters. After each body's capture rebind, @@ -616,3 +493,161 @@ pub fn synthesize_class_captures( // construction site. ctx.register_class_captures(name.to_string(), captures_vec); } + +/// Append `cap_args` (the `.1` ids) to every `new (…)` site in +/// `expr`, descending nested closures (patching their capture lists when the +/// appended id is otherwise unreferenced). With `skip_if_present`, a site +/// whose args already END with exactly the `.1` id sequence is left alone — +/// used by the post-body pass, where sites lowered AFTER the class +/// registered already carry the appends. +pub(crate) fn append_new_args_expr( + expr: &mut Expr, + class_name: &str, + cap_args: &[(LocalId, LocalId)], + skip_if_present: bool, +) { + if let Expr::New { + class_name: cn, + args, + .. + } = expr + { + if cn == class_name { + let already = skip_if_present + && args.len() >= cap_args.len() + && args[args.len() - cap_args.len()..] 
+ .iter() + .zip(cap_args.iter()) + .all(|(a, (_, fresh))| matches!(a, Expr::LocalGet(id) if id == fresh)); + if !already { + for (_, fresh) in cap_args { + args.push(Expr::LocalGet(*fresh)); + } + } + } + } + if let Expr::Closure { body, captures, .. } = expr { + for stmt in body.iter_mut() { + append_new_args_stmt(stmt, class_name, cap_args, skip_if_present); + } + let mut refs = Vec::new(); + let mut visited = std::collections::HashSet::new(); + for stmt in body.iter() { + crate::analysis::collect_local_refs_stmt(stmt, &mut refs, &mut visited); + } + for (_, fresh) in cap_args { + if refs.contains(fresh) && !captures.contains(fresh) { + captures.push(*fresh); + } + } + return; + } + crate::walker::walk_expr_children_mut(expr, &mut |child| { + append_new_args_expr(child, class_name, cap_args, skip_if_present) + }); +} + +/// Statement-level driver for [`append_new_args_expr`]. +pub(crate) fn append_new_args_stmt( + stmt: &mut Stmt, + class_name: &str, + cap_args: &[(LocalId, LocalId)], + skip_if_present: bool, +) { + match stmt { + Stmt::Let { init, .. 
} => { + if let Some(e) = init { + append_new_args_expr(e, class_name, cap_args, skip_if_present); + } + } + Stmt::Expr(e) | Stmt::Throw(e) => { + append_new_args_expr(e, class_name, cap_args, skip_if_present) + } + Stmt::Return(opt) => { + if let Some(e) = opt { + append_new_args_expr(e, class_name, cap_args, skip_if_present); + } + } + Stmt::If { + condition, + then_branch, + else_branch, + } => { + append_new_args_expr(condition, class_name, cap_args, skip_if_present); + for s in then_branch { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + if let Some(eb) = else_branch { + for s in eb { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + } + Stmt::While { condition, body } | Stmt::DoWhile { body, condition } => { + append_new_args_expr(condition, class_name, cap_args, skip_if_present); + for s in body { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + Stmt::For { + init, + condition, + update, + body, + } => { + if let Some(s) = init { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + if let Some(e) = condition { + append_new_args_expr(e, class_name, cap_args, skip_if_present); + } + if let Some(e) = update { + append_new_args_expr(e, class_name, cap_args, skip_if_present); + } + for s in body { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + Stmt::Labeled { body, .. 
} => { + append_new_args_stmt(body, class_name, cap_args, skip_if_present) + } + Stmt::Try { + body, + catch, + finally, + } => { + for s in body { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + if let Some(c) = catch { + for s in &mut c.body { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + if let Some(fb) = finally { + for s in fb { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + } + Stmt::Switch { + discriminant, + cases, + } => { + append_new_args_expr(discriminant, class_name, cap_args, skip_if_present); + for c in cases { + if let Some(t) = &mut c.test { + append_new_args_expr(t, class_name, cap_args, skip_if_present); + } + for s in &mut c.body { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + } + Stmt::Break + | Stmt::Continue + | Stmt::LabeledBreak(_) + | Stmt::LabeledContinue(_) + | Stmt::PreallocateBoxes(_) => {} + } +} From 9e05cdb81601b380ea2ef7e93551a5fad973c67c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 02:52:04 +0200 Subject: [PATCH 06/15] fix(cjs_wrap): require() of an unresolvable specifier throws MODULE_NOT_FOUND MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wrap adopted EVERY static require by hoisting an import — including unresolvable ones (`require('@opentelemetry/api')` where only Next's vendored copy exists). The hoisted binding lowers to the boolean TRUE sentinel at runtime, and the shim returned it, defeating the ubiquitous try/require-fallback pattern: Node throws MODULE_NOT_FOUND and the catch loads `next/dist/compiled/@opentelemetry/api`; the shim handed back `true` and the catch never ran ("(boolean).createContextKey is not a function" at Next.js boot). Guard each adopted shim entry: a boolean binding means the module never resolved — throw the spec ERR MODULE_NOT_FOUND instead, so the catch arm loads the fallback. 
(Caveat documented: a real module default-exporting a boolean would mis-trip; no such package shape observed.) --- .../src/commands/compile/cjs_wrap/wrap.rs | 80 ++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/crates/perry/src/commands/compile/cjs_wrap/wrap.rs b/crates/perry/src/commands/compile/cjs_wrap/wrap.rs index 31d1c470a7..95947c7af0 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/wrap.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/wrap.rs @@ -182,12 +182,90 @@ pub(in crate::commands::compile) fn wrap_commonjs_for_target( .collect::>() .join("\n"); + // An UNRESOLVABLE adopted specifier (`require('@opentelemetry/api')` + // with only Next's vendored copy on disk) leaves its hoisted import + // binding as the boolean TRUE sentinel at runtime. Returning that from + // the shim defeats the ubiquitous try/require-fallback pattern — Node + // throws MODULE_NOT_FOUND and the catch loads the vendored copy, but + // the shim handed back `true` and the catch never ran. Guard such an + // entry with a throw — but ONLY when a call site of that specifier + // sits inside a `try` block: a BARE top-level require of a pruned + // build-only module (`require('next/dist/compiled/browserslist')` in + // get-supported-browsers.js) must keep the silent sentinel, because + // Perry initializes every collected module eagerly while Node never + // loads that file at all — a throw there kills startup. (A real module + // default-exporting a boolean would mis-trip the guard; no such + // package shape has been observed.) 
let require_cases = require_specs .iter() .zip(import_local_names.iter()) - .map(|(spec, local)| format!(" if (specifier === '{}') return {};", spec, local)) + .map(|(spec, local)| { + if require_site_in_try(source, spec) { + format!( + " if (specifier === '{spec}') {{ if (typeof {local} === 'boolean') \ + throw __perry_cjs_require_error('error', 'MODULE_NOT_FOUND', \ + \"Cannot find module '{spec}'\"); return {local}; }}" + ) + } else { + format!(" if (specifier === '{}') return {};", spec, local) + } + }) .collect::>() .join("\n"); + // Heuristic: is any `require('')` call site lexically inside a + // `try { … }` block? Reverse brace-depth scan from the call offset to + // the nearest unmatched `{`, checking whether `try` precedes it. + // String/comment contexts are not stripped — a false positive only + // turns the silent sentinel into a (more Node-faithful) throw. + fn require_site_in_try(source: &str, spec: &str) -> bool { + let needle_sq = format!("require('{}')", spec); + let needle_dq = format!("require(\"{}\")", spec); + let bytes = source.as_bytes(); + let mut search = 0usize; + loop { + let hit = source[search..] + .find(&needle_sq) + .or_else(|| source[search..].find(&needle_dq)); + let Some(rel) = hit else { return false }; + let at = search + rel; + // Walk backwards to the nearest unmatched `{`, repeatedly: each + // enclosing block is checked for a preceding `try`. + let mut depth = 0i32; + let mut i = at; + while i > 0 { + i -= 1; + match bytes[i] { + b'}' => depth += 1, + b'{' => { + if depth > 0 { + depth -= 1; + } else { + // Enclosing block opener — does `try` precede it? + let mut j = i; + while j > 0 + && (bytes[j - 1] == b' ' + || bytes[j - 1] == b'\t' + || bytes[j - 1] == b'\r' + || bytes[j - 1] == b'\n') + { + j -= 1; + } + if j >= 3 + && &bytes[j - 3..j] == b"try" + && (j == 3 || !bytes[j - 4].is_ascii_alphanumeric()) + { + return true; + } + // Keep walking outward (this block wasn't a try). 
+ } + } + _ => {} + } + } + search = at + 1; + } + } + let require_resolve_cases = require_specs .iter() .map(|spec| format!(" if (specifier === '{}') return '{}';", spec, spec)) From e09e9f158ac89231e0c69dd1b07efc8785d73916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 06:50:31 +0200 Subject: [PATCH 07/15] =?UTF-8?q?fix(compile,runtime):=20wall=20#24/#25=20?= =?UTF-8?q?=E2=80=94=20webpack=20inner-module=20class=20interop=20(p-queue?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit next/dist/compiled/p-queue compile-validation wall. Two independent roots, both in the webpack/ncc inner-module shape where a class is declared inside a nested arrow `(e, t, n) => { class TimeoutError extends Error {…}; e.exports = …; e.exports.TimeoutError = TimeoutError; }`. 1. extract_exports: the `exports.X = …` named-export regex matched `e.exports.TimeoutError = …` (a property write on an INNER module's own exports param). That false positive made the wrap emit `export const TimeoutError = _cjs.TimeoutError;` at outer-module scope, which shadowed the inner class binding so every inner reference read undefined. Exclude `.` from the boundary class so only true `(module.)exports.X` matches. 2. js_fetch_or_value_super: `class PQ extends t {}` nested in a function lowers its heritage Ident at class-DECL scope, but codegen re-emits the expression inside the constructor where the captured slot is unrelated, so `parent_val` arrives stale (undefined → "is not a constructor"). The decl-site `js_register_class_parent_dynamic` DID see the live closure and recorded it in CLASS_PARENT_CLOSURES; when `parent_val` isn't callable, walk the instance's parent chain (new `parent_closure_in_chain`) and dispatch to the registered parent closure with `this` bound so the parent body (`this._events = …`) runs. 3. 
replay_class_object_constructor: a class DECLARATION reached as a heap class object (webpack `t["default"] = PQueue` read cross-module) carries no per-evaluation `__perry_ctor_caps`; fall back to the decl-site CLASS_CAPTURE_VALUES snapshot for the trailing `__perry_cap_*` ctor params, mirroring the ClassRef replay path. Also a descriptive " is not a constructor" message. p-queue repro chain (main/mini2/mini3/t_fnparent2) now matches Node; zod capture suite + t_sp*/t_ns* regression set unchanged. --- .../src/object/class_constructors.rs | 18 ++++++++- .../src/object/class_registry.rs | 25 +++++++++++- .../perry-runtime/src/object/global_this.rs | 39 ++++++++++++++++++- .../compile/cjs_wrap/extract_exports.rs | 9 ++++- .../src/commands/compile/cjs_wrap/mod.rs | 28 +++++++++++++ 5 files changed, 115 insertions(+), 4 deletions(-) diff --git a/crates/perry-runtime/src/object/class_constructors.rs b/crates/perry-runtime/src/object/class_constructors.rs index 976ad72940..099088cdb9 100644 --- a/crates/perry-runtime/src/object/class_constructors.rs +++ b/crates/perry-runtime/src/object/class_constructors.rs @@ -317,7 +317,20 @@ pub(crate) unsafe fn replay_class_object_constructor( (std::ptr::null(), 0) }; - let user_params = (total_params as usize).saturating_sub(n_caps as usize); + // A class DECLARATION reached as a heap class object (webpack interop: + // `t["default"] = PQueue` read back cross-module) has no per-evaluation + // `__perry_ctor_caps` array — fall back to the decl-site snapshot + // (CLASS_CAPTURE_VALUES), exactly like the ClassRef replay path. Without + // this, the trailing `__perry_cap_*` ctor params read the USER args + // (p-queue's `new PQueue({...})` left `i.default` undefined and + // `new e.queueClass` threw "undefined is not a constructor"). 
+ let snapshot_caps: Vec = if n_caps == 0 { + class_capture_values(class_cid).unwrap_or_default() + } else { + Vec::new() + }; + let effective_caps = (n_caps as usize).max(snapshot_caps.len()); + let user_params = (total_params as usize).saturating_sub(effective_caps); let undef = f64::from_bits(crate::value::TAG_UNDEFINED); let mut final_args: Vec = Vec::with_capacity(total_params as usize); for i in 0..user_params { @@ -330,6 +343,9 @@ pub(crate) unsafe fn replay_class_object_constructor( for j in 0..n_caps { final_args.push(crate::array::js_array_get_f64(caps_arr, j)); } + for bits in &snapshot_caps { + final_args.push(f64::from_bits(*bits)); + } let _ = call_vtable_method( ctor_ptr, inst as i64, diff --git a/crates/perry-runtime/src/object/class_registry.rs b/crates/perry-runtime/src/object/class_registry.rs index ade80c3b8c..21858e5d65 100644 --- a/crates/perry-runtime/src/object/class_registry.rs +++ b/crates/perry-runtime/src/object/class_registry.rs @@ -311,6 +311,28 @@ pub(crate) fn class_parent_closure(class_id: u32) -> Option { .and_then(|g| g.as_ref().and_then(|m| m.get(&class_id).copied())) } +/// Walk the class parent chain looking for a registered parent-closure edge. +/// `super()` dispatch needs this because the instance's class_id is the +/// MOST-DERIVED class, while the closure-parent edge is keyed by the class +/// that directly `extends ` — possibly an ancestor. +pub(crate) fn parent_closure_in_chain(class_id: u32) -> Option { + let mut cid = class_id; + let mut depth = 0u32; + while depth < 32 && cid != 0 { + if let Some(addr) = class_parent_closure(cid) { + return Some(addr); + } + match get_parent_class_id(cid) { + Some(p) if p != 0 && p != cid => { + cid = p; + depth += 1; + } + _ => break, + } + } + None +} + /// Reverse lookup: which declared class's `.prototype` is this heap object? /// Used by `Object.getOwnPropertyDescriptor(C.prototype, name)` to surface /// vtable accessors as own properties of the prototype object. 
Linear scan — @@ -1614,7 +1636,8 @@ pub unsafe extern "C" fn js_new_function_construct( || jv.is_any_string() || jv.is_bigint() { - super::object_ops::throw_object_type_error(b"is not a constructor"); + let desc = unsafe { super::object_ops::describe_value_for_type_error(func_value) }; + super::object_ops::throw_object_type_error_with_suffix(&format!("{desc} "), "is not a constructor"); } } // `new (new String(""))` / `new (new Number(1))` — a boxed primitive WRAPPER diff --git a/crates/perry-runtime/src/object/global_this.rs b/crates/perry-runtime/src/object/global_this.rs index 602602108e..1b60da846d 100644 --- a/crates/perry-runtime/src/object/global_this.rs +++ b/crates/perry-runtime/src/object/global_this.rs @@ -460,8 +460,45 @@ pub unsafe extern "C" fn js_fetch_or_value_super( undef } _ => { + // `class PQ extends t {}` nested inside another function (webpack/ + // ncc inner modules — next/dist/compiled/p-queue extending + // eventemitter3): HIR lowers the heritage Ident at class-DECL + // scope, but codegen re-emits that expression inside the + // constructor, where the captured slot index is unrelated, so + // `parent_val` arrives stale (undefined). The decl-site + // `js_register_class_parent_dynamic` call DID see the live value + // and recorded it in CLASS_PARENT_CLOSURES — prefer that + // registration whenever `parent_val` isn't actually callable, so + // the parent function body still runs with `this` bound (sets + // `this._events` etc.). A valid closure / class-object parent + // value keeps the existing direct-dispatch path untouched. 
+ let mut callee = parent_val; + let bits = parent_val.to_bits(); + const POINTER_TAG: u64 = 0x7FFD_0000_0000_0000; + const TAG_MASK: u64 = 0xFFFF_0000_0000_0000; + const PTR_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; + let usable = if bits & TAG_MASK == POINTER_TAG { + let p = (bits & PTR_MASK) as usize; + crate::closure::is_closure_ptr(p) + || crate::object::js_object_get_class_id( + p as *const crate::object::ObjectHeader, + ) != 0 + } else { + // INT32-tagged ClassRefs route through the static super paths + // before reaching here; anything else (undefined / a stale + // numeric slot) is not a constructor. + bits & TAG_MASK == 0x7FFE_0000_0000_0000 + }; + if !usable { + if let Some(obj) = subclass_this_object_ptr(this_box) { + let cid = crate::object::js_object_get_class_id(obj); + if let Some(addr) = super::class_registry::parent_closure_in_chain(cid) { + callee = f64::from_bits(POINTER_TAG | addr as u64); + } + } + } let prev = crate::object::js_implicit_this_set(this_box); - let r = crate::closure::js_native_call_value(parent_val, args_ptr, args_len); + let r = crate::closure::js_native_call_value(callee, args_ptr, args_len); crate::object::js_implicit_this_set(prev); r } diff --git a/crates/perry/src/commands/compile/cjs_wrap/extract_exports.rs b/crates/perry/src/commands/compile/cjs_wrap/extract_exports.rs index 77d9ed8e9c..8b77e578a0 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/extract_exports.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/extract_exports.rs @@ -355,8 +355,15 @@ pub fn extract_exports_from_source(source: &str) -> Vec { }; // Shape 1: `exports.X = ...` / `module.exports.X = ...` + // The boundary class excludes `.` so `e.exports.X = …` (a property write on + // some OTHER object — e.g. a webpack/ncc inner module's own exports param, + // as in next/dist/compiled/p-queue's `e.exports.TimeoutError = TimeoutError`) + // is NOT mistaken for a named export of the outer bundle. 
A false positive + // here makes the wrap emit `export const X = _cjs.X;` at module scope, + // which shadows the inner binding of the same name during lowering and + // turns every inner reference to it into `undefined`. let dot_re = regex::Regex::new( - r"(?:^|[^A-Za-z0-9_$])(?:module\.)?exports\.([A-Za-z_$][A-Za-z0-9_$]*)\s*=", + r"(?:^|[^A-Za-z0-9_$.])(?:module\.)?exports\.([A-Za-z_$][A-Za-z0-9_$]*)\s*=", ) .unwrap(); for cap in dot_re.captures_iter(source) { diff --git a/crates/perry/src/commands/compile/cjs_wrap/mod.rs b/crates/perry/src/commands/compile/cjs_wrap/mod.rs index b1a39ed604..25b30b8909 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/mod.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/mod.rs @@ -1256,6 +1256,34 @@ module.exports = SafeBuffer;"#; assert!(names.contains(&"version".to_string())); } + #[test] + fn extract_exports_skips_inner_module_exports_param() { + // next/dist/compiled/p-queue: webpack/ncc inner modules write to their + // OWN exports object (`e.exports.X = …`), which is not a named export + // of the outer bundle. Pre-fix the dot-boundary regex matched it, the + // wrap emitted `export const TimeoutError = _cjs.TimeoutError;` at + // module scope, and that const shadowed the inner class binding — + // every inner reference to `TimeoutError` became undefined. 
+ let src = "var mods = { 816: (e, t, n) => {\n\ + class TimeoutError extends Error {}\n\ + const pTimeout = (p) => p;\n\ + e.exports = pTimeout;\n\ + e.exports.str = 'hello';\n\ + e.exports.TimeoutError = TimeoutError;\n\ + }};\n\ + exports.real = 1;\n\ + module.exports.alsoReal = 2;\n"; + let names = extract_exports_from_source(src); + assert!( + !names.contains(&"TimeoutError".to_string()), + "`e.exports.X` is an inner module's exports, not ours: {:?}", + names + ); + assert!(!names.contains(&"str".to_string()), "got: {:?}", names); + assert!(names.contains(&"real".to_string())); + assert!(names.contains(&"alsoReal".to_string())); + } + #[test] fn wrap_pino_shape_parses_cleanly() { // Issue #845 — pino sub-bug: end-to-end check that a pino-shaped From 2a5671fb23c70df2163765e4f0ede298553f5501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 07:11:17 +0200 Subject: [PATCH 08/15] fix(hir): hoist generator/async-generator FnDecls inside nested function bodies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wall #26 (Next.js standalone, next/dist/compiled/edge-runtime's `consumeUint8ArrayReadableStream`). A webpack/ncc inner module is a nested arrow `(e, A, t) => { A.gen = gen; ...; async function* gen(){} }` that exports a hoisted generator by forward-referencing it ABOVE the declaration. Perry's function-body hoisting pass (`lower_fn_body_block_stmt`) pre-defined and front-loaded regular/async FnDecls but EXCLUDED generators, which `lower_body_stmt` lowers to a top-level function plus a source-position `Let { init: FuncRef }`. So the forward reference saw an undefined local → `ReferenceError: gen is not defined` at eager module init. Spec function-declaration hoisting applies to generators too. 
Phase 1 now pre-defines generator FnDecl locals (so `lookup_local` resolves at the forward reference) and Phase 3 recognizes the `FuncRef`-init `Let` (not just `Closure`-init) as hoisted and moves it to the front. The FuncRef value is pure, so reordering it ahead of other statements is safe. Top-level generators were already fine; this only affects generators declared inside another function. Also fixes a stable-hash tag collision introduced with the wall-21 capture variants: `RegisterClassCaptures`/`ClassCaptureValue` had copy-pasted tags 12238/12239 (already used by `SuperPropertySet`/`ObjectSuperPropertySet`), failing `expr_variant_stable_hash_tags_are_unique`. Reassigned to 12241/12242. Repro (t_asyncgen_mod) now matches Node; yield-star / class-iterator / generator gap tests unchanged. --- crates/perry-hir/src/lower_decl/block.rs | 23 +++++++++++++++++++---- crates/perry-hir/src/stable_hash/expr.rs | 4 ++-- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/crates/perry-hir/src/lower_decl/block.rs b/crates/perry-hir/src/lower_decl/block.rs index 8e88d6fe0f..9bed95d402 100644 --- a/crates/perry-hir/src/lower_decl/block.rs +++ b/crates/perry-hir/src/lower_decl/block.rs @@ -184,13 +184,20 @@ pub fn lower_fn_body_block_stmt( // Phase 1: pre-define hoisted FnDecl locals so forward references in // any earlier statement resolve via `lookup_local`. Generator and - // async-generator FnDecls are excluded — those go through the - // hoist-to-top-level + FuncRef path in `lower_body_stmt` and aren't - // closure-bound at the source position. + // async-generator FnDecls ARE included: `lower_body_stmt` lowers them to + // a top-level function plus a source-position `Stmt::Let { init: FuncRef }` + // binding the name. 
Spec function-declaration hoisting still applies to + // generators, so a forward reference (`A.gen = gen` ABOVE the + // `function* gen(){}` in a webpack/ncc inner module — next/dist/compiled/ + // edge-runtime's `consumeUint8ArrayReadableStream`) must resolve. We + // pre-define the local here (so `lookup_local` succeeds at the forward + // reference) and Phase 3 moves the FuncRef `Let` to the front (so it is + // initialized before that reference runs). The FuncRef value is pure, so + // reordering it ahead of other statements is safe. let mut hoisted_id_set: HashSet = HashSet::new(); for stmt in &block.stmts { if let ast::Stmt::Decl(ast::Decl::Fn(fn_decl)) = stmt { - if fn_decl.function.body.is_none() || fn_decl.function.is_generator { + if fn_decl.function.body.is_none() { continue; } let name = fn_decl.ident.sym.to_string(); @@ -313,10 +320,18 @@ pub fn lower_fn_body_block_stmt( let mut hoisted_lets: Vec = Vec::new(); let mut other: Vec = Vec::new(); for s in body { + // A regular/async FnDecl lowers to a `Let { init: Closure }`; a + // generator/async-generator FnDecl lowers to a `Let { init: FuncRef }` + // (the body lives in a hoisted top-level function). Both forms are + // hoisted to the front per spec function-declaration semantics. let is_hoisted = matches!( &s, Stmt::Let { id, init: Some(Expr::Closure { .. }), .. } if hoisted_id_set.contains(id) + ) || matches!( + &s, + Stmt::Let { id, init: Some(Expr::FuncRef(_)), .. 
} + if hoisted_id_set.contains(id) ); if is_hoisted { hoisted_lets.push(s); diff --git a/crates/perry-hir/src/stable_hash/expr.rs b/crates/perry-hir/src/stable_hash/expr.rs index aa32ecf915..ab1fa2ab09 100644 --- a/crates/perry-hir/src/stable_hash/expr.rs +++ b/crates/perry-hir/src/stable_hash/expr.rs @@ -628,8 +628,8 @@ impl SH for Expr { Expr::TaggedTemplateStrings { site_id, cooked, raw } => { tag(h, 445); site_id.hash(h); cooked.hash(h); raw.hash(h); } Expr::TemplateRaw(e) => { tag(h, 446); e.as_ref().hash(h); } Expr::RegisterClassParentDynamic { class_name, parent_expr, } => { tag(h, 447); class_name.hash(h); parent_expr.as_ref().hash(h); } - Expr::RegisterClassCaptures { class_name, captures } => { tag(h, 12238); class_name.hash(h); for c in captures { c.hash(h); } } - Expr::ClassCaptureValue { class_name, index } => { tag(h, 12239); class_name.hash(h); index.hash(h); } + Expr::RegisterClassCaptures { class_name, captures } => { tag(h, 12241); class_name.hash(h); for c in captures { c.hash(h); } } + Expr::ClassCaptureValue { class_name, index } => { tag(h, 12242); class_name.hash(h); index.hash(h); } Expr::RegisterClassStaticSymbol { class_name, key_expr, value_expr, } => { tag(h, 12025); class_name.hash(h); key_expr.as_ref().hash(h); value_expr.as_ref().hash(h); } Expr::RegisterClassComputedMethod { class_name, key_expr, method_name, is_static, param_count, has_rest } => { tag(h, 12233); class_name.hash(h); key_expr.as_ref().hash(h); method_name.hash(h); is_static.hash(h); param_count.hash(h); has_rest.hash(h); } Expr::RegisterClassComputedAccessor { class_name, key_expr, getter_name, setter_name, is_static } => { tag(h, 12234); class_name.hash(h); key_expr.as_ref().hash(h); getter_name.hash(h); setter_name.hash(h); is_static.hash(h); } From 2a9caa12d817454921ddd0c6a1308519f7f8c994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 08:05:17 +0200 Subject: [PATCH 09/15] fix(resolve): append extension for dotted specifiers 
instead of replacing it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wall #27 (Next.js standalone): `startServer` crashed with a stack-overflow SIGSEGV from an accessor getter recursing on itself (`exports.chainStreams` → `self.chainStreams` → …). Root cause is module resolution, not the getter. Next.js's app-render dir holds both `stream-ops.js` and `stream-ops.web.js`, and `stream-ops.js` does `require("./stream-ops.web")`. `resolve_with_extensions` tried `Path::with_extension("js")` first, which REPLACES the last dotted segment: `stream-ops.web` → `stream-ops.js` — the requiring module itself. Resolving the require to its own file made the module self-require, so its SWC `_export` re-export getter (`chainStreams: () => _streamopsweb.chainStreams`, where `_streamopsweb` now aliases the module's own exports) returned `exports.chainStreams`, i.e. the getter itself, and any read of it recursed until the stack overflowed. Node resolution APPENDS the extension to the full specifier (`./stream-ops.web` → `./stream-ops.web.js`) and never strips a non-module dotted segment. The loop now tries the append form first and only falls back to `with_extension` (replace) when the specifier already ends in a recognized module extension — that branch exists solely for Perry's TS-over-JS preference (`./foo.js` pruned but `./foo.ts` present) and must never swap a `.web`-style filename segment. Two regression tests cover both directions. 
--- crates/perry/src/commands/compile/resolve.rs | 57 ++++++++++++++----- .../src/commands/compile/resolve/tests.rs | 35 ++++++++++++ 2 files changed, 78 insertions(+), 14 deletions(-) diff --git a/crates/perry/src/commands/compile/resolve.rs b/crates/perry/src/commands/compile/resolve.rs index f4900c7359..6cb9c04f0e 100644 --- a/crates/perry/src/commands/compile/resolve.rs +++ b/crates/perry/src/commands/compile/resolve.rs @@ -408,28 +408,57 @@ pub(super) fn resolve_with_extensions(base: &Path) -> Option { return Some(base.to_path_buf()); } - // Try with extensions in order of preference (TS before JS) + // Try with extensions in order of preference (TS before JS). + // + // Node module resolution APPENDS the extension to the full specifier + // (`./stream-ops.web` -> `./stream-ops.web.js`); it never strips a dotted + // segment that isn't a real module extension. `Path::with_extension` + // REPLACES the last `.foo` segment, so on `stream-ops.web` it produces + // `stream-ops.js` — which, in Next.js's app-render dir, is the *requiring* + // module itself (`stream-ops.js` requires `./stream-ops.web`). Returning it + // makes the module self-require and its re-export getters recurse forever + // (`exports.chainStreams` -> `self.chainStreams` -> ... stack overflow). + // + // So: always try the APPEND form first. Only fall back to the REPLACE form + // when the specifier already ends in a recognized module extension — that + // path exists purely for Perry's TS-over-JS preference (`./foo.js` whose + // `.js` was pruned but `./foo.ts` is present), never to swap an arbitrary + // filename segment like `.web`. 
+ let base_ext_is_module = base + .extension() + .and_then(|e| e.to_str()) + .map(|e| { + matches!( + e, + "js" | "mjs" | "cjs" | "ts" | "tsx" | "mts" | "cts" | "json" | "node" + ) + }) + .unwrap_or(false); + let path_str = base.to_string_lossy().to_string(); for ext in all_extensions { - let with_ext = base.with_extension(ext.trim_start_matches('.')); - if with_ext.exists() && with_ext.is_file() { - return Some(with_ext); - } - - // Also try adding extension to full path (for paths like ./foo.js) - let path_str = base.to_string_lossy(); - let with_ext = PathBuf::from(format!("{}{}", path_str, ext)); - if with_ext.exists() && with_ext.is_file() { - // If we found a JS file, check for TS equivalent first + // APPEND: `./stream-ops.web` + `.js` -> `./stream-ops.web.js`. + let appended = PathBuf::from(format!("{}{}", path_str, ext)); + if appended.exists() && appended.is_file() { + // If we landed on a JS file, prefer a co-located TS source. if matches!(ext, ".js" | ".mjs" | ".cjs") { - let stem_str = path_str.to_string(); for ts_ext in ts_extensions { - let ts_path = PathBuf::from(format!("{}{}", stem_str, ts_ext)); + let ts_path = PathBuf::from(format!("{}{}", path_str, ts_ext)); if ts_path.exists() && ts_path.is_file() { return Some(ts_path); } } } - return Some(with_ext); + return Some(appended); + } + + // REPLACE: only safe when the specifier already carries a real module + // extension (e.g. `./foo.js` -> `./foo.ts`). Skipped for `.web`-style + // dotted filenames so we never resolve to a sibling module. 
+ if base_ext_is_module { + let replaced = base.with_extension(ext.trim_start_matches('.')); + if replaced.exists() && replaced.is_file() { + return Some(replaced); + } } } diff --git a/crates/perry/src/commands/compile/resolve/tests.rs b/crates/perry/src/commands/compile/resolve/tests.rs index d96beda956..f627f1b24e 100644 --- a/crates/perry/src/commands/compile/resolve/tests.rs +++ b/crates/perry/src/commands/compile/resolve/tests.rs @@ -1327,6 +1327,41 @@ mod manifest_parse_tests { assert!(msg.contains("backends.vulkan.available"), "got: {msg}"); assert!(msg.contains("expected boolean"), "got: {msg}"); } + + #[test] + fn dotted_specifier_appends_not_replaces_extension() { + // Next.js app-render dir: `stream-ops.js` and `stream-ops.web.js` + // coexist, and `stream-ops.js` does `require("./stream-ops.web")`. + // `Path::with_extension("js")` REPLACES `.web` → `stream-ops.js` (the + // requiring file itself); resolving to it makes the module self-require + // and its re-export getters recurse forever. The resolver must APPEND: + // `./stream-ops.web` → `./stream-ops.web.js`. + let dir = tempfile::tempdir().expect("tempdir"); + let root = dir.path(); + std::fs::write(root.join("stream-ops.js"), "// requiring module\n").expect("write"); + std::fs::write(root.join("stream-ops.web.js"), "// the real target\n").expect("write"); + + let resolved = + resolve_with_extensions(&root.join("stream-ops.web")).expect("must resolve"); + assert_eq!( + resolved, + root.join("stream-ops.web.js"), + "must append `.js` to the full specifier, not strip `.web`" + ); + } + + #[test] + fn pruned_js_specifier_still_falls_back_to_ts_via_replace() { + // The REPLACE path is retained for Perry's TS-over-JS preference: a + // `require("./foo.js")` whose `.js` was pruned but whose `./foo.ts` + // source is present must still resolve to the TS file. 
+ let dir = tempfile::tempdir().expect("tempdir"); + let root = dir.path(); + std::fs::write(root.join("foo.ts"), "export const x = 1;\n").expect("write"); + + let resolved = resolve_with_extensions(&root.join("foo.js")).expect("must resolve"); + assert_eq!(resolved, root.join("foo.ts")); + } } #[cfg(test)] From af92965ceb0ffb93bfd9244f91cf07143f89d628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 08:39:32 +0200 Subject: [PATCH 10/15] fix(codegen): apply entry-module process.env literals before eager module init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wall #28 (Next.js standalone): the server booted past the earlier walls but crashed with `TypeError: Cannot read properties of undefined (reading 'd')` from `react-dom-server-legacy.node.production.js`, reading `ReactDOMSharedInternals.d`. `react-dom/index.js` selects its bundle with `if (process.env.NODE_ENV === 'production') module.exports = require('./cjs/react-dom.production.js') else module.exports = require('./cjs/react-dom.development.js')`. Next.js's standalone entry sets `process.env.NODE_ENV = 'production'` on its first line, before any `require`. In Node that assignment is observed by every later `require`d module. Perry hoists `require`s to eager imports that init BEFORE the entry body runs, so react-dom saw the unmodified env, took the `else` branch, and `require`d the development bundle — which Next.js's file tracing prunes from a standalone build. The require returned an empty module, so `ReactDOM.__DOM_INTERNALS_…` (hence `ReactDOMSharedInternals`) was undefined and `.d` threw. Fix: scan the entry module's unconditional top-level `process.env.<NAME> = "<literal>"` assignments (including one+ levels into the cjs-wrap IIFE where the wrapped entry's top-level statements live) and emit `js_setenv` for each in `main`, after the string pool init but before the module-init loop.
This restores Node's observable ordering for the ubiquitous "set NODE_ENV then require" pattern without affecting runtime `process.env` writes (still lowered normally) or assignments nested in conditionals/functions (intentionally not hoisted, matching Node's lazy evaluation). --- crates/perry-codegen/src/codegen/entry.rs | 97 ++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/crates/perry-codegen/src/codegen/entry.rs b/crates/perry-codegen/src/codegen/entry.rs index cc6cc64850..7e98a01bc5 100644 --- a/crates/perry-codegen/src/codegen/entry.rs +++ b/crates/perry-codegen/src/codegen/entry.rs @@ -9,7 +9,7 @@ use crate::expr::FnCtx; use crate::module::LlModule; use crate::stmt; use crate::strings::StringPool; -use crate::types::{DOUBLE, I32, I8, PTR, VOID}; +use crate::types::{DOUBLE, I32, I64, I8, PTR, VOID}; use super::helpers::{ emit_namespace_populator, enable_module_init_shadow_frame, init_static_fields_early, @@ -18,6 +18,82 @@ use super::helpers::{ }; use super::opts::CrossModuleCtx; +/// Collect the entry module's top-level `process.env.<NAME> = "<literal>"` +/// assignments so they can be applied to the OS environment BEFORE eager +/// module init (see the call site in `compile_module_entry`). +/// +/// Node runs the entry script top-to-bottom, so a `process.env.NODE_ENV = +/// 'production'` on line 1 is observed by every `require()`d dependency's +/// init. Perry hoists `require`s to eager imports that init before the entry +/// body runs, so without this the dependency observes the unmodified env — +/// e.g. `react-dom/index.js` branches on `process.env.NODE_ENV === 'production'` +/// to pick the production vs development bundle, and the development file is +/// pruned from a Next.js standalone build, so the wrong branch yields an empty +/// module and a downstream `ReactDOMSharedInternals.d` crash.
+/// +/// Only *unconditional module-top-level* assignments are collected: the entry +/// init statements, plus one+ levels into a cjs-wrap IIFE (`_cjs = +/// (function(){ ... })()`), which is where the wrapped entry's top-level +/// statements live. Assignments nested in conditionals or inner functions are +/// deliberately skipped — those run conditionally/lazily, exactly as in Node. +fn collect_entry_env_literals(init: &[perry_hir::Stmt]) -> Vec<(String, String)> { + use perry_hir::{Expr, Stmt}; + + fn record(expr: &Expr, out: &mut Vec<(String, String)>) { + // `process.env.X = "lit"` lowers to either form depending on path. + if let Expr::PutValueSet { + target, key, value, .. + } = expr + { + if matches!(target.as_ref(), Expr::ProcessEnv) { + if let (Expr::String(k), Expr::String(v)) = (key.as_ref(), value.as_ref()) { + out.push((k.clone(), v.clone())); + } + } + } + if let Expr::PropertySet { + object, + property, + value, + } = expr + { + if matches!(object.as_ref(), Expr::ProcessEnv) { + if let Expr::String(v) = value.as_ref() { + out.push((property.clone(), v.clone())); + } + } + } + } + + fn descend_iife(expr: &Expr, out: &mut Vec<(String, String)>, depth: u32) { + if depth >= 4 { + return; + } + if let Expr::Call { callee, .. } = expr { + if let Expr::Closure { body, .. } = callee.as_ref() { + scan(body, out, depth + 1); + } + } + } + + fn scan(stmts: &[Stmt], out: &mut Vec<(String, String)>, depth: u32) { + for s in stmts { + match s { + Stmt::Expr(e) => { + record(e, out); + descend_iife(e, out, depth); + } + Stmt::Let { init: Some(e), .. } => descend_iife(e, out, depth), + _ => {} + } + } + } + + let mut out = Vec::new(); + scan(init, &mut out, 0); + out +} + /// Emit the module's entry function. /// /// For the **entry module**: emits `int main()` that bootstraps GC, runs @@ -203,6 +279,25 @@ pub(super) fn compile_module_entry( let blk = main.block_mut(0).unwrap(); // Entry module's own string pool first. 
blk.call_void(&strings_init_name, &[]); + // Apply the entry module's top-level `process.env. = + // ""` assignments NOW — after the string pool is live but + // BEFORE any dependency's `__init` runs — so eager-inited deps that + // branch on `process.env` at init time observe what the entry sets, + // matching Node's require-is-lazy ordering. See + // `collect_entry_env_literals`. The "NODE_ENV"/"production" string + // handles are interned here and populated by the strings-init call + // above (the entry body also references them, so they share slots). + for (name, value) in collect_entry_env_literals(&hir.init) { + let name_idx = strings.intern(&name); + let value_idx = strings.intern(&value); + let name_global = format!("@{}", strings.entry(name_idx).handle_global); + let value_global = format!("@{}", strings.entry(value_idx).handle_global); + let name_box = blk.load(DOUBLE, &name_global); + let name_bits = blk.bitcast_double_to_i64(&name_box); + let name_handle = blk.and(I64, &name_bits, crate::nanbox::POINTER_MASK_I64); + let value_box = blk.load(DOUBLE, &value_global); + blk.call_void("js_setenv", &[(I64, &name_handle), (DOUBLE, &value_box)]); + } // Then every non-entry module's init in order. Each // non-entry module's `__init` runs its own string // pool init internally before its top-level statements. From c1a940db7459a0f70475d5dfdf1cd5be3b47e30a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 09:27:18 +0200 Subject: [PATCH 11/15] fix(runtime): native function `name` is non-enumerable (for-in / copyProps) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wall #29 (Next.js standalone): boot crashed with `TypeError: Cannot assign to read only property 'name' of object '#'` from `next/dist/compiled/jsonwebtoken` (its bundled safe-buffer). safe-buffer does `function copyProps(e,r){for(var t in e){r[t]=e[t]}}` and calls `copyProps(Buffer, SafeBuffer)`. 
In Node `for (k in Buffer)` skips `name` and `length` (functions' intrinsic props are non-enumerable), so nothing is copied onto them. Perry stored a native constructor's `name` via `closure_set_dynamic_prop`, which defaults to ENUMERABLE — so `for (k in Buffer)` yielded "name", and `copyProps` assigned `SafeBuffer.name = "Buffer"`. SafeBuffer is a function whose own `name` is read-only, so the strict-mode write threw. (`getOwnPropertyDescriptor(Buffer,'name').enumerable` already reported false via the function-name special case, so enumeration disagreed with reflection.) Fix: `set_bound_native_closure_name` now pins the spec descriptor `{ writable:false, enumerable:false, configurable:true }` on `name`, so the for-in / enumerable-keys path skips it, matching Node and reflection. --- crates/perry-runtime/src/object/native_module.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crates/perry-runtime/src/object/native_module.rs b/crates/perry-runtime/src/object/native_module.rs index 4637bc660a..718ea16bb8 100644 --- a/crates/perry-runtime/src/object/native_module.rs +++ b/crates/perry-runtime/src/object/native_module.rs @@ -4814,6 +4814,21 @@ pub(crate) fn set_bound_native_closure_name( let ptr = crate::string::js_string_from_bytes(name.as_ptr(), name.len() as u32); let name_value = f64::from_bits(JSValue::string_ptr(ptr).bits()); crate::closure::closure_set_dynamic_prop(closure as usize, "name", name_value); + // Spec: a function's `name` property is { writable:false, enumerable:false, + // configurable:true }. Storing it as a plain dynamic prop left it ENUMERABLE + // by default, so `for (k in Buffer)` yielded "name" — even though + // `getOwnPropertyDescriptor(Buffer,'name').enumerable` correctly reported + // false via the function-name special case. 
The inconsistency broke + // safe-buffer's `copyProps(Buffer, SafeBuffer)` (`for (k in Buffer) + // SafeBuffer[k] = Buffer[k]`): it copied "name" onto SafeBuffer, whose own + // `name` is read-only, throwing `Cannot assign to read only property 'name'` + // in strict mode (jsonwebtoken → Next.js). Pin the proper descriptor so + // enumeration matches reflection. + crate::object::set_property_attrs( + closure as usize, + "name".to_string(), + crate::object::PropertyAttrs::new(false, false, true), + ); } thread_local! { From 3a850eca0348c6fc5de065cd5501b6e27931aa1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 11:49:25 +0200 Subject: [PATCH 12/15] =?UTF-8?q?feat(compile):=20lazy=20function-local=20?= =?UTF-8?q?require=20=E2=80=94=20defer=20init=20like=20Node=20(Next.js)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Perry eager-initialized every `require()`d module at program start, including ones Node only loads when an enclosing function actually runs. Next.js's server bundle `require()`s heavy dependencies inside request handlers (jsonwebtoken in `api-resolver.js`/`try-get-preview-data.js`), so at boot Perry dragged their whole module bodies — and Perry's gaps in them — into the startup path, while Node never touches them until a request hits that code. This is what produced the jsonwebtoken boot crashes (safe-buffer `for-in`/copyProps, native Stream as `util.inherits` base). Fix: a `require('S')` whose every call site is inside a function body is now routed through the existing Deferred-module mechanism (the same compile-but- lazy-init path dynamic `import()` uses). S still compiles and links; it just isn't eager-initialized — its `__init` fires when the require shim's binding is actually read, i.e. when the function calls `require('S')`, matching Node's synchronous-require semantics (including circular-require partial exports via the idempotent init guard). 
Mechanism: - `cjs_wrap`: `function_local_specs` (one masked-source scope-tracking pass) classifies which require specifiers are purely function-local. The wrap skips alias adoption for them (a function-scoped `const x = require()` is not a module binding) and names their import binding `_lazyreq_N`. - `collect_modules`: tags `Import.is_deferred_require` (new HIR field) for `_lazyreq_`-named Perry-compiled imports. - init classification: `classify_eager_modules`, `topo_sort_non_entry_modules`, and per-module `module_init_deps` all skip `is_deferred_require` edges, so the target stays `Deferred` unless some genuine top-level edge reaches it. - codegen: reading a `_lazyreq_` binding emits an idempotent `<prefix>__init()` before the cross-module value read. Self-correcting by design: because the shim triggers init on the real `require()` call, an over-eager classification never breaks correctness — it only affects eager-init-loop membership. Verified end-to-end: a function-local require of a side-effecting module no longer initializes at boot (matches Node) yet still initializes, in order, when the function runs.
--- .../perry-codegen/src/expr/dyn_extern_i18n.rs | 12 ++ crates/perry-hir/src/ir/decl.rs | 8 + crates/perry-hir/src/lower/module_decl.rs | 2 +- crates/perry-hir/src/stable_hash/module.rs | 2 + crates/perry-hir/src/stable_hash/tests.rs | 2 +- crates/perry-transform/src/inline/mod.rs | 2 +- crates/perry/src/commands/compile.rs | 5 +- .../perry/src/commands/compile/bootstrap.rs | 2 +- .../src/commands/compile/cjs_wrap/detect.rs | 2 +- .../compile/cjs_wrap/extract_requires.rs | 153 ++++++++++++++++++ .../src/commands/compile/cjs_wrap/mod.rs | 2 +- .../src/commands/compile/cjs_wrap/wrap.rs | 25 +++ .../src/commands/compile/collect_modules.rs | 33 ++++ .../perry/src/commands/compile/init_order.rs | 7 +- 14 files changed, 248 insertions(+), 9 deletions(-) diff --git a/crates/perry-codegen/src/expr/dyn_extern_i18n.rs b/crates/perry-codegen/src/expr/dyn_extern_i18n.rs index 4618c6a5fb..ca9dbd4af9 100644 --- a/crates/perry-codegen/src/expr/dyn_extern_i18n.rs +++ b/crates/perry-codegen/src/expr/dyn_extern_i18n.rs @@ -391,6 +391,18 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { )); } if let Some(source_prefix) = ctx.import_function_prefixes.get(name).cloned() { + // Next.js lazy-require: a `_lazyreq_N` binding is the CJS require + // shim's handle to a FUNCTION-LOCAL `require('S')`. S is + // `Deferred` (never eager-initialized), so before reading its + // default-export getter, fire `__init()` — idempotent, so + // re-reads cost a guard check. This is the moment Node would run + // S's module body: when `require('S')` is actually called. 
+ if name.starts_with("_lazyreq_") { + let init_fn = format!("{}__init", source_prefix); + ctx.pending_declares + .push((init_fn.clone(), crate::types::VOID, vec![])); + ctx.block().call_void(&init_fn, &[]); + } // Issue #678 followup: a V8-fallback import used as a value // (rather than called directly) has no native singleton // wrapper to point at — the `__perry_wrap_extern_*` for V8 diff --git a/crates/perry-hir/src/ir/decl.rs b/crates/perry-hir/src/ir/decl.rs index 9e97b729ec..be690e1de3 100644 --- a/crates/perry-hir/src/ir/decl.rs +++ b/crates/perry-hir/src/ir/decl.rs @@ -116,6 +116,14 @@ pub struct Import { /// Always `false` on `is_dynamic` synthetic edges (those are already /// dynamic targets by virtue of `is_dynamic`). pub is_dynamic_target: bool, + /// Next.js lazy-require: this `import _req_N from 'S'` was synthesized by + /// the CJS→ESM wrap from a `require('S')` whose every call site is inside a + /// FUNCTION body (never module top-level). Node loads such a module lazily + /// — only when the enclosing function runs — so it must NOT pin the target + /// eager. Like `is_dynamic`, the target still enters the compile graph but + /// is left `Deferred` unless some other (top-level) edge reaches it; the + /// require shim triggers the target's `__init` on first `require()` call. 
+ pub is_deferred_require: bool, } /// Import specifier diff --git a/crates/perry-hir/src/lower/module_decl.rs b/crates/perry-hir/src/lower/module_decl.rs index bea448887c..2cbb139bb2 100644 --- a/crates/perry-hir/src/lower/module_decl.rs +++ b/crates/perry-hir/src/lower/module_decl.rs @@ -458,7 +458,7 @@ pub(crate) fn lower_module_decl( type_only: whole_decl_type_only, is_dynamic: false, is_dynamic_target: false, - }); + is_deferred_require: false, }); } ast::ModuleDecl::ExportDecl(export) => { match &export.decl { diff --git a/crates/perry-hir/src/stable_hash/module.rs b/crates/perry-hir/src/stable_hash/module.rs index 08b90b8b5c..32e17a8644 100644 --- a/crates/perry-hir/src/stable_hash/module.rs +++ b/crates/perry-hir/src/stable_hash/module.rs @@ -130,6 +130,7 @@ impl SH for Import { type_only, is_dynamic, is_dynamic_target, + is_deferred_require, } = self; source.hash(h); specifiers.hash(h); @@ -139,6 +140,7 @@ impl SH for Import { type_only.hash(h); is_dynamic.hash(h); is_dynamic_target.hash(h); + is_deferred_require.hash(h); } } diff --git a/crates/perry-hir/src/stable_hash/tests.rs b/crates/perry-hir/src/stable_hash/tests.rs index 9c17904db1..5dc7d9fa5a 100644 --- a/crates/perry-hir/src/stable_hash/tests.rs +++ b/crates/perry-hir/src/stable_hash/tests.rs @@ -251,7 +251,7 @@ fn module_metadata_affects_hash() { type_only: false, is_dynamic: false, is_dynamic_target: false, - }); + is_deferred_require: false, }); assert_ne!(base_hash, hash_module(&m_imp)); // Add a class diff --git a/crates/perry-transform/src/inline/mod.rs b/crates/perry-transform/src/inline/mod.rs index 85d8f13a94..148d7b2849 100644 --- a/crates/perry-transform/src/inline/mod.rs +++ b/crates/perry-transform/src/inline/mod.rs @@ -493,7 +493,7 @@ pub fn inline_functions( type_only: false, is_dynamic: false, is_dynamic_target: false, - }); + is_deferred_require: false, }); } } } diff --git a/crates/perry/src/commands/compile.rs b/crates/perry/src/commands/compile.rs index 40871f099d..f503319387 
100644 --- a/crates/perry/src/commands/compile.rs +++ b/crates/perry/src/commands/compile.rs @@ -2207,7 +2207,10 @@ pub fn run_with_parse_cache( } }; for import in &hir_module.imports { - if import.is_dynamic || import.type_only { + // `is_deferred_require`: a function-local `require('S')` + // (lazy in Node). S must NOT chain into this module's init + // — it inits only when the require shim is actually called. + if import.is_dynamic || import.type_only || import.is_deferred_require { continue; } if let Some(resolved) = &import.resolved_path { diff --git a/crates/perry/src/commands/compile/bootstrap.rs b/crates/perry/src/commands/compile/bootstrap.rs index e108d0b301..9d78903586 100644 --- a/crates/perry/src/commands/compile/bootstrap.rs +++ b/crates/perry/src/commands/compile/bootstrap.rs @@ -505,7 +505,7 @@ mod js_runtime_gate_tests { type_only: false, is_dynamic: false, is_dynamic_target: false, - }); + is_deferred_require: false, }); let mut package = empty_module("pkg"); package.exports.push(perry_hir::Export::Named { diff --git a/crates/perry/src/commands/compile/cjs_wrap/detect.rs b/crates/perry/src/commands/compile/cjs_wrap/detect.rs index b568a30c3c..595ae14694 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/detect.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/detect.rs @@ -64,7 +64,7 @@ pub(in crate::commands::compile) fn is_commonjs(source: &str) -> bool { /// `/['"]/` in vendored minified bundles like comment-json) would otherwise /// desync the string state and mask the rest of the file, hiding a trailing /// `module.exports = …`. 
-fn strip_comments_and_strings(source: &str) -> String { +pub(crate) fn strip_comments_and_strings(source: &str) -> String { #[derive(Clone, Copy, PartialEq, Eq)] enum State { Code, diff --git a/crates/perry/src/commands/compile/cjs_wrap/extract_requires.rs b/crates/perry/src/commands/compile/cjs_wrap/extract_requires.rs index 73c7a93540..3bd4f72592 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/extract_requires.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/extract_requires.rs @@ -179,3 +179,156 @@ pub fn identifier_is_reassigned(source: &str, name: &str) -> bool { } false } + +/// Next.js lazy-require classification (single forward pass). Returns the set +/// of specifiers whose EVERY `require('<spec>')` call site is lexically inside +/// a FUNCTION body — never at module top level, and never inside a top-level +/// control-flow block that runs at module load. Node loads such a module +/// lazily (only when the enclosing function runs), so Perry must not eager-init +/// it. +/// +/// Conservative by construction: a spec with any top-level call site (including +/// top-level `if`/`for`/`try` blocks, which execute during module evaluation) +/// is excluded and keeps the default eager behavior. A misclassification is +/// self-correcting at runtime — the require shim triggers the target's init +/// when `require()` is actually called — so this only governs eager-init-loop +/// membership. +/// +/// Brace/paren scanning runs on a comment/string/regex-masked copy (same +/// length, code structure preserved) so literal braces never corrupt the scope +/// stack. Call-site offsets + specifiers come from the original source. +pub fn function_local_specs(source: &str) -> std::collections::HashSet { + use std::collections::{HashMap, HashSet}; + + // (offset, spec) for every static `require('<spec>')` call, in source order.
+ let re = regex::Regex::new(r#"require\s*\(\s*['"]([^'"]+)['"]\s*\)"#).unwrap(); + let sbytes = source.as_bytes(); + let mut sites: Vec<(usize, &str)> = Vec::new(); + for cap in re.captures_iter(source) { + let m0 = cap.get(0).unwrap(); + // Skip member-access matches (`foo.require('x')`). + let mut p = m0.start(); + while p > 0 && (sbytes[p - 1] as char).is_whitespace() { + p -= 1; + } + if p > 0 && sbytes[p - 1] == b'.' { + continue; + } + sites.push((m0.start(), cap.get(1).unwrap().as_str())); + } + if sites.is_empty() { + return HashSet::new(); + } + + let masked = super::detect::strip_comments_and_strings(source); + let mbytes = masked.as_bytes(); + let is_ident = |c: u8| c == b'_' || c == b'$' || c.is_ascii_alphanumeric(); + let control_keywords = ["if", "for", "while", "switch", "catch", "with", "else"]; + + #[derive(PartialEq)] + enum Scope { + Function, + Block, + } + let mut scopes: Vec = Vec::new(); + // spec → (seen any site, all sites so far in-function). + let mut state: HashMap<&str, (bool, bool)> = HashMap::new(); + let mut next_site = 0usize; + let in_function = |scopes: &[Scope]| scopes.iter().any(|s| *s == Scope::Function); + + let mut i = 0usize; + while i < mbytes.len() { + // Record any require site at this offset before processing the char. + while next_site < sites.len() && sites[next_site].0 == i { + let (_, spec) = sites[next_site]; + let here = in_function(&scopes); + let e = state.entry(spec).or_insert((false, true)); + e.0 = true; + e.1 = e.1 && here; + next_site += 1; + } + match mbytes[i] { + b'{' => { + let mut p = i; + while p > 0 && (mbytes[p - 1] as char).is_whitespace() { + p -= 1; + } + let kind = if p >= 2 && &masked[p - 2..p] == "=>" { + Scope::Function + } else if p > 0 && mbytes[p - 1] == b')' { + let head = matched_open_head(&masked, mbytes, p - 1, &is_ident); + if control_keywords.iter().any(|k| *k == head) { + Scope::Block + } else { + // `function f(...) {`, method `m(...) {`, arrow + // `(...) 
=> {` (caught above), IIFE `(...)(...) {`… + Scope::Function + } + } else { + Scope::Block + }; + scopes.push(kind); + } + b'}' => { + scopes.pop(); + } + _ => {} + } + i += 1; + } + // Any sites at EOF offset (defensive). + while next_site < sites.len() { + let (_, spec) = sites[next_site]; + let e = state.entry(spec).or_insert((false, true)); + e.0 = true; + e.1 = e.1 && in_function(&scopes); + next_site += 1; + } + + state + .into_iter() + .filter_map(|(spec, (seen, all_in_fn))| { + if seen && all_in_fn { + Some(spec.to_string()) + } else { + None + } + }) + .collect() +} + +/// Given the index of a `)` in the masked source, walk back to its matching +/// `(` and return the identifier/keyword immediately before that `(`. +fn matched_open_head( + masked: &str, + mbytes: &[u8], + close_paren: usize, + is_ident: &impl Fn(u8) -> bool, +) -> String { + let mut depth = 0i32; + let mut i = close_paren; + loop { + match mbytes[i] { + b')' => depth += 1, + b'(' => { + depth -= 1; + if depth == 0 { + let mut p = i; + while p > 0 && (mbytes[p - 1] as char).is_whitespace() { + p -= 1; + } + let end = p; + while p > 0 && is_ident(mbytes[p - 1]) { + p -= 1; + } + return masked[p..end].to_string(); + } + } + _ => {} + } + if i == 0 { + return String::new(); + } + i -= 1; + } +} diff --git a/crates/perry/src/commands/compile/cjs_wrap/mod.rs b/crates/perry/src/commands/compile/cjs_wrap/mod.rs index 25b30b8909..c21a111676 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/mod.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/mod.rs @@ -50,7 +50,7 @@ pub(self) use extract_exports::{ }; pub(self) use extract_requires::{ extract_export_star_specs, extract_require_aliases_with_ranges, extract_require_specifiers, - identifier_is_reassigned, + function_local_specs, identifier_is_reassigned, }; pub(self) use hoist_classes::{ extract_top_level_class_decls, rewrite_module_exports_class_expression, diff --git a/crates/perry/src/commands/compile/cjs_wrap/wrap.rs 
b/crates/perry/src/commands/compile/cjs_wrap/wrap.rs index 95947c7af0..10b1c47fb2 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/wrap.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/wrap.rs @@ -118,6 +118,16 @@ pub(in crate::commands::compile) fn wrap_commonjs_for_target( } true }; + // Next.js lazy-require: specifiers whose every `require('S')` call site is + // inside a function body (lazy in Node). Computed up front because it also + // suppresses alias ADOPTION below — a function-local `const dep = + // require('S')` is a function-scoped const, not a module binding, and + // adopting it would hoist `import dep from 'S'` to module scope (eager). We + // instead keep the synthetic binding and rename it `_lazyreq_N` so the + // target stays `Deferred` and inits only when the shim's + // `return _lazyreq_N` runs (i.e. when the function actually calls require). + let lazy_specs = function_local_specs(source); + let mut import_local_names: Vec = require_specs .iter() .enumerate() @@ -129,6 +139,10 @@ pub(in crate::commands::compile) fn wrap_commonjs_for_target( if !alias_is_safe(alias) { continue; } + if lazy_specs.contains(spec) { + // Don't adopt a function-local alias — keep it lazy (see above). + continue; + } if import_local_names.iter().any(|n| n == alias) { continue; } @@ -142,6 +156,17 @@ pub(in crate::commands::compile) fn wrap_commonjs_for_target( chosen_alias_per_spec.insert(spec.clone()); } + // Rename the surviving synthetic bindings for function-local specs so + // `collect_modules` can tag the import `is_deferred_require` by name and + // codegen can fire `__init()` at the shim read site. 
+ if !lazy_specs.is_empty() { + for (i, spec) in require_specs.iter().enumerate() { + if import_local_names[i] == format!("_req_{i}") && lazy_specs.contains(spec) { + import_local_names[i] = format!("_lazyreq_{i}"); + } + } + } + // #1721: ranges of `const = require()` lines whose alias we // ADOPTED as the import local name above (`import_local_names[idx] == alias`). // The synthetic `require` returns that name, and the hoisted `import ` diff --git a/crates/perry/src/commands/compile/collect_modules.rs b/crates/perry/src/commands/compile/collect_modules.rs index e62adfa290..e5f1737c55 100644 --- a/crates/perry/src/commands/compile/collect_modules.rs +++ b/crates/perry/src/commands/compile/collect_modules.rs @@ -1089,6 +1089,7 @@ fn collect_module_one( type_only: false, is_dynamic: true, is_dynamic_target: false, + is_deferred_require: false, }); } @@ -1549,6 +1550,38 @@ fn collect_module_one( } } + // Next.js lazy-require: the CJS→ESM wrap names a binding `_lazyreq_N` when + // every `require('S')` call site is inside a function body (lazy in Node). + // Tag the import so `classify_eager_modules` leaves the target Deferred — + // matching Node, which only loads such a module when the enclosing function + // runs (e.g. jsonwebtoken, required only inside Next.js's request handlers). + // The require shim triggers the target's `__init` on first `require()`, so + // an over-eager classification is self-correcting at runtime. Limited to + // Perry-compiled (`NativeCompiled`) targets — native stdlib / V8 modules + // have their own init paths. 
+ if was_cjs_wrapped { + for import in &mut hir_module.imports { + if import.type_only + || import.is_dynamic + || import.is_native + || import.module_kind != perry_hir::ModuleKind::NativeCompiled + { + continue; + } + let is_lazy = import.specifiers.iter().any(|s| { + let local = match s { + perry_hir::ImportSpecifier::Default { local } => local, + perry_hir::ImportSpecifier::Namespace { local } => local, + perry_hir::ImportSpecifier::Named { local, .. } => local, + }; + local.starts_with("_lazyreq_") + }); + if is_lazy { + import.is_deferred_require = true; + } + } + } + // Process re-exports for export in &hir_module.exports { let source = match export { diff --git a/crates/perry/src/commands/compile/init_order.rs b/crates/perry/src/commands/compile/init_order.rs index ad37c46184..106544e94a 100644 --- a/crates/perry/src/commands/compile/init_order.rs +++ b/crates/perry/src/commands/compile/init_order.rs @@ -48,7 +48,7 @@ pub(super) fn classify_eager_modules(ctx: &mut CompilationContext, entry_path: & let static_targets: Vec = module .imports .iter() - .filter(|i| !i.is_dynamic && !i.type_only) + .filter(|i| !i.is_dynamic && !i.type_only && !i.is_deferred_require) .filter_map(|i| i.resolved_path.as_ref().map(PathBuf::from)) .collect(); let reexport_sources: Vec = module @@ -136,7 +136,10 @@ pub(super) fn topo_sort_non_entry_modules( // (transitively reached via the same phony edge chain), // so tracer's top-level `Context.Reference()(...)` ran // against an uninitialized Context global and threw. - if import.type_only { + // `is_deferred_require`: a function-local `require('S')` is not an + // init-order edge — S inits lazily when the require shim runs, not + // as part of this module's eager init. 
+ if import.type_only || import.is_deferred_require { continue; } if let Some(ref resolved) = import.resolved_path { From 3c5f8e0288495e6c4c3f70bae4ca400093a514e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 21:53:53 +0200 Subject: [PATCH 13/15] fix(runtime,compile): address CodeRabbit review on #5050 - class_registry: dynamic Request/Response subclasses resolved via the global-builtin branch now record their fetch-parent kind (the early return previously skipped the bookkeeping, so new X() lost the native handle). - class_registry: gate the extends-native-export exemption on the native-module metadata (bound_native_callable_module_and_method) instead of the raw BOUND_METHOD_FUNC_PTR sentinel, so reified Function.prototype.{bind,call,apply} values no longer skip the not-a-constructor TypeError. - global_this: super-parent callability test now requires a closure or a class object, not just a non-zero class id, so a stale non-callable slot falls back to parent_closure_in_chain instead of dispatching on a non-function. - event_target: class X extends CustomEvent initializes as a CustomEvent (constructor + detail), not always Event (new is_custom flag threaded through js_event_subclass_init). - collect_modules: gate the unresolved-Worker warning on OutputFormat::Text. 
--- .../perry-codegen/src/expr/this_super_call.rs | 6 +++++ .../src/runtime_decls/strings_part2.rs | 6 ++++- crates/perry-runtime/src/event_target.rs | 14 ++++++++-- .../src/object/class_registry.rs | 27 +++++++++++++------ .../perry-runtime/src/object/global_this.rs | 10 ++++--- .../src/commands/compile/collect_modules.rs | 12 +++++---- 6 files changed, 56 insertions(+), 19 deletions(-) diff --git a/crates/perry-codegen/src/expr/this_super_call.rs b/crates/perry-codegen/src/expr/this_super_call.rs index 55ca945f06..714ff6eb7f 100644 --- a/crates/perry-codegen/src/expr/this_super_call.rs +++ b/crates/perry-codegen/src/expr/this_super_call.rs @@ -447,6 +447,11 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { None => undef.clone(), }; let argc = super_args.len().min(2).to_string(); + // `extends CustomEvent` → initialize `constructor` + + // `detail` as a CustomEvent, not a plain Event. + let is_custom = + if parent_name.as_str() == "CustomEvent" { "1" } else { "0" } + .to_string(); ctx.block().call( DOUBLE, "js_event_subclass_init", @@ -455,6 +460,7 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { (DOUBLE, &arg0), (DOUBLE, &arg1), (I32, &argc), + (I32, &is_custom), ], ); let current_class_name = diff --git a/crates/perry-codegen/src/runtime_decls/strings_part2.rs b/crates/perry-codegen/src/runtime_decls/strings_part2.rs index 9b2df4345c..b9d072edd8 100644 --- a/crates/perry-codegen/src/runtime_decls/strings_part2.rs +++ b/crates/perry-codegen/src/runtime_decls/strings_part2.rs @@ -1193,7 +1193,11 @@ pub(crate) fn declare_phase_b_strings_part2(module: &mut LlModule) { module.declare_function("js_event_new", I64, &[DOUBLE, DOUBLE, I32]); // `super(type, options)` from `class X extends Event/CustomEvent` — // initializes Event fields onto the existing subclass `this`. 
- module.declare_function("js_event_subclass_init", DOUBLE, &[DOUBLE, DOUBLE, DOUBLE, I32]); + module.declare_function( + "js_event_subclass_init", + DOUBLE, + &[DOUBLE, DOUBLE, DOUBLE, I32, I32], + ); module.declare_function("js_custom_event_new", I64, &[DOUBLE, DOUBLE, I32]); module.declare_function("js_dom_exception_new", I64, &[DOUBLE, DOUBLE]); module.declare_function("js_event_target_add_event_listener", VOID, &[I64, I64, I64]); diff --git a/crates/perry-runtime/src/event_target.rs b/crates/perry-runtime/src/event_target.rs index cf57d60fd6..567f150330 100644 --- a/crates/perry-runtime/src/event_target.rs +++ b/crates/perry-runtime/src/event_target.rs @@ -292,6 +292,7 @@ pub extern "C" fn js_event_subclass_init( type_value: f64, options: f64, argc: u32, + is_custom: u32, ) -> f64 { let Some(event) = value_as_ptr::(this_value) else { return undefined_value(); @@ -299,14 +300,23 @@ pub extern "C" fn js_event_subclass_init( if argc == 0 { throw_missing_arg("type"); } - init_event_fields(event, type_value, options, b"Event", None); + // `class X extends CustomEvent` must initialize as a CustomEvent: the + // `constructor` field resolves to the CustomEvent global and `detail` is + // read off the options bag (mirroring the direct `new CustomEvent(...)` + // path). Plain `extends Event` keeps `b"Event"` and no `detail`. + if is_custom != 0 { + let detail = unsafe { option_detail(options) }; + init_event_fields(event, type_value, options, b"CustomEvent", Some(detail)); + } else { + init_event_fields(event, type_value, options, b"Event", None); + } undefined_value() } /// Keepalive anchor for the auto-optimize whole-program build — /// `js_event_subclass_init` is a generated-code-only callee. 
#[used] -static KEEP_JS_EVENT_SUBCLASS_INIT: extern "C" fn(f64, f64, f64, u32) -> f64 = +static KEEP_JS_EVENT_SUBCLASS_INIT: extern "C" fn(f64, f64, f64, u32, u32) -> f64 = js_event_subclass_init; fn is_event_instance(event: *const ObjectHeader) -> bool { diff --git a/crates/perry-runtime/src/object/class_registry.rs b/crates/perry-runtime/src/object/class_registry.rs index 21858e5d65..06763e1b2a 100644 --- a/crates/perry-runtime/src/object/class_registry.rs +++ b/crates/perry-runtime/src/object/class_registry.rs @@ -44,15 +44,17 @@ fn is_non_constructable_builtin_function_value(value: f64) -> bool { /// property read produces). These represent real Node classes/functions and /// must be accepted as `extends` targets. fn is_bound_native_method_closure_value(value: f64) -> bool { - use crate::value::JSValue; - let jv = JSValue::from_bits(value.to_bits()); - if !jv.is_pointer() { - return false; + // Gate on the native-module metadata, not the raw BOUND_METHOD_FUNC_PTR + // trampoline: reified `Function.prototype.{bind,call,apply}` values + // (`reify_function_method_value`) share that trampoline but are NOT native + // constructors, so matching the sentinel alone would let `class X extends + // obj.method {}` skip the spec-required TypeError and silently stay + // parentless. A real native-module export carries a non-empty module name. + unsafe { + super::native_module::bound_native_callable_module_and_method(value) + .map(|(module, _)| !module.is_empty()) + .unwrap_or(false) } - let raw_ptr = jv.as_pointer::(); - let closure_ptr = crate::closure::clean_closure_ptr(raw_ptr); - let func_ptr = crate::closure::get_valid_func_ptr(closure_ptr); - !func_ptr.is_null() && func_ptr == crate::closure::BOUND_METHOD_FUNC_PTR } fn throw_non_constructable_builtin_function() -> ! 
{ @@ -4352,6 +4354,15 @@ pub extern "C" fn js_register_class_parent_dynamic(class_id: u32, parent_value: if parent_cid != 0 && parent_cid != class_id { register_class(class_id, parent_cid); } + // A dynamic subclass that resolves its parent through this builtin + // branch must still record the fetch-parent kind so `new X()` attaches + // the native Request/Response handle — the bookkeeping below this + // early return would otherwise be skipped. + match name { + "Request" => super::register_fetch_parent_kind(class_id, 1), + "Response" => super::register_fetch_parent_kind(class_id, 2), + _ => {} + } return; } // A bound native-module export (`const { Writable } = require('stream'); diff --git a/crates/perry-runtime/src/object/global_this.rs b/crates/perry-runtime/src/object/global_this.rs index 1b60da846d..017f58a73b 100644 --- a/crates/perry-runtime/src/object/global_this.rs +++ b/crates/perry-runtime/src/object/global_this.rs @@ -479,10 +479,14 @@ pub unsafe extern "C" fn js_fetch_or_value_super( const PTR_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; let usable = if bits & TAG_MASK == POINTER_TAG { let p = (bits & PTR_MASK) as usize; + // A real callability test: a closure, or a per-evaluation class + // OBJECT (constructor). The prior `class_id != 0` accepted any + // pointer-tagged object with a class id — including non-callable + // instances — so a stale captured slot holding one of those + // skipped the `parent_closure_in_chain` recovery below and + // dispatched `js_native_call_value` on a non-function. 
crate::closure::is_closure_ptr(p) - || crate::object::js_object_get_class_id( - p as *const crate::object::ObjectHeader, - ) != 0 + || super::class_registry::is_class_object_ptr(p as *const u8) } else { // INT32-tagged ClassRefs route through the static super paths // before reaching here; anything else (undefined / a stale diff --git a/crates/perry/src/commands/compile/collect_modules.rs b/crates/perry/src/commands/compile/collect_modules.rs index e5f1737c55..2ece54abe7 100644 --- a/crates/perry/src/commands/compile/collect_modules.rs +++ b/crates/perry/src/commands/compile/collect_modules.rs @@ -1018,11 +1018,13 @@ fn collect_module_one( // lower this WorkerNew to a runtime throw instead of // failing the whole compile. Push an empty set to keep // the fill pass aligned with resolved siblings. - eprintln!( - " Warning: worker_threads Worker in module {}: {} — \ - this Worker will throw if constructed at runtime", - module_name, reason - ); + if matches!(format, OutputFormat::Text) { + eprintln!( + " Warning: worker_threads Worker in module {}: {} — \ + this Worker will throw if constructed at runtime", + module_name, reason + ); + } worker_path_sets.push(Vec::new()); } } From b929e101bb17ba75b934da21a1517fb4e40d317e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 21:55:21 +0200 Subject: [PATCH 14/15] style: rustfmt pass over #5050 walls + review-fix files --- .../perry-codegen/src/expr/this_super_call.rs | 17 ++++++++++------ crates/perry-codegen/src/lower_call/new.rs | 4 +++- .../src/runtime_decls/strings.rs | 6 +----- crates/perry-hir/src/ir/expr.rs | 5 ++++- crates/perry-hir/src/lower/module_decl.rs | 3 ++- crates/perry-hir/src/lower_decl/block.rs | 4 +--- .../src/lower_decl/class_captures.rs | 20 +++++++++---------- crates/perry-hir/src/stable_hash/tests.rs | 3 ++- .../src/object/class_constructors.rs | 5 ++++- .../src/object/class_registry.rs | 5 ++++- .../src/object/reflect_support.rs | 3 +-- 
crates/perry-transform/src/inline/mod.rs | 3 ++- .../perry/src/commands/compile/bootstrap.rs | 3 ++- .../src/commands/compile/cjs_wrap/detect.rs | 5 +---- .../src/commands/compile/cjs_wrap/mod.rs | 15 +++++++++++--- .../src/commands/compile/resolve/tests.rs | 3 +-- 16 files changed, 61 insertions(+), 43 deletions(-) diff --git a/crates/perry-codegen/src/expr/this_super_call.rs b/crates/perry-codegen/src/expr/this_super_call.rs index 714ff6eb7f..87b4d43002 100644 --- a/crates/perry-codegen/src/expr/this_super_call.rs +++ b/crates/perry-codegen/src/expr/this_super_call.rs @@ -143,9 +143,11 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { match a { perry_hir::CallArg::Expr(e) => { let v = lower_expr(ctx, e)?; - arr = ctx - .block() - .call(I64, "js_array_push_f64", &[(I64, &arr), (DOUBLE, &v)]); + arr = ctx.block().call( + I64, + "js_array_push_f64", + &[(I64, &arr), (DOUBLE, &v)], + ); } perry_hir::CallArg::Spread(e) => { // `js_array_push_spread_any` also handles the @@ -449,9 +451,12 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { let argc = super_args.len().min(2).to_string(); // `extends CustomEvent` → initialize `constructor` + // `detail` as a CustomEvent, not a plain Event. 
- let is_custom = - if parent_name.as_str() == "CustomEvent" { "1" } else { "0" } - .to_string(); + let is_custom = if parent_name.as_str() == "CustomEvent" { + "1" + } else { + "0" + } + .to_string(); ctx.block().call( DOUBLE, "js_event_subclass_init", diff --git a/crates/perry-codegen/src/lower_call/new.rs b/crates/perry-codegen/src/lower_call/new.rs index 9f04c5f165..8cfb4a9845 100644 --- a/crates/perry-codegen/src/lower_call/new.rs +++ b/crates/perry-codegen/src/lower_call/new.rs @@ -291,7 +291,9 @@ fn inline_constructor_param_values( let mut out = Vec::with_capacity(params.len()); let mut visible_index = 0usize; for param in params { - if param.name.starts_with("__perry_cap_") && !param.is_rest && param.arguments_object.is_none() + if param.name.starts_with("__perry_cap_") + && !param.is_rest + && param.arguments_object.is_none() { out.push(cap_iter.next().cloned().unwrap_or_else(|| undef.clone())); } else if param.arguments_object.is_some() { diff --git a/crates/perry-codegen/src/runtime_decls/strings.rs b/crates/perry-codegen/src/runtime_decls/strings.rs index d52eea09bb..9f4080b4de 100644 --- a/crates/perry-codegen/src/runtime_decls/strings.rs +++ b/crates/perry-codegen/src/runtime_decls/strings.rs @@ -314,11 +314,7 @@ pub fn declare_phase_b_strings(module: &mut LlModule) { // Args: (msg_ptr, msg_len, code_ptr, code_len, kind). Used by the // WorkerNew unresolved-path fallback. Helper diverges (`-> !`); declared // as void-return for LLVM purposes. 
- module.declare_function( - "js_throw_error_with_code", - VOID, - &[PTR, I64, PTR, I64, I32], - ); + module.declare_function("js_throw_error_with_code", VOID, &[PTR, I64, PTR, I64, I32]); module.declare_function("js_map_set", I64, &[I64, DOUBLE, DOUBLE]); module.declare_function("js_map_get", DOUBLE, &[I64, DOUBLE]); module.declare_function("js_map_has", I32, &[I64, DOUBLE]); diff --git a/crates/perry-hir/src/ir/expr.rs b/crates/perry-hir/src/ir/expr.rs index cc195d90f5..9f55b9be1b 100644 --- a/crates/perry-hir/src/ir/expr.rs +++ b/crates/perry-hir/src/ir/expr.rs @@ -391,7 +391,10 @@ pub enum Expr { /// rebinds read the snapshot instead (vendored zod's /// `static create(...) { … typeName: k.ZodRecord … }` where `k` is an /// enclosing-function local). - ClassCaptureValue { class_name: String, index: u32 }, + ClassCaptureValue { + class_name: String, + index: u32, + }, /// Issue #894: `class C { static [keyExpr] = initExpr }` where the /// class is returned from a factory function body. 
The static-Symbol diff --git a/crates/perry-hir/src/lower/module_decl.rs b/crates/perry-hir/src/lower/module_decl.rs index 2cbb139bb2..28fad6be8d 100644 --- a/crates/perry-hir/src/lower/module_decl.rs +++ b/crates/perry-hir/src/lower/module_decl.rs @@ -458,7 +458,8 @@ pub(crate) fn lower_module_decl( type_only: whole_decl_type_only, is_dynamic: false, is_dynamic_target: false, - is_deferred_require: false, }); + is_deferred_require: false, + }); } ast::ModuleDecl::ExportDecl(export) => { match &export.decl { diff --git a/crates/perry-hir/src/lower_decl/block.rs b/crates/perry-hir/src/lower_decl/block.rs index 9bed95d402..bdc8d88416 100644 --- a/crates/perry-hir/src/lower_decl/block.rs +++ b/crates/perry-hir/src/lower_decl/block.rs @@ -271,9 +271,7 @@ pub fn lower_fn_body_block_stmt( let cap_args: Vec<(perry_types::LocalId, perry_types::LocalId)> = captured.iter().map(|id| (*id, *id)).collect(); for s in body.iter_mut() { - super::class_captures::append_new_args_stmt( - s, &cname, &cap_args, true, - ); + super::class_captures::append_new_args_stmt(s, &cname, &cap_args, true); } re_regs.push(Stmt::Expr(Expr::RegisterClassCaptures { class_name: cname, diff --git a/crates/perry-hir/src/lower_decl/class_captures.rs b/crates/perry-hir/src/lower_decl/class_captures.rs index e92e485505..a46f887b22 100644 --- a/crates/perry-hir/src/lower_decl/class_captures.rs +++ b/crates/perry-hir/src/lower_decl/class_captures.rs @@ -283,16 +283,16 @@ pub fn synthesize_class_captures( // 2. Methods / getters / setters. After each body's capture rebind, // append the rebind ids to any SELF-construction `new (…)` // sites the body contains (lowered before this class registered). 
- let append_self_sites = |body: &mut Vec, - id_map: &std::collections::HashMap| { - let cap_args: Vec<(LocalId, LocalId)> = captures_vec - .iter() - .filter_map(|oid| id_map.get(oid).map(|f| (*oid, *f))) - .collect(); - for stmt in body.iter_mut() { - append_self_new_args_stmt(stmt, name, &cap_args); - } - }; + let append_self_sites = + |body: &mut Vec, id_map: &std::collections::HashMap| { + let cap_args: Vec<(LocalId, LocalId)> = captures_vec + .iter() + .filter_map(|oid| id_map.get(oid).map(|f| (*oid, *f))) + .collect(); + for stmt in body.iter_mut() { + append_self_new_args_stmt(stmt, name, &cap_args); + } + }; for m in methods.iter_mut() { let id_map = rewrite_method_body(ctx, &mut m.body); append_self_sites(&mut m.body, &id_map); diff --git a/crates/perry-hir/src/stable_hash/tests.rs b/crates/perry-hir/src/stable_hash/tests.rs index 5dc7d9fa5a..87e1c96dfc 100644 --- a/crates/perry-hir/src/stable_hash/tests.rs +++ b/crates/perry-hir/src/stable_hash/tests.rs @@ -251,7 +251,8 @@ fn module_metadata_affects_hash() { type_only: false, is_dynamic: false, is_dynamic_target: false, - is_deferred_require: false, }); + is_deferred_require: false, + }); assert_ne!(base_hash, hash_module(&m_imp)); // Add a class diff --git a/crates/perry-runtime/src/object/class_constructors.rs b/crates/perry-runtime/src/object/class_constructors.rs index 099088cdb9..dc313e31aa 100644 --- a/crates/perry-runtime/src/object/class_constructors.rs +++ b/crates/perry-runtime/src/object/class_constructors.rs @@ -180,7 +180,10 @@ pub unsafe extern "C" fn js_super_construct_apply( while cur != 0 && depth < 64 { if let Some((ctor_ptr, total_params)) = lookup_class_constructor(cur) { if std::env::var_os("PERRY_SUPER_DEBUG").is_some() { - eprintln!("super_apply resolved ancestor cid={} total={}", cur, total_params); + eprintln!( + "super_apply resolved ancestor cid={} total={}", + cur, total_params + ); } let caps = class_capture_values(cur).unwrap_or_default(); let user_params = (total_params as 
usize).saturating_sub(caps.len()); diff --git a/crates/perry-runtime/src/object/class_registry.rs b/crates/perry-runtime/src/object/class_registry.rs index 06763e1b2a..a87e7140f9 100644 --- a/crates/perry-runtime/src/object/class_registry.rs +++ b/crates/perry-runtime/src/object/class_registry.rs @@ -1639,7 +1639,10 @@ pub unsafe extern "C" fn js_new_function_construct( || jv.is_bigint() { let desc = unsafe { super::object_ops::describe_value_for_type_error(func_value) }; - super::object_ops::throw_object_type_error_with_suffix(&format!("{desc} "), "is not a constructor"); + super::object_ops::throw_object_type_error_with_suffix( + &format!("{desc} "), + "is not a constructor", + ); } } // `new (new String(""))` / `new (new Number(1))` — a boxed primitive WRAPPER diff --git a/crates/perry-runtime/src/object/reflect_support.rs b/crates/perry-runtime/src/object/reflect_support.rs index dbf2f32c85..d6a3f6b08e 100644 --- a/crates/perry-runtime/src/object/reflect_support.rs +++ b/crates/perry-runtime/src/object/reflect_support.rs @@ -101,8 +101,7 @@ pub(crate) fn obj_value_has_own_key(value: f64, key: f64) -> bool { super::native_module::read_native_module_name(obj), key_to_rust_string(key), ) { - if super::native_module::native_module_has_enumerable_key(&module_name, &key_name) - { + if super::native_module::native_module_has_enumerable_key(&module_name, &key_name) { return true; } } diff --git a/crates/perry-transform/src/inline/mod.rs b/crates/perry-transform/src/inline/mod.rs index 148d7b2849..9a09c2b66f 100644 --- a/crates/perry-transform/src/inline/mod.rs +++ b/crates/perry-transform/src/inline/mod.rs @@ -493,7 +493,8 @@ pub fn inline_functions( type_only: false, is_dynamic: false, is_dynamic_target: false, - is_deferred_require: false, }); + is_deferred_require: false, + }); } } } diff --git a/crates/perry/src/commands/compile/bootstrap.rs b/crates/perry/src/commands/compile/bootstrap.rs index 9d78903586..e06e18d383 100644 --- 
a/crates/perry/src/commands/compile/bootstrap.rs +++ b/crates/perry/src/commands/compile/bootstrap.rs @@ -505,7 +505,8 @@ mod js_runtime_gate_tests { type_only: false, is_dynamic: false, is_dynamic_target: false, - is_deferred_require: false, }); + is_deferred_require: false, + }); let mut package = empty_module("pkg"); package.exports.push(perry_hir::Export::Named { diff --git a/crates/perry/src/commands/compile/cjs_wrap/detect.rs b/crates/perry/src/commands/compile/cjs_wrap/detect.rs index 595ae14694..e6cfc6c58e 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/detect.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/detect.rs @@ -207,10 +207,7 @@ pub(crate) fn strip_comments_and_strings(source: &str) -> String { State::Str(quote) => { if bytes[i] == b'\\' { i += 2; - } else if quote == b'`' - && bytes[i] == b'$' - && bytes.get(i + 1) == Some(&b'{') - { + } else if quote == b'`' && bytes[i] == b'$' && bytes.get(i + 1) == Some(&b'{') { // `${` — interpolation body is code (and may nest). template_interp_depth.push(0); state = State::Code; diff --git a/crates/perry/src/commands/compile/cjs_wrap/mod.rs b/crates/perry/src/commands/compile/cjs_wrap/mod.rs index c21a111676..c9277873df 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/mod.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/mod.rs @@ -107,7 +107,10 @@ if (process.env.NEXT_RUNTIME !== 'edge') { require('next/dist/server/node-environment'); } "#; - assert!(is_commonjs(src), "comment text must not defeat require( arm"); + assert!( + is_commonjs(src), + "comment text must not defeat require( arm" + ); } #[test] @@ -116,7 +119,10 @@ if (process.env.NEXT_RUNTIME !== 'edge') { // literal whose column-0 `import path from 'node:path'` line must // not flip this CJS file to the ESM pipeline. 
let src = "\"use strict\";\nObject.defineProperty(exports, \"__esModule\", { value: true });\nexports.write = function() {\n return `performance.mark('next-start');\nimport path from 'node:path'\nimport module from 'node:module'\n`;\n};\n"; - assert!(is_commonjs(src), "template-literal import must not defeat CJS detection"); + assert!( + is_commonjs(src), + "template-literal import must not defeat CJS detection" + ); } #[test] @@ -137,7 +143,10 @@ if (process.env.NEXT_RUNTIME !== 'edge') { // followed by the real `module.exports=` tail. The stripper must // track regex literals or the tail is masked as string content. let src = "const e = s.split(/['\"]/);\nvar i = make();\nmodule.exports = i;\n"; - assert!(is_commonjs(src), "regex with quote must not hide module.exports"); + assert!( + is_commonjs(src), + "regex with quote must not hide module.exports" + ); } #[test] diff --git a/crates/perry/src/commands/compile/resolve/tests.rs b/crates/perry/src/commands/compile/resolve/tests.rs index f627f1b24e..1bc0faa426 100644 --- a/crates/perry/src/commands/compile/resolve/tests.rs +++ b/crates/perry/src/commands/compile/resolve/tests.rs @@ -1341,8 +1341,7 @@ mod manifest_parse_tests { std::fs::write(root.join("stream-ops.js"), "// requiring module\n").expect("write"); std::fs::write(root.join("stream-ops.web.js"), "// the real target\n").expect("write"); - let resolved = - resolve_with_extensions(&root.join("stream-ops.web")).expect("must resolve"); + let resolved = resolve_with_extensions(&root.join("stream-ops.web")).expect("must resolve"); assert_eq!( resolved, root.join("stream-ops.web.js"), From a48768d57445239168aef7598f7698fbbe646868 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 22:30:31 +0200 Subject: [PATCH 15/15] refactor(compile): extract native-addon detection to collect_modules/native_addon.rs Keeps collect_modules.rs under the 2000-line file-size cap (the wall fixes pushed it from 1988 to 2031). 
Moves the self-contained compile-package Node-native-addon probe group (package-root resolution + binding.gyp / prebuilds / gypfile / loader-dep / *.node markers + the refusal) into a sibling module; behavior unchanged. --- .../src/commands/compile/collect_modules.rs | 146 +--------------- .../compile/collect_modules/native_addon.rs | 162 ++++++++++++++++++ 2 files changed, 164 insertions(+), 144 deletions(-) create mode 100644 crates/perry/src/commands/compile/collect_modules/native_addon.rs diff --git a/crates/perry/src/commands/compile/collect_modules.rs b/crates/perry/src/commands/compile/collect_modules.rs index 2ece54abe7..ef3c440f08 100644 --- a/crates/perry/src/commands/compile/collect_modules.rs +++ b/crates/perry/src/commands/compile/collect_modules.rs @@ -33,6 +33,7 @@ use super::{ mod create_require_transform; mod crypto_ns; mod dynamic_glob; +mod native_addon; mod parse_error; #[cfg(test)] mod tests; @@ -40,6 +41,7 @@ mod tests; use create_require_transform::transform_create_require_literal_requires; use crypto_ns::module_uses_global_crypto_namespace; use dynamic_glob::expand_dynamic_import_glob; +use native_addon::refuse_compile_package_native_addon; use parse_error::annotate_parse_error; const MAX_CROSS_MODULE_INLINE_PRIOR_MODULES: usize = 128; @@ -214,150 +216,6 @@ pub(super) fn known_node_submodule_key(source: &str) -> Option<&'static str> { } } -fn nearest_package_root(path: &std::path::Path) -> Option { - let mut dir = path.parent(); - while let Some(candidate) = dir { - if candidate.join("package.json").exists() { - return Some(candidate.to_path_buf()); - } - dir = candidate.parent(); - } - None -} - -fn package_root_for_compile_package( - ctx: &CompilationContext, - path: &std::path::Path, -) -> Option { - ctx.compile_package_dirs - .values() - .filter(|dir| path.starts_with(dir)) - .max_by_key(|dir| dir.components().count()) - .cloned() - .or_else(|| nearest_package_root(path)) -} - -fn package_name_from_package_json(package_root: 
&std::path::Path) -> Option { - let package_json = fs::read_to_string(package_root.join("package.json")).ok()?; - let parsed = serde_json::from_str::(&package_json).ok()?; - parsed - .get("name") - .and_then(|name| name.as_str()) - .map(str::to_string) -} - -fn find_node_addon_file(dir: &std::path::Path, max_depth: usize) -> Option { - if max_depth == 0 { - return None; - } - let Ok(entries) = fs::read_dir(dir) else { - return None; - }; - for entry in entries.flatten() { - let path = entry.path(); - let file_name = entry.file_name(); - let file_name = file_name.to_string_lossy(); - if file_name == "node_modules" || file_name == ".git" { - continue; - } - if path.is_file() && path.extension().and_then(|ext| ext.to_str()) == Some("node") { - return Some(path); - } - if path.is_dir() { - if let Some(found) = find_node_addon_file(&path, max_depth - 1) { - return Some(found); - } - } - } - None -} - -fn node_addon_marker(package_root: &std::path::Path) -> Option<(&'static str, String)> { - let binding_gyp = package_root.join("binding.gyp"); - if binding_gyp.exists() { - return Some(("binding.gyp", binding_gyp.display().to_string())); - } - let prebuilds = package_root.join("prebuilds"); - if prebuilds.is_dir() { - return Some(("prebuilds/", prebuilds.display().to_string())); - } - let package_json_path = package_root.join("package.json"); - if let Ok(package_json) = fs::read_to_string(&package_json_path) { - if let Ok(parsed) = serde_json::from_str::(&package_json) { - if parsed - .get("gypfile") - .and_then(|value| value.as_bool()) - .unwrap_or(false) - { - return Some(( - "package.json gypfile", - package_json_path.display().to_string(), - )); - } - if package_json_dependency_uses_native_addon_loader(&parsed, "node-gyp-build") - || package_json_dependency_uses_native_addon_loader(&parsed, "bindings") - { - return Some(( - "native addon loader dependency", - package_json_path.display().to_string(), - )); - } - } - } - if let Some(node_file) = 
find_node_addon_file(package_root, 5) { - return Some(("*.node", node_file.display().to_string())); - } - None -} - -fn package_json_dependency_uses_native_addon_loader( - package_json: &serde_json::Value, - loader_name: &str, -) -> bool { - ["dependencies", "optionalDependencies"] - .iter() - .any(|section| { - package_json - .get(section) - .and_then(|deps| deps.as_object()) - .is_some_and(|deps| deps.contains_key(loader_name)) - }) -} - -fn refuse_compile_package_native_addon( - ctx: &mut CompilationContext, - canonical: &std::path::Path, -) -> Result<()> { - let Some(package_root) = package_root_for_compile_package(ctx, canonical) else { - return Ok(()); - }; - if !ctx - .checked_compile_package_native_addon_roots - .insert(package_root.clone()) - { - return Ok(()); - } - if has_perry_native_library(&package_root) { - return Ok(()); - } - let Some((marker, marker_path)) = node_addon_marker(&package_root) else { - return Ok(()); - }; - let package_name = package_name_from_package_json(&package_root) - .unwrap_or_else(|| package_root.display().to_string()); - anyhow::bail!( - "package `{}` is in `perry.compilePackages` but uses a Node native addon ({}) at {}.\n\ - Perry cannot load Node `.node` / N-API addons inside a native Perry binary. \ - Remove `{}` from `perry.compilePackages`, choose a pure JS/TS package, \ - or replace the native boundary with a Perry native binding \ - (`perry.nativeLibrary` / perry-ffi).", - package_name, - marker, - marker_path, - package_name - ); -} - /// Collect all modules to compile (transitive closure of imports) pub(super) fn collect_modules( entry_path: &PathBuf, diff --git a/crates/perry/src/commands/compile/collect_modules/native_addon.rs b/crates/perry/src/commands/compile/collect_modules/native_addon.rs new file mode 100644 index 0000000000..d5de61317c --- /dev/null +++ b/crates/perry/src/commands/compile/collect_modules/native_addon.rs @@ -0,0 +1,162 @@ +//! Compile-package Node native-addon detection. +//! +//! 
Extracted from `collect_modules.rs` (file-size cap). A package listed in +//! `perry.compilePackages` must be pure JS/TS — Perry cannot load Node +//! `.node` / N-API addons inside a native binary. These helpers locate the +//! package root for a resolved file and probe it for native-addon markers +//! (`binding.gyp`, `prebuilds/`, `gypfile`, `node-gyp-build`/`bindings` +//! loader deps, or a stray `*.node`), so `refuse_compile_package_native_addon` +//! can fail the compile with an actionable message instead of silently +//! emitting a broken binary. + +use anyhow::Result; +use std::fs; +use std::path::PathBuf; + +// Parent (`collect_modules`) private imports are visible to this child module. +use super::has_perry_native_library; +use super::CompilationContext; + +fn nearest_package_root(path: &std::path::Path) -> Option { + let mut dir = path.parent(); + while let Some(candidate) = dir { + if candidate.join("package.json").exists() { + return Some(candidate.to_path_buf()); + } + dir = candidate.parent(); + } + None +} + +fn package_root_for_compile_package( + ctx: &CompilationContext, + path: &std::path::Path, +) -> Option { + ctx.compile_package_dirs + .values() + .filter(|dir| path.starts_with(dir)) + .max_by_key(|dir| dir.components().count()) + .cloned() + .or_else(|| nearest_package_root(path)) +} + +fn package_name_from_package_json(package_root: &std::path::Path) -> Option { + let package_json = fs::read_to_string(package_root.join("package.json")).ok()?; + let parsed = serde_json::from_str::(&package_json).ok()?; + parsed + .get("name") + .and_then(|name| name.as_str()) + .map(str::to_string) +} + +fn find_node_addon_file(dir: &std::path::Path, max_depth: usize) -> Option { + if max_depth == 0 { + return None; + } + let Ok(entries) = fs::read_dir(dir) else { + return None; + }; + for entry in entries.flatten() { + let path = entry.path(); + let file_name = entry.file_name(); + let file_name = file_name.to_string_lossy(); + if file_name == "node_modules" 
|| file_name == ".git" { + continue; + } + if path.is_file() && path.extension().and_then(|ext| ext.to_str()) == Some("node") { + return Some(path); + } + if path.is_dir() { + if let Some(found) = find_node_addon_file(&path, max_depth - 1) { + return Some(found); + } + } + } + None +} + +fn node_addon_marker(package_root: &std::path::Path) -> Option<(&'static str, String)> { + let binding_gyp = package_root.join("binding.gyp"); + if binding_gyp.exists() { + return Some(("binding.gyp", binding_gyp.display().to_string())); + } + let prebuilds = package_root.join("prebuilds"); + if prebuilds.is_dir() { + return Some(("prebuilds/", prebuilds.display().to_string())); + } + let package_json_path = package_root.join("package.json"); + if let Ok(package_json) = fs::read_to_string(&package_json_path) { + if let Ok(parsed) = serde_json::from_str::(&package_json) { + if parsed + .get("gypfile") + .and_then(|value| value.as_bool()) + .unwrap_or(false) + { + return Some(( + "package.json gypfile", + package_json_path.display().to_string(), + )); + } + if package_json_dependency_uses_native_addon_loader(&parsed, "node-gyp-build") + || package_json_dependency_uses_native_addon_loader(&parsed, "bindings") + { + return Some(( + "native addon loader dependency", + package_json_path.display().to_string(), + )); + } + } + } + if let Some(node_file) = find_node_addon_file(package_root, 5) { + return Some(("*.node", node_file.display().to_string())); + } + None +} + +fn package_json_dependency_uses_native_addon_loader( + package_json: &serde_json::Value, + loader_name: &str, +) -> bool { + ["dependencies", "optionalDependencies"] + .iter() + .any(|section| { + package_json + .get(section) + .and_then(|deps| deps.as_object()) + .is_some_and(|deps| deps.contains_key(loader_name)) + }) +} + +pub(super) fn refuse_compile_package_native_addon( + ctx: &mut CompilationContext, + canonical: &std::path::Path, +) -> Result<()> { + let Some(package_root) = package_root_for_compile_package(ctx, 
canonical) else { + return Ok(()); + }; + if !ctx + .checked_compile_package_native_addon_roots + .insert(package_root.clone()) + { + return Ok(()); + } + if has_perry_native_library(&package_root) { + return Ok(()); + } + let Some((marker, marker_path)) = node_addon_marker(&package_root) else { + return Ok(()); + }; + let package_name = package_name_from_package_json(&package_root) + .unwrap_or_else(|| package_root.display().to_string()); + anyhow::bail!( + "package `{}` is in `perry.compilePackages` but uses a Node native addon ({}) at {}.\n\ + Perry cannot load Node `.node` / N-API addons inside a native Perry binary. \ + Remove `{}` from `perry.compilePackages`, choose a pure JS/TS package, \ + or replace the native boundary with a Perry native binding \ + (`perry.nativeLibrary` / perry-ffi).", + package_name, + marker, + marker_path, + package_name + ); +}