diff --git a/crates/perry-codegen/src/codegen/entry.rs b/crates/perry-codegen/src/codegen/entry.rs index cc6cc64850..7e98a01bc5 100644 --- a/crates/perry-codegen/src/codegen/entry.rs +++ b/crates/perry-codegen/src/codegen/entry.rs @@ -9,7 +9,7 @@ use crate::expr::FnCtx; use crate::module::LlModule; use crate::stmt; use crate::strings::StringPool; -use crate::types::{DOUBLE, I32, I8, PTR, VOID}; +use crate::types::{DOUBLE, I32, I64, I8, PTR, VOID}; use super::helpers::{ emit_namespace_populator, enable_module_init_shadow_frame, init_static_fields_early, @@ -18,6 +18,82 @@ use super::helpers::{ }; use super::opts::CrossModuleCtx; +/// Collect the entry module's top-level `process.env.X = "lit"` +/// assignments so they can be applied to the OS environment BEFORE eager +/// module init (see the call site in `compile_module_entry`). +/// +/// Node runs the entry script top-to-bottom, so a `process.env.NODE_ENV = +/// 'production'` on line 1 is observed by every `require()`d dependency's +/// init. Perry hoists `require`s to eager imports that init before the entry +/// body runs, so without this the dependency observes the unmodified env — +/// e.g. `react-dom/index.js` branches on `process.env.NODE_ENV === 'production'` +/// to pick the production vs development bundle, and the development file is +/// pruned from a Next.js standalone build, so the wrong branch yields an empty +/// module and a downstream `ReactDOMSharedInternals.d` crash. +/// +/// Only *unconditional module-top-level* assignments are collected: the entry +/// init statements, plus one+ levels into a cjs-wrap IIFE (`_cjs = +/// (function(){ ... })()`), which is where the wrapped entry's top-level +/// statements live. Assignments nested in conditionals or inner functions are +/// deliberately skipped — those run conditionally/lazily, exactly as in Node. 
+fn collect_entry_env_literals(init: &[perry_hir::Stmt]) -> Vec<(String, String)> { + use perry_hir::{Expr, Stmt}; + + fn record(expr: &Expr, out: &mut Vec<(String, String)>) { + // `process.env.X = "lit"` lowers to either form depending on path. + if let Expr::PutValueSet { + target, key, value, .. + } = expr + { + if matches!(target.as_ref(), Expr::ProcessEnv) { + if let (Expr::String(k), Expr::String(v)) = (key.as_ref(), value.as_ref()) { + out.push((k.clone(), v.clone())); + } + } + } + if let Expr::PropertySet { + object, + property, + value, + } = expr + { + if matches!(object.as_ref(), Expr::ProcessEnv) { + if let Expr::String(v) = value.as_ref() { + out.push((property.clone(), v.clone())); + } + } + } + } + + fn descend_iife(expr: &Expr, out: &mut Vec<(String, String)>, depth: u32) { + if depth >= 4 { + return; + } + if let Expr::Call { callee, .. } = expr { + if let Expr::Closure { body, .. } = callee.as_ref() { + scan(body, out, depth + 1); + } + } + } + + fn scan(stmts: &[Stmt], out: &mut Vec<(String, String)>, depth: u32) { + for s in stmts { + match s { + Stmt::Expr(e) => { + record(e, out); + descend_iife(e, out, depth); + } + Stmt::Let { init: Some(e), .. } => descend_iife(e, out, depth), + _ => {} + } + } + } + + let mut out = Vec::new(); + scan(init, &mut out, 0); + out +} + /// Emit the module's entry function. /// /// For the **entry module**: emits `int main()` that bootstraps GC, runs @@ -203,6 +279,25 @@ pub(super) fn compile_module_entry( let blk = main.block_mut(0).unwrap(); // Entry module's own string pool first. blk.call_void(&strings_init_name, &[]); + // Apply the entry module's top-level `process.env. = + // ""` assignments NOW — after the string pool is live but + // BEFORE any dependency's `__init` runs — so eager-inited deps that + // branch on `process.env` at init time observe what the entry sets, + // matching Node's require-is-lazy ordering. See + // `collect_entry_env_literals`. 
The "NODE_ENV"/"production" string + // handles are interned here and populated by the strings-init call + // above (the entry body also references them, so they share slots). + for (name, value) in collect_entry_env_literals(&hir.init) { + let name_idx = strings.intern(&name); + let value_idx = strings.intern(&value); + let name_global = format!("@{}", strings.entry(name_idx).handle_global); + let value_global = format!("@{}", strings.entry(value_idx).handle_global); + let name_box = blk.load(DOUBLE, &name_global); + let name_bits = blk.bitcast_double_to_i64(&name_box); + let name_handle = blk.and(I64, &name_bits, crate::nanbox::POINTER_MASK_I64); + let value_box = blk.load(DOUBLE, &value_global); + blk.call_void("js_setenv", &[(I64, &name_handle), (DOUBLE, &value_box)]); + } // Then every non-entry module's init in order. Each // non-entry module's `__init` runs its own string // pool init internally before its top-level statements. diff --git a/crates/perry-codegen/src/expr/dyn_extern_i18n.rs b/crates/perry-codegen/src/expr/dyn_extern_i18n.rs index 79ac272aa8..ca9dbd4af9 100644 --- a/crates/perry-codegen/src/expr/dyn_extern_i18n.rs +++ b/crates/perry-codegen/src/expr/dyn_extern_i18n.rs @@ -59,6 +59,33 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { } else { double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)) }; + // An empty `paths` list means collect_modules could not resolve + // the filename statically (it warned at compile time). Many real + // packages construct Workers only on cold paths (e.g. Next.js + // build-time worker pools) — throw if one is actually reached at + // runtime instead of failing the whole compile. 
+ if paths.is_empty() { + let msg = "worker_threads Worker filename was not statically \ + resolvable at compile time; constructing this Worker \ + is unsupported in the compiled binary"; + let msg_idx = ctx.strings.intern(msg); + let msg_entry = ctx.strings.entry(msg_idx); + let msg_bytes_global = format!("@{}", msg_entry.bytes_global); + let msg_len_str = msg_entry.byte_len.to_string(); + let blk = ctx.block(); + blk.call_void( + "js_throw_error_with_code", + &[ + (PTR, &msg_bytes_global), + (I64, &msg_len_str), + (PTR, &"null".to_string()), + (I64, &"0".to_string()), + (I32, &"0".to_string()), + ], + ); + blk.unreachable(); + return Ok(double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED))); + } if paths.len() != 1 { bail!( "worker_threads Worker requires exactly one compile-time-resolved filename, got {}", @@ -364,6 +391,18 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { )); } if let Some(source_prefix) = ctx.import_function_prefixes.get(name).cloned() { + // Next.js lazy-require: a `_lazyreq_N` binding is the CJS require + // shim's handle to a FUNCTION-LOCAL `require('S')`. S is + // `Deferred` (never eager-initialized), so before reading its + // default-export getter, fire `__init()` — idempotent, so + // re-reads cost a guard check. This is the moment Node would run + // S's module body: when `require('S')` is actually called. 
+ if name.starts_with("_lazyreq_") { + let init_fn = format!("{}__init", source_prefix); + ctx.pending_declares + .push((init_fn.clone(), crate::types::VOID, vec![])); + ctx.block().call_void(&init_fn, &[]); + } // Issue #678 followup: a V8-fallback import used as a value // (rather than called directly) has no native singleton // wrapper to point at — the `__perry_wrap_extern_*` for V8 @@ -550,6 +589,27 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { &[(PTR, &name_bytes_global), (I64, &name_len)], )); } + // A default-import alias of a Node builtin module used as a VALUE + // (`const nodeTimers = require('node:timers')`, adopted to an + // import by the CJS wrap) — materialize the real native-module + // namespace object so member reads, monkey-patch writes, and + // enumeration behave. Previously fell through to TAG_TRUE: + // `typeof nodeTimers === "boolean"` and Next.js's + // fast-set-immediate extension threw on + // `nodeTimers.setImmediate = patched` at startup. + if let Some(source) = ctx.imported_class_sources.get(name) { + let bare = source.strip_prefix("node:").unwrap_or(source).to_string(); + if perry_hir::is_node_builtin_module(&bare) { + let module_label = emit_string_literal_global(ctx, &bare); + let module_len = bare.len(); + let blk = ctx.block(); + return Ok(blk.call( + DOUBLE, + "js_create_native_module_namespace", + &[(PTR, &module_label), (I64, &module_len.to_string())], + )); + } + } Ok(double_literal(f64::from_bits(crate::nanbox::TAG_TRUE))) } diff --git a/crates/perry-codegen/src/expr/mod.rs b/crates/perry-codegen/src/expr/mod.rs index d85c58dbda..c180d93659 100644 --- a/crates/perry-codegen/src/expr/mod.rs +++ b/crates/perry-codegen/src/expr/mod.rs @@ -1469,7 +1469,9 @@ pub(crate) fn lower_expr(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { Expr::New { .. } | Expr::NewDynamic { .. } | Expr::NewDynamicSpread { .. } => { new_dynamic::lower(ctx, expr) } - Expr::This | Expr::NewTarget | Expr::SuperCall(..) 
=> this_super_call::lower(ctx, expr), + Expr::This | Expr::NewTarget | Expr::SuperCall(..) | Expr::SuperCallSpread(..) => { + this_super_call::lower(ctx, expr) + } Expr::IsNaN(..) | Expr::MathPow(..) | Expr::MathImul(..) @@ -1892,6 +1894,8 @@ pub(crate) fn lower_expr(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { Expr::StaticFieldGet { .. } | Expr::StaticFieldSet { .. } | Expr::RegisterClassParentDynamic { .. } + | Expr::RegisterClassCaptures { .. } + | Expr::ClassCaptureValue { .. } | Expr::RegisterClassStaticSymbol { .. } | Expr::RegisterClassComputedMethod { .. } | Expr::RegisterClassComputedAccessor { .. } diff --git a/crates/perry-codegen/src/expr/static_field_meta.rs b/crates/perry-codegen/src/expr/static_field_meta.rs index ca9d85587f..2844d583f0 100644 --- a/crates/perry-codegen/src/expr/static_field_meta.rs +++ b/crates/perry-codegen/src/expr/static_field_meta.rs @@ -121,6 +121,65 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { // observable to user code. Ok(double_literal(f64::from_bits(0x7FFC_0000_0000_0001))) } + // Snapshot a function-nested class's captured outer locals into the + // runtime CLASS_CAPTURE_VALUES table at the decl site, so DYNAMIC + // construction of the class value (`exports.C = C; new mod.C()` — + // the webpack/zod bundle pattern) can fill the synthesized + // `__perry_cap_` ctor params. Mirrors RegisterClassParentDynamic + // placement; static `new C()` sites pass captures inline and never + // consult the table. 
+ Expr::RegisterClassCaptures { + class_name, + captures, + } => { + let mut lowered: Vec = Vec::with_capacity(captures.len()); + for c in captures { + lowered.push(lower_expr(ctx, c)?); + } + if let Some(&class_id) = ctx.class_ids.get(class_name) { + if class_id != 0 && !lowered.is_empty() { + let n = lowered.len(); + let buf = ctx.func.alloca_entry_array(DOUBLE, n); + for (i, v) in lowered.iter().enumerate() { + let slot = + ctx.block() + .gep(DOUBLE, &buf, &[(crate::types::I64, &i.to_string())]); + ctx.block().store(DOUBLE, v, &slot); + } + let ptr_reg = ctx.block().next_reg(); + ctx.block().emit_raw(format!( + "{} = getelementptr [{} x double], ptr {}, i64 0, i64 0", + ptr_reg, n, buf + )); + let cid_str = class_id.to_string(); + let len_str = n.to_string(); + ctx.block().call_void( + "js_class_register_capture_values", + &[ + (crate::types::I32, &cid_str), + (crate::types::PTR, &ptr_reg), + (crate::types::I64, &len_str), + ], + ); + } + } + Ok(double_literal(f64::from_bits(0x7FFC_0000_0000_0001))) + } + // Read slot `index` of the class's decl-site capture snapshot — + // STATIC method prologue rebinds (no instance to carry the + // `__perry_cap_*` fields). + Expr::ClassCaptureValue { class_name, index } => { + if let Some(&class_id) = ctx.class_ids.get(class_name) { + let cid_str = class_id.to_string(); + let idx_str = index.to_string(); + return Ok(ctx.block().call( + DOUBLE, + "js_class_capture_value", + &[(crate::types::I32, &cid_str), (crate::types::I32, &idx_str)], + )); + } + Ok(double_literal(f64::from_bits(0x7FFC_0000_0000_0001))) + } // Issue #894: `static [Symbol.for("k")] = init` inside a // class expression returned from a factory function. 
Emitted // by HIR lowering as a `Sequence([…, RegisterClassStaticSymbol, diff --git a/crates/perry-codegen/src/expr/this_super_call.rs b/crates/perry-codegen/src/expr/this_super_call.rs index e9fdfecf2b..87b4d43002 100644 --- a/crates/perry-codegen/src/expr/this_super_call.rs +++ b/crates/perry-codegen/src/expr/this_super_call.rs @@ -122,6 +122,73 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { // the lowered super-call args. // // The current class is the topmost entry in `class_stack`. The + // `super(...spread)` — tsc's pass-through ctor (`constructor(){ + // super(...arguments) }`, zod's ZodNumber/ZodBigInt). The arg + // count is dynamic, so the parent ctor can't be inlined; build + // the args array and invoke the closest registered ancestor ctor + // on the SAME `this` through the CLASS_CONSTRUCTORS registry. + Expr::SuperCallSpread(call_args) => { + let Some(current_class_name) = ctx.class_stack.last().cloned() else { + for a in call_args { + let (perry_hir::CallArg::Expr(e) | perry_hir::CallArg::Spread(e)) = a; + let _ = lower_expr(ctx, e)?; + } + return Ok(double_literal(0.0)); + }; + // Materialize the args array (spread elements appended via + // the runtime spread helper). + let zero = "0".to_string(); + let mut arr = ctx.block().call(I64, "js_array_alloc", &[(I32, &zero)]); + for a in call_args { + match a { + perry_hir::CallArg::Expr(e) => { + let v = lower_expr(ctx, e)?; + arr = ctx.block().call( + I64, + "js_array_push_f64", + &[(I64, &arr), (DOUBLE, &v)], + ); + } + perry_hir::CallArg::Spread(e) => { + // `js_array_push_spread_any` also handles the + // arguments OBJECT (array-like, not ArrayHeader) — + // the `super(...arguments)` source. + let v = lower_expr(ctx, e)?; + arr = ctx.block().call( + I64, + "js_array_push_spread_any", + &[(I64, &arr), (DOUBLE, &v)], + ); + } + } + } + // Invoke the closest registered ancestor ctor through the + // CLASS_CONSTRUCTORS registry. 
KNOWN GAP: constructions from + // METHOD bodies (standalone-ctor path) currently lose the + // parent's field writes — see the wall-21 notes; top-level and + // arrow-context constructions work. + let this_box = match ctx.this_stack.last().cloned() { + Some(slot) => ctx.block().load(DOUBLE, &slot), + None => double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)), + }; + if let Some(&child_cid) = ctx.class_ids.get(¤t_class_name) { + let cid_str = child_cid.to_string(); + let blk = ctx.block(); + let arr_box = nanbox_pointer_inline(blk, &arr); + ctx.block().call_void( + "js_super_construct_apply", + &[(I32, &cid_str), (DOUBLE, &this_box), (DOUBLE, &arr_box)], + ); + } + // Spec: subclass field initializers run AFTER super() returns + // (mirrors every other super arm). + crate::lower_call::apply_field_initializers_recursive( + ctx, + ¤t_class_name, + crate::lower_call::FieldInitMode::SelfOnly, + )?; + return Ok(double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED))); + } // parent is `current_class.extends_name` (Perry uses the string // form for cross-module/late-resolved cases) or // `current_class.extends.and_then(class_id_to_name)`. For Phase @@ -198,6 +265,8 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { | "TransformStream" | "Request" | "Response" + | "Event" + | "CustomEvent" ) || is_other_builtin_constructor_name(parent_name.as_str()); if !is_builtin_parent_name { if let Some(extends_expr) = current_class.extends_expr.as_deref() { @@ -361,6 +430,53 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { // handle at runtime (see `fetch_subclass_handle_id`). This // makes `class Request extends GlobalRequest {}` — exactly // what `@hono/node-server` does — produce a working Request. + // `class X extends Event` / `extends CustomEvent` (the `ws` + // package's CloseEvent/ErrorEvent/MessageEvent): `super(type, + // options)` initializes the standard Event fields/methods onto + // `this`. 
The `X → Event` registry edge (registered at class- + // definition time via js_register_class_parent_dynamic) keeps + // `instanceof Event` and EventTarget dispatch acceptance. + if matches!(parent_name.as_str(), "Event" | "CustomEvent") { + let undef = double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)); + let mut lowered: Vec = Vec::with_capacity(super_args.len()); + for a in super_args { + lowered.push(lower_expr(ctx, a)?); + } + let arg0 = lowered.first().cloned().unwrap_or_else(|| undef.clone()); + let arg1 = lowered.get(1).cloned().unwrap_or_else(|| undef.clone()); + let this_box = match ctx.this_stack.last().cloned() { + Some(slot) => ctx.block().load(DOUBLE, &slot), + None => undef.clone(), + }; + let argc = super_args.len().min(2).to_string(); + // `extends CustomEvent` → initialize `constructor` + + // `detail` as a CustomEvent, not a plain Event. + let is_custom = if parent_name.as_str() == "CustomEvent" { + "1" + } else { + "0" + } + .to_string(); + ctx.block().call( + DOUBLE, + "js_event_subclass_init", + &[ + (DOUBLE, &this_box), + (DOUBLE, &arg0), + (DOUBLE, &arg1), + (I32, &argc), + (I32, &is_custom), + ], + ); + let current_class_name = + ctx.class_stack.last().cloned().unwrap_or_default(); + crate::lower_call::apply_field_initializers_recursive( + ctx, + ¤t_class_name, + crate::lower_call::FieldInitMode::SelfOnly, + )?; + return Ok(double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED))); + } let fetch_subclass_fn = match parent_name.as_str() { "Request" => Some("js_request_subclass_init"), "Response" => Some("js_response_subclass_init"), @@ -528,6 +644,45 @@ pub(crate) fn lower(ctx: &mut FnCtx<'_>, expr: &Expr) -> Result { } if let Some(parent_ctor) = &effective_parent_class.constructor { + // The parent's synthesized `__perry_cap_*` params (a parent + // class that captures enclosing locals) are NOT in the + // user-written `super(...)` args. 
The CHILD's ctor carries + // same-named cap params (capture union), bound in the current + // scope — append their values by NAME so the binder's + // tail-aligned cap binding sees them. Without this, + // tail-binding pulled the LAST user arg into the parent's cap + // slot and the parent ctor's real params read undefined + // (vendored zod: ZodType's `this._def = def` got undefined). + let parent_cap_params: Vec = parent_ctor + .params + .iter() + .filter(|p| p.name.starts_with("__perry_cap_")) + .map(|p| p.name.clone()) + .collect(); + if !parent_cap_params.is_empty() { + let child_cap_ids: std::collections::HashMap = ctx + .class_stack + .last() + .and_then(|child| ctx.classes.get(child.as_str())) + .and_then(|c| c.constructor.as_ref()) + .map(|ctor| { + ctor.params + .iter() + .filter(|p| p.name.starts_with("__perry_cap_")) + .map(|p| (p.name.clone(), p.id)) + .collect() + }) + .unwrap_or_default(); + for cap_name in &parent_cap_params { + let val = child_cap_ids + .get(cap_name) + .and_then(|id| ctx.locals.get(id).cloned()) + .map(|slot| ctx.block().load(DOUBLE, &slot)); + lowered_args.push(val.unwrap_or_else(|| { + double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)) + })); + } + } let saved_scope = bind_inline_constructor_params(ctx, &parent_ctor.params, &lowered_args); diff --git a/crates/perry-codegen/src/lower_call/builtin.rs b/crates/perry-codegen/src/lower_call/builtin.rs index c255657b55..33765c6fb9 100644 --- a/crates/perry-codegen/src/lower_call/builtin.rs +++ b/crates/perry-codegen/src/lower_call/builtin.rs @@ -28,33 +28,35 @@ pub(super) fn lower_builtin_new( args: &[Expr], ) -> Result> { // Issue #602: ambiguously-named built-in constructors (Client / Pool / - // Database / Redis / MongoClient / Decimal) collide with default-import - // aliases from unrelated packages — `import Client from "better-sqlite3"` - // would otherwise dispatch through pg's Client arm and emit an undefined - // `js_pg_client_new` reference at link time. 
When `class_name` matches an - // ambiguous arm AND we know the import source is NOT the package the arm - // is for, return `None` so `lower_new` falls through to the generic path. - // Names without a recorded import source (top-level globals, locally- - // defined classes already filtered upstream, etc.) keep their pre-#602 - // behavior — the arm still fires. + // Database / Redis / MongoClient / Decimal) collide with bindings from + // unrelated packages — `import Client from "better-sqlite3"` would + // otherwise dispatch through pg's Client arm and emit an undefined + // `js_pg_client_new` reference at link time. None of these names is a + // Node global, so the arm fires ONLY on positive evidence: a recorded + // import binding whose source is the arm's package (the CJS wrap's + // require-adoption records these too). Names without a matching import + // source fall through to the generic path — this covers function-scoped + // class expressions like undici's `var Client = class _Client …` inside + // bundled vendor code (Next.js `@edge-runtime/primitives`), which are + // invisible to `ctx.classes` and previously hit pg's arm, breaking the + // link of any program that bundles undici without importing pg. 
let import_src = ctx .imported_class_sources .get(class_name) .map(|s| s.as_str()); - let arm_mismatches_source = match (class_name, import_src) { - ("Client", Some(src)) => src != "pg", - ("Pool", Some(src)) => src != "pg", - ("Database", Some(src)) => src != "better-sqlite3", - ("DatabaseSync", Some(src)) => src != "sqlite", - ("Session", Some(src)) => src != "sqlite", - ("StatementSync", Some(src)) => src != "sqlite", - ("Redis", Some(src)) => src != "ioredis" && src != "redis", - ("MongoClient", Some(src)) => src != "mongodb", - ("Decimal", Some(src)) => src != "decimal.js", - _ => false, + let required_sources: Option<&[&str]> = match class_name { + "Client" | "Pool" => Some(&["pg"]), + "Database" => Some(&["better-sqlite3"]), + "DatabaseSync" | "Session" | "StatementSync" => Some(&["sqlite", "node:sqlite"]), + "Redis" => Some(&["ioredis", "redis"]), + "MongoClient" => Some(&["mongodb"]), + "Decimal" => Some(&["decimal.js"]), + _ => None, }; - if arm_mismatches_source { - return Ok(None); + if let Some(sources) = required_sources { + if !import_src.is_some_and(|src| sources.contains(&src)) { + return Ok(None); + } } match class_name { "Utf8Stream" diff --git a/crates/perry-codegen/src/lower_call/new.rs b/crates/perry-codegen/src/lower_call/new.rs index 2a721cd5d3..8cfb4a9845 100644 --- a/crates/perry-codegen/src/lower_call/new.rs +++ b/crates/perry-codegen/src/lower_call/new.rs @@ -108,7 +108,7 @@ fn ctor_body_calls_super(body: &[perry_hir::Stmt]) -> bool { } fn expr_calls_super(expr: &Expr) -> bool { - if matches!(expr, Expr::SuperCall(_)) { + if matches!(expr, Expr::SuperCall(_) | Expr::SuperCallSpread(_)) { return true; } let mut found = false; @@ -131,7 +131,7 @@ fn ctor_body_closure_calls_super(body: &[perry_hir::Stmt]) -> bool { } fn expr_calls_super_incl_closures(expr: &Expr) -> bool { - if matches!(expr, Expr::SuperCall(_)) { + if matches!(expr, Expr::SuperCall(_) | Expr::SuperCallSpread(_)) { return true; } if let Expr::Closure { body, .. 
} = expr { @@ -269,21 +269,45 @@ fn inline_constructor_param_values( lowered_args: &[String], ) -> Vec { let undef = double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)); + // Synthesized `__perry_cap_` capture params are always TRAILING + // params, and `Expr::New` sites always append the capture values after + // the user args — but the two sides need not agree on the USER arity. + // A no-user-ctor capturing class has zero user params while the `new` + // site may pass user args (`new ZodString({})` — the vendored-zod + // bundle), so positional binding put the user arg into the capture + // slot. Bind capture params from the args TAIL and user params from + // the head. + let n_caps = params + .iter() + .filter(|p| { + p.name.starts_with("__perry_cap_") && !p.is_rest && p.arguments_object.is_none() + }) + .count() + .min(lowered_args.len()); + let user_len = lowered_args.len() - n_caps; + let (user_args, cap_args) = lowered_args.split_at(user_len); + let mut cap_iter = cap_args.iter(); + let mut out = Vec::with_capacity(params.len()); let mut visible_index = 0usize; for param in params { - if param.arguments_object.is_some() { - out.push(pack_lowered_args_array(ctx, lowered_args)); + if param.name.starts_with("__perry_cap_") + && !param.is_rest + && param.arguments_object.is_none() + { + out.push(cap_iter.next().cloned().unwrap_or_else(|| undef.clone())); + } else if param.arguments_object.is_some() { + out.push(pack_lowered_args_array(ctx, user_args)); } else if param.is_rest { - let tail = if visible_index < lowered_args.len() { - &lowered_args[visible_index..] + let tail = if visible_index < user_args.len() { + &user_args[visible_index..] } else { &[] }; out.push(pack_lowered_args_array(ctx, tail)); } else { out.push( - lowered_args + user_args .get(visible_index) .cloned() .unwrap_or_else(|| undef.clone()), @@ -361,7 +385,39 @@ fn call_local_constructor_symbol( // from `_addCheck`, where ZodNumber has no own ctor and ZodType does). 
let param_count = effective_constructor_param_count(ctx, class); let undef_lit = double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)); - let mut ctor_values = lowered_args.to_vec(); + // When the ctor's signature is statically known, build per-param values + // with the SAME packing rules the inline path uses — a rest param or the + // synthesized `arguments` param receives a PACKED ARRAY, not a raw + // positional value. Pre-fix, `new Kid({...})` from a method of Kid (the + // recursion-guarded symbol-call path) shoved the user arg RAW into the + // ctor's synthetic `arguments` slot; `super(...arguments)` then spread + // an object with no `length` and the parent ctor saw zero args + // (vendored zod's `z.number().int()` chain — `_addCheck` → + // `new ZodNumber({…})` → `constructor(){ super(...arguments) }`). + let effective_params: Option> = { + let mut found = class.constructor.as_ref().map(|c| c.params.clone()); + if found.is_none() { + let mut parent = class.extends_name.as_deref().map(|s| s.to_string()); + while let Some(pname) = parent { + match ctx.classes.get(&pname).copied() { + Some(pc) => { + if let Some(pctor) = pc.constructor.as_ref() { + found = Some(pctor.params.clone()); + break; + } + parent = pc.extends_name.as_deref().map(|s| s.to_string()); + } + None => break, + } + } + } + found + }; + let mut ctor_values = if let Some(params) = effective_params { + inline_constructor_param_values(ctx, ¶ms, lowered_args) + } else { + lowered_args.to_vec() + }; ctor_values.truncate(param_count); while ctor_values.len() < param_count { ctor_values.push(undef_lit.clone()); diff --git a/crates/perry-codegen/src/lower_call/property_get.rs b/crates/perry-codegen/src/lower_call/property_get.rs index fb8e251f7e..dc36fdad3c 100644 --- a/crates/perry-codegen/src/lower_call/property_get.rs +++ b/crates/perry-codegen/src/lower_call/property_get.rs @@ -329,7 +329,7 @@ pub fn try_lower_property_get_method_call( "split" | "charCodeAt" | "charAt" | "trim" | 
"trimStart" | "trimEnd" | "substring" | "substr" | "toLowerCase" | "toUpperCase" | "toLocaleLowerCase" | "toLocaleUpperCase" | "replaceAll" | "padStart" | "padEnd" | "repeat" - | "normalize" | "codePointAt" | "localeCompare" => true, + | "codePointAt" | "localeCompare" => true, // Annex B §B.2.2 HTML wrappers (`bold`, `link`, `anchor`, …) are // string-only in the spec but collide with common user method // names — chalk's `chalk.bold(s)` is a styled-string builder @@ -337,6 +337,9 @@ pub fn try_lower_property_get_method_call( // to its source text and wrapped it in ``. An Any-typed // receiver that really is a string still gets them via the // `jsval.is_string()` arm of `js_native_call_method`. + // (`normalize` is intentionally NOT in this unconditional list — the + // arg-gated `"normalize" if args.len() <= 1` arm below handles it so + // user 2-arg `normalize(pathname, matched)` methods fall through.) // Issue #638: `replace` is also string-exclusive, but routing // it here unconditionally caused regressions in async dispatch // pathways. Only fire when args[1] is statically detectable as @@ -363,6 +366,11 @@ pub fn try_lower_property_get_method_call( // startsWith / endsWith only exist on String — both 1-arg // and 2-arg (searchString, position) forms route here. "startsWith" | "endsWith" if args.len() == 1 || args.len() == 2 => true, + // `normalize` is string-exclusive only at 0/1 args. User classes + // commonly define 2-arg `normalize(pathname, matched)` methods + // (Next.js route normalizers) — those must fall through to the + // runtime dispatcher instead of erroring on String arity. 
+ "normalize" if args.len() <= 1 => true, "lastIndexOf" if args.len() == 1 => true, _ => false, }; diff --git a/crates/perry-codegen/src/lower_string_method.rs b/crates/perry-codegen/src/lower_string_method.rs index 3ebf88f68d..8b6808a2bf 100644 --- a/crates/perry-codegen/src/lower_string_method.rs +++ b/crates/perry-codegen/src/lower_string_method.rs @@ -735,19 +735,19 @@ pub(crate) fn lower_string_method( Ok(nanbox_string_inline(blk, &result)) } "normalize" => { - // 0 or 1 arg. The runtime applies ToString + form validation: - // omitted (undefined) → NFC default; explicit null/""/"BAD" → - // RangeError. Pass the raw NaN-boxed form value (#2782). - if args.len() > 1 { - bail!( - "perry-codegen: String.normalize expects 0 or 1 args, got {}", - args.len() - ); - } + // Takes the form from args[0]; per spec, surplus args are + // evaluated then ignored. The runtime applies ToString + form + // validation: omitted (undefined) → NFC default; explicit + // null/""/"BAD" → RangeError. Pass the raw NaN-boxed form + // value (#2782). let form_box = if args.is_empty() { crate::nanbox::double_literal(f64::from_bits(crate::nanbox::TAG_UNDEFINED)) } else { - lower_expr(ctx, &args[0])? + let form = lower_expr(ctx, &args[0])?; + for extra in &args[1..] { + let _ = lower_expr(ctx, extra)?; + } + form }; let blk = ctx.block(); let recv_handle = unbox_str_handle(blk, &recv_box); diff --git a/crates/perry-codegen/src/runtime_decls/strings.rs b/crates/perry-codegen/src/runtime_decls/strings.rs index c27e08532f..9f4080b4de 100644 --- a/crates/perry-codegen/src/runtime_decls/strings.rs +++ b/crates/perry-codegen/src/runtime_decls/strings.rs @@ -310,6 +310,11 @@ pub fn declare_phase_b_strings(module: &mut LlModule) { VOID, &[PTR, I64, PTR, I64], ); + // Generic "throw Error/TypeError/RangeError with optional Node `.code`". + // Args: (msg_ptr, msg_len, code_ptr, code_len, kind). Used by the + // WorkerNew unresolved-path fallback. 
Helper diverges (`-> !`); declared + // as void-return for LLVM purposes. + module.declare_function("js_throw_error_with_code", VOID, &[PTR, I64, PTR, I64, I32]); module.declare_function("js_map_set", I64, &[I64, DOUBLE, DOUBLE]); module.declare_function("js_map_get", DOUBLE, &[I64, DOUBLE]); module.declare_function("js_map_has", I32, &[I64, DOUBLE]); @@ -1118,6 +1123,14 @@ pub fn declare_phase_b_strings(module: &mut LlModule) { // class_id from the value (ClassRef payload or ObjectHeader.class_id) // and wires the (child, parent) edge into CLASS_REGISTRY. module.declare_function("js_register_class_parent_dynamic", VOID, &[I32, DOUBLE]); + // Decl-site snapshot of a function-nested class's captured locals — + // consumed by the dynamic-construction replay (`new mod.C()`). + module.declare_function("js_class_register_capture_values", VOID, &[I32, PTR, I64]); + // Static-method prologue read of one decl-site capture snapshot slot. + module.declare_function("js_class_capture_value", DOUBLE, &[I32, I32]); + // `super(...spread)` — dynamic-arity ancestor ctor invocation on `this`. + module.declare_function("js_super_construct_apply", VOID, &[I32, DOUBLE, DOUBLE]); + module.declare_function("js_array_push_spread_any", I64, &[I64, DOUBLE]); // Issue #711 part 2: prototype-based class declaration via // `.prototype = `. 
Binds an object as the function's // prototype source; subsequent `class X extends ` lookups diff --git a/crates/perry-codegen/src/runtime_decls/strings_part2.rs b/crates/perry-codegen/src/runtime_decls/strings_part2.rs index 07ab7ff044..b9d072edd8 100644 --- a/crates/perry-codegen/src/runtime_decls/strings_part2.rs +++ b/crates/perry-codegen/src/runtime_decls/strings_part2.rs @@ -1191,6 +1191,13 @@ pub(crate) fn declare_phase_b_strings_part2(module: &mut LlModule) { module.declare_function("js_abort_signal_throw_if_aborted", DOUBLE, &[I64]); module.declare_function("js_event_target_new", I64, &[]); module.declare_function("js_event_new", I64, &[DOUBLE, DOUBLE, I32]); + // `super(type, options)` from `class X extends Event/CustomEvent` — + // initializes Event fields onto the existing subclass `this`. + module.declare_function( + "js_event_subclass_init", + DOUBLE, + &[DOUBLE, DOUBLE, DOUBLE, I32, I32], + ); module.declare_function("js_custom_event_new", I64, &[DOUBLE, DOUBLE, I32]); module.declare_function("js_dom_exception_new", I64, &[DOUBLE, DOUBLE]); module.declare_function("js_event_target_add_event_listener", VOID, &[I64, I64, I64]); diff --git a/crates/perry-hir/src/analysis/uses_this.rs b/crates/perry-hir/src/analysis/uses_this.rs index 769fd99f79..898e3786ee 100644 --- a/crates/perry-hir/src/analysis/uses_this.rs +++ b/crates/perry-hir/src/analysis/uses_this.rs @@ -9,6 +9,7 @@ pub(crate) fn uses_this_expr(expr: &Expr) -> bool { match expr { Expr::This => true, Expr::SuperCall(_) + | Expr::SuperCallSpread(_) | Expr::SuperMethodCall { .. } | Expr::SuperPropertyGet { .. } | Expr::SuperPropertySet { .. } diff --git a/crates/perry-hir/src/ir/decl.rs b/crates/perry-hir/src/ir/decl.rs index 9e97b729ec..be690e1de3 100644 --- a/crates/perry-hir/src/ir/decl.rs +++ b/crates/perry-hir/src/ir/decl.rs @@ -116,6 +116,14 @@ pub struct Import { /// Always `false` on `is_dynamic` synthetic edges (those are already /// dynamic targets by virtue of `is_dynamic`). 
pub is_dynamic_target: bool, + /// Next.js lazy-require: this `import _req_N from 'S'` was synthesized by + /// the CJS→ESM wrap from a `require('S')` whose every call site is inside a + /// FUNCTION body (never module top-level). Node loads such a module lazily + /// — only when the enclosing function runs — so it must NOT pin the target + /// eager. Like `is_dynamic`, the target still enters the compile graph but + /// is left `Deferred` unless some other (top-level) edge reaches it; the + /// require shim triggers the target's `__init` on first `require()` call. + pub is_deferred_require: bool, } /// Import specifier diff --git a/crates/perry-hir/src/ir/expr.rs b/crates/perry-hir/src/ir/expr.rs index 95c68248c4..9f55b9be1b 100644 --- a/crates/perry-hir/src/ir/expr.rs +++ b/crates/perry-hir/src/ir/expr.rs @@ -122,6 +122,13 @@ pub enum Expr { type_args: Vec, }, + /// `super(...)` with spread arguments (`super(...arguments)` — the tsc + /// pass-through-ctor emit zod's ZodNumber/ZodBigInt use). The parent + /// ctor is invoked at runtime through the CLASS_CONSTRUCTORS registry + /// with the materialized args array (codegen can't inline a dynamic + /// arg count). + SuperCallSpread(Vec), + // Named function reference FuncRef(FuncId), @@ -364,6 +371,31 @@ pub enum Expr { parent_expr: Box, }, + /// Snapshot the CURRENT values of a function-nested class's captured + /// outer-scope locals into the runtime `CLASS_CAPTURE_VALUES` table. + /// Emitted at the source-order position of the class declaration + /// (parallel to `RegisterClassParentDynamic`), so dynamic construction + /// of the class VALUE (`exports.C = C; … new mod.C()` — the webpack / + /// zod bundle pattern) can fill the synthesized `__perry_cap_` + /// constructor params. Static `new C()` sites keep passing captures as + /// trailing args and don't consult the table. 
+ RegisterClassCaptures { + class_name: String, + captures: Vec, + }, + + /// Read slot `index` of a class's decl-site capture snapshot + /// (`CLASS_CAPTURE_VALUES`, written by `RegisterClassCaptures`). Used by + /// STATIC method bodies of function-nested capturing classes — statics + /// have no instance to carry `__perry_cap_*` fields, so their prologue + /// rebinds read the snapshot instead (vendored zod's + /// `static create(...) { … typeName: k.ZodRecord … }` where `k` is an + /// enclosing-function local). + ClassCaptureValue { + class_name: String, + index: u32, + }, + /// Issue #894: `class C { static [keyExpr] = initExpr }` where the /// class is returned from a factory function body. The static-Symbol /// registration must re-run each time the factory is called, with diff --git a/crates/perry-hir/src/lower/context.rs b/crates/perry-hir/src/lower/context.rs index d9d60a97bf..a7e89e4153 100644 --- a/crates/perry-hir/src/lower/context.rs +++ b/crates/perry-hir/src/lower/context.rs @@ -137,6 +137,7 @@ impl LoweringContext { current_class_super_ident: None, mixin_funcs: HashMap::new(), anon_shape_classes: HashMap::new(), + forward_class_names: std::collections::HashSet::new(), next_anon_shape_id: 0, class_method_return_types: Vec::new(), class_captures: Vec::new(), diff --git a/crates/perry-hir/src/lower/expr_call/mod.rs b/crates/perry-hir/src/lower/expr_call/mod.rs index 56f926a860..e9762d5915 100644 --- a/crates/perry-hir/src/lower/expr_call/mod.rs +++ b/crates/perry-hir/src/lower/expr_call/mod.rs @@ -336,7 +336,16 @@ fn lower_call_inner(ctx: &mut LoweringContext, call: &ast::CallExpr) -> Result { - // super() call in constructor + // super() call in constructor. 
With spread args + // (`super(...arguments)` — tsc's pass-through-ctor emit) the + // parent ctor is invoked at runtime via the + // CLASS_CONSTRUCTORS registry with the materialized args + // array; the flat lowering would pass the spread operand as + // ONE positional arg (zod's ZodNumber stored the whole + // `arguments` object into `this._def`). + if let Some(spread_args) = spread_args { + return Ok(Expr::SuperCallSpread(spread_args)); + } Ok(Expr::SuperCall(args)) } ast::Callee::Expr(expr) => { diff --git a/crates/perry-hir/src/lower/lower_expr.rs b/crates/perry-hir/src/lower/lower_expr.rs index a35cc17ba0..def80fddd2 100644 --- a/crates/perry-hir/src/lower/lower_expr.rs +++ b/crates/perry-hir/src/lower/lower_expr.rs @@ -509,6 +509,13 @@ pub(crate) fn lower_expr(ctx: &mut LoweringContext, expr: &ast::Expr) -> Result< } else if ctx.lookup_class(&name).is_some() { // Class used as a first-class value (e.g., { Point: Point }) Ok(Expr::ClassRef(name)) + } else if ctx.forward_class_names.contains(&name) { + // Forward reference to a sibling class declared LATER in the + // same function body (vendored zod: ZodType.optional() → + // ZodOptional.create(...)). JS resolves this at call time; + // emit a ClassRef by name — codegen resolves it from the + // class registry, which has every pending class by then. + Ok(Expr::ClassRef(name)) } else if name == "undefined" { // Global undefined identifier Ok(Expr::Undefined) diff --git a/crates/perry-hir/src/lower/lowering_context.rs b/crates/perry-hir/src/lower/lowering_context.rs index 9693a79297..8f76971f15 100644 --- a/crates/perry-hir/src/lower/lowering_context.rs +++ b/crates/perry-hir/src/lower/lowering_context.rs @@ -477,6 +477,15 @@ pub struct LoweringContext { /// field layout. Dedup is per-module only; cross-module dedup would need /// a stable hash and is deferred. pub(crate) anon_shape_classes: HashMap, + /// Class DECLARATION names at the top level of the function body + /// currently being lowered. 
JS resolves a method-body reference to a + /// sibling class declared LATER in the same function at call time + /// (vendored zod: `ZodType.optional()` calls `ZodOptional.create(...)` + /// with ZodOptional declared hundreds of lines below) — without this + /// set the Ident lowered to the unknown-global sentinel and the member + /// call dispatched into `Object.create`. Scoped save/restore in + /// `lower_fn_body_block_stmt`. + pub(crate) forward_class_names: std::collections::HashSet, /// Counter for generating anon-class names (`__AnonShape_N`). // #854: initialized in `new` but unread — anon-shape classes are now named // by content-addressed FNV hash (see `synthesize_anon_shape_class`), not by diff --git a/crates/perry-hir/src/lower/module_decl.rs b/crates/perry-hir/src/lower/module_decl.rs index bea448887c..28fad6be8d 100644 --- a/crates/perry-hir/src/lower/module_decl.rs +++ b/crates/perry-hir/src/lower/module_decl.rs @@ -458,6 +458,7 @@ pub(crate) fn lower_module_decl( type_only: whole_decl_type_only, is_dynamic: false, is_dynamic_target: false, + is_deferred_require: false, }); } ast::ModuleDecl::ExportDecl(export) => { diff --git a/crates/perry-hir/src/lower_decl/block.rs b/crates/perry-hir/src/lower_decl/block.rs index 5f7245cd96..bdc8d88416 100644 --- a/crates/perry-hir/src/lower_decl/block.rs +++ b/crates/perry-hir/src/lower_decl/block.rs @@ -184,13 +184,20 @@ pub fn lower_fn_body_block_stmt( // Phase 1: pre-define hoisted FnDecl locals so forward references in // any earlier statement resolve via `lookup_local`. Generator and - // async-generator FnDecls are excluded — those go through the - // hoist-to-top-level + FuncRef path in `lower_body_stmt` and aren't - // closure-bound at the source position. + // async-generator FnDecls ARE included: `lower_body_stmt` lowers them to + // a top-level function plus a source-position `Stmt::Let { init: FuncRef }` + // binding the name. 
Spec function-declaration hoisting still applies to + // generators, so a forward reference (`A.gen = gen` ABOVE the + // `function* gen(){}` in a webpack/ncc inner module — next/dist/compiled/ + // edge-runtime's `consumeUint8ArrayReadableStream`) must resolve. We + // pre-define the local here (so `lookup_local` succeeds at the forward + // reference) and Phase 3 moves the FuncRef `Let` to the front (so it is + // initialized before that reference runs). The FuncRef value is pure, so + // reordering it ahead of other statements is safe. let mut hoisted_id_set: HashSet = HashSet::new(); for stmt in &block.stmts { if let ast::Stmt::Decl(ast::Decl::Fn(fn_decl)) = stmt { - if fn_decl.function.body.is_none() || fn_decl.function.is_generator { + if fn_decl.function.body.is_none() { continue; } let name = fn_decl.ident.sym.to_string(); @@ -203,15 +210,88 @@ pub fn lower_fn_body_block_stmt( } } + // Phase 1.5: pre-register sibling class DECLARATION names so forward + // references inside earlier statements/method bodies resolve to + // `ClassRef` instead of the unknown-global sentinel. JS resolves + // these at call time (vendored zod: `ZodType.optional()` calls + // `ZodOptional.create(...)` declared far below in the same webpack + // module function). Scoped: the previous set is restored on exit so + // names don't leak across function bodies. + let saved_forward_class_names = ctx.forward_class_names.clone(); + for stmt in &block.stmts { + if let ast::Stmt::Decl(ast::Decl::Class(class_decl)) = stmt { + ctx.forward_class_names + .insert(class_decl.ident.sym.to_string()); + } + } + // Phase 2: lower the body. The inner FnDecl arm in `lower_body_stmt` // calls `lookup_local(name)` and reuses our pre-defined id. 
- let body = match lower_block_stmt(ctx, block) { + let mut body = match lower_block_stmt(ctx, block) { Ok(body) => body, Err(err) => { ctx.current_strict = parent_strict; + ctx.forward_class_names = saved_forward_class_names; return Err(err); } }; + ctx.forward_class_names = saved_forward_class_names; + + // Re-register capture snapshots for classes declared in this body at + // its END. The decl-site `RegisterClassCaptures` runs before later + // statements assign captured vars (tsc emits TS-enum namespaces AFTER + // the classes that reference them — vendored zod's + // ZodFirstPartyTypeKind), so static-method snapshot reads and post- + // return dynamic constructions need the FINAL values. Inserted before + // a trailing `return` when present; bodies with early returns keep the + // decl-site snapshot for those paths. + { + let mut re_regs: Vec = Vec::new(); + for stmt in &block.stmts { + if let ast::Stmt::Decl(ast::Decl::Class(class_decl)) = stmt { + let cname = class_decl.ident.sym.to_string(); + if let Some(captured) = ctx.lookup_class_captures(&cname) { + if !captured.is_empty() { + let captures: Vec = + captured.iter().map(|id| Expr::LocalGet(*id)).collect(); + // Sibling code lowered BEFORE this class registered + // its captures (forward refs — zod's + // `function createZodEnum(...) { return new + // ZodEnum({...}) }` declared above the class) has + // `new (…)` sites with NO cap args appended; + // the inline binder then misfills the ctor params. + // Append the raw outer ids now; sites lowered after + // registration already end with exactly these ids + // and are skipped (tail-match guard). Class members + // were handled by `append_self_sites` with remapped + // ids — their tails don't match the raw ids, but + // they ALREADY carry appends; restrict this pass to + // non-member code by walking the lowered body only + // (member bodies live in pending_classes, not here). 
+ let cap_args: Vec<(perry_types::LocalId, perry_types::LocalId)> = + captured.iter().map(|id| (*id, *id)).collect(); + for s in body.iter_mut() { + super::class_captures::append_new_args_stmt(s, &cname, &cap_args, true); + } + re_regs.push(Stmt::Expr(Expr::RegisterClassCaptures { + class_name: cname, + captures, + })); + } + } + } + } + if !re_regs.is_empty() { + let insert_at = if matches!(body.last(), Some(Stmt::Return(_))) { + body.len() - 1 + } else { + body.len() + }; + for (i, s) in re_regs.into_iter().enumerate() { + body.insert(insert_at + i, s); + } + } + } // Undefined-initialised entry slots for hoisted `var`s declared in // nested blocks (see predefine_var_bindings_in_function_body docs). @@ -238,10 +318,18 @@ pub fn lower_fn_body_block_stmt( let mut hoisted_lets: Vec = Vec::new(); let mut other: Vec = Vec::new(); for s in body { + // A regular/async FnDecl lowers to a `Let { init: Closure }`; a + // generator/async-generator FnDecl lowers to a `Let { init: FuncRef }` + // (the body lives in a hoisted top-level function). Both forms are + // hoisted to the front per spec function-declaration semantics. let is_hoisted = matches!( &s, Stmt::Let { id, init: Some(Expr::Closure { .. }), .. } if hoisted_id_set.contains(id) + ) || matches!( + &s, + Stmt::Let { id, init: Some(Expr::FuncRef(_)), .. } + if hoisted_id_set.contains(id) ); if is_hoisted { hoisted_lets.push(s); diff --git a/crates/perry-hir/src/lower_decl/body_stmt.rs b/crates/perry-hir/src/lower_decl/body_stmt.rs index 0eb76fa194..f1ed467cfa 100644 --- a/crates/perry-hir/src/lower_decl/body_stmt.rs +++ b/crates/perry-hir/src/lower_decl/body_stmt.rs @@ -285,6 +285,23 @@ pub fn lower_body_stmt(ctx: &mut LoweringContext, stmt: &ast::Stmt) -> Result` ctor params. Static `new C()` + // sites still pass captures as trailing args directly. 
+ if let Some(captured) = ctx.lookup_class_captures(&class.name) { + if !captured.is_empty() { + let captures: Vec = + captured.iter().map(|id| Expr::LocalGet(*id)).collect(); + result.push(Stmt::Expr(Expr::RegisterClassCaptures { + class_name: class.name.clone(), + captures, + })); + } + } ctx.pending_classes.push(class); } else { // Duplicate same-named class: still evaluate its computed diff --git a/crates/perry-hir/src/lower_decl/class_captures.rs b/crates/perry-hir/src/lower_decl/class_captures.rs index 730f8ee3c6..a46f887b22 100644 --- a/crates/perry-hir/src/lower_decl/class_captures.rs +++ b/crates/perry-hir/src/lower_decl/class_captures.rs @@ -32,6 +32,7 @@ pub fn synthesize_class_captures( setters: &mut Vec<(String, Function)>, computed_members: &mut Vec, constructor: &mut Option, + static_methods: &mut Vec, ) { let module_level_ids = ctx.module_level_ids.clone(); let outer_scope_ids: std::collections::HashSet = @@ -62,6 +63,15 @@ pub fn synthesize_class_captures( union_captures.insert(id); } } + // STATIC methods reference enclosing-fn locals too (vendored zod's + // `static create(...)` reads the ZodFirstPartyTypeKind enum local). + // Their refs join the union so the decl-site snapshot includes them; + // the rewrite below reads the snapshot instead of instance fields. + for sm in static_methods.iter() { + for id in collect_method_captures(sm, &outer_scope_ids, &module_level_ids) { + union_captures.insert(id); + } + } // Issue #740: field initializers (`readonly _tag = tag` declared on // a class nested inside a function) also capture outer-scope locals. // Without this, `LocalGet(outer_id)` inside a field's init expression @@ -209,7 +219,9 @@ pub fn synthesize_class_captures( // Helper closure: build a fresh-id map for one function's body, // rewrite the body refs (with field-write propagation), and // prepend the rebinding lets. 
- let rewrite_method_body = |ctx: &mut LoweringContext, body: &mut Vec| { + let rewrite_method_body = |ctx: &mut LoweringContext, + body: &mut Vec| + -> std::collections::HashMap { let mut id_map: std::collections::HashMap = std::collections::HashMap::new(); let mut prologue: Vec = Vec::new(); @@ -240,23 +252,99 @@ pub fn synthesize_class_captures( ); prologue.append(body); *body = prologue; + id_map }; - // 2. Methods / getters / setters. + // SELF-construction inside this class's own members: `new (…)` + // sites in method bodies were lowered BEFORE this class registered its + // captures, so the `Expr::New` Ident arm appended nothing (vendored + // zod's `_addCheck(e){ return new ZodString({…this._def…}) }`). After + // `rewrite_method_body` runs, the method prologue rebinds every capture + // under a fresh id — append those rebind ids here. Nested closure + // bodies are walked too; their capture lists already include the + // prologue ids when the closure body references them, and a closure + // whose ONLY reference is the appended arg gets the id added to its + // captures list below. + fn append_self_new_args_expr( + expr: &mut Expr, + class_name: &str, + cap_args: &[(LocalId, LocalId)], + ) { + append_new_args_expr(expr, class_name, cap_args, false) + } + fn append_self_new_args_stmt( + stmt: &mut Stmt, + class_name: &str, + cap_args: &[(LocalId, LocalId)], + ) { + append_new_args_stmt(stmt, class_name, cap_args, false) + } + + // 2. Methods / getters / setters. After each body's capture rebind, + // append the rebind ids to any SELF-construction `new (…)` + // sites the body contains (lowered before this class registered). 
+ let append_self_sites = + |body: &mut Vec, id_map: &std::collections::HashMap| { + let cap_args: Vec<(LocalId, LocalId)> = captures_vec + .iter() + .filter_map(|oid| id_map.get(oid).map(|f| (*oid, *f))) + .collect(); + for stmt in body.iter_mut() { + append_self_new_args_stmt(stmt, name, &cap_args); + } + }; for m in methods.iter_mut() { - rewrite_method_body(ctx, &mut m.body); + let id_map = rewrite_method_body(ctx, &mut m.body); + append_self_sites(&mut m.body, &id_map); } for (_, g) in getters.iter_mut() { - rewrite_method_body(ctx, &mut g.body); + let id_map = rewrite_method_body(ctx, &mut g.body); + append_self_sites(&mut g.body, &id_map); } for (_, s) in setters.iter_mut() { - rewrite_method_body(ctx, &mut s.body); + let id_map = rewrite_method_body(ctx, &mut s.body); + append_self_sites(&mut s.body, &id_map); } for member in computed_members .iter_mut() .filter(|member| !member.is_static) { - rewrite_method_body(ctx, &mut member.function.body); + let id_map = rewrite_method_body(ctx, &mut member.function.body); + append_self_sites(&mut member.function.body, &id_map); + } + + // 2b. STATIC methods: no instance carries `__perry_cap_*` fields, so + // the prologue rebinds read the decl-site snapshot instead + // (`ClassCaptureValue { class_name, index }` → + // `js_class_capture_value(class_id, index)` at codegen). The snapshot + // is written by the `RegisterClassCaptures` statement emitted at the + // class's declaration position, which runs before any user code can + // reference the class (TDZ). 
+ for sm in static_methods.iter_mut() { + let mut id_map: std::collections::HashMap = + std::collections::HashMap::new(); + let mut prologue: Vec = Vec::new(); + for (index, &outer_id) in captures_vec.iter().enumerate() { + let new_id = ctx.fresh_local(); + id_map.insert(outer_id, new_id); + prologue.push(Stmt::Let { + id: new_id, + name: format!("__perry_cap_{}", outer_id), + ty: captured_outer_types + .get(&outer_id) + .cloned() + .unwrap_or(Type::Any), + mutable: true, + init: Some(Expr::ClassCaptureValue { + class_name: name.to_string(), + index: index as u32, + }), + }); + } + crate::analysis::remap_local_ids_in_stmts(&mut sm.body, &id_map); + prologue.append(&mut sm.body); + sm.body = prologue; + append_self_sites(&mut sm.body, &id_map); } // 3. Constructor. @@ -273,26 +361,77 @@ pub fn synthesize_class_captures( // forced a ctor into existence. The SuperCall also routes known // user-class parents through the inline-parent-ctor arm so the // parent body runs, matching the no-own-ctor `new` path. - let mut ctor = constructor.take().unwrap_or_else(|| Function { - id: ctx.fresh_func(), - name: format!("{}::constructor", name), - type_params: Vec::new(), - params: Vec::new(), - return_type: Type::Void, - body: if has_heritage { - vec![Stmt::Expr(Expr::SuperCall(Vec::new()))] - } else { - Vec::new() - }, - is_async: false, - is_generator: false, - is_strict: true, - was_plain_async: false, - was_unrolled: false, - is_exported: false, - captures: Vec::new(), - decorators: Vec::new(), - }); + let mut ctor = match constructor.take() { + Some(c) => c, + None => { + // The spec default ctor FORWARDS its args: + // `constructor(...args) { super(...args) }`. A bare + // `SuperCall([])` dropped the construction-site user args, so + // `new Derived({def})` left the parent ctor's params undefined + // (vendored zod: ZodString.create → new ZodString({...}) → + // ZodType ctor never saw `def`, `this._def` stayed undefined). 
+ // Synthesize explicit forwarding params matching the closest + // pending-ancestor ctor's USER arity (its `__perry_cap_*` + // params excluded). Ancestors outside `pending_classes` + // (module-level / native parents) keep the no-arg baseline. + let parent_user_arity = if has_heritage { + let mut arity = 0usize; + let mut walker: Option = extends_name.map(|s| s.to_string()); + while let Some(pname) = walker.take() { + let Some(pc) = ctx.pending_classes.iter().find(|c| c.name == pname) else { + break; + }; + if let Some(pctor) = pc.constructor.as_ref() { + arity = pctor + .params + .iter() + .filter(|p| !p.name.starts_with("__perry_cap_")) + .count(); + break; + } + walker = pc.extends_name.clone(); + } + arity + } else { + 0 + }; + let mut params: Vec = Vec::with_capacity(parent_user_arity); + let mut super_args: Vec = Vec::with_capacity(parent_user_arity); + for i in 0..parent_user_arity { + let pid = ctx.fresh_local(); + params.push(Param { + id: pid, + name: format!("__perry_dflt_arg_{}", i), + ty: Type::Any, + default: None, + decorators: Vec::new(), + is_rest: false, + arguments_object: None, + }); + super_args.push(Expr::LocalGet(pid)); + } + Function { + id: ctx.fresh_func(), + name: format!("{}::constructor", name), + type_params: Vec::new(), + params, + return_type: Type::Void, + body: if has_heritage { + vec![Stmt::Expr(Expr::SuperCall(super_args))] + } else { + Vec::new() + }, + is_async: false, + is_generator: false, + is_strict: true, + was_plain_async: false, + was_unrolled: false, + is_exported: false, + captures: Vec::new(), + decorators: Vec::new(), + } + } + }; let mut ctor_id_map: std::collections::HashMap = std::collections::HashMap::new(); let mut assignment_stmts: Vec = Vec::with_capacity(captures_vec.len()); @@ -321,10 +460,11 @@ pub fn synthesize_class_captures( // Rewrite user-written ctor body BEFORE inserting the assignment // stmts (which already reference the fresh ids directly). 
crate::analysis::remap_local_ids_in_stmts(&mut ctor.body, &ctor_id_map); + append_self_sites(&mut ctor.body, &ctor_id_map); let super_pos = ctor .body .iter() - .position(|s| matches!(s, Stmt::Expr(Expr::SuperCall(_)))); + .position(|s| matches!(s, Stmt::Expr(Expr::SuperCall(_) | Expr::SuperCallSpread(_)))); let insert_at = super_pos.map(|p| p + 1).unwrap_or(0); for (i, stmt) in assignment_stmts.into_iter().enumerate() { ctor.body.insert(insert_at + i, stmt); @@ -353,3 +493,161 @@ pub fn synthesize_class_captures( // construction site. ctx.register_class_captures(name.to_string(), captures_vec); } + +/// Append `cap_args` (the `.1` ids) to every `new (…)` site in +/// `expr`, descending nested closures (patching their capture lists when the +/// appended id is otherwise unreferenced). With `skip_if_present`, a site +/// whose args already END with exactly the `.1` id sequence is left alone — +/// used by the post-body pass, where sites lowered AFTER the class +/// registered already carry the appends. +pub(crate) fn append_new_args_expr( + expr: &mut Expr, + class_name: &str, + cap_args: &[(LocalId, LocalId)], + skip_if_present: bool, +) { + if let Expr::New { + class_name: cn, + args, + .. + } = expr + { + if cn == class_name { + let already = skip_if_present + && args.len() >= cap_args.len() + && args[args.len() - cap_args.len()..] + .iter() + .zip(cap_args.iter()) + .all(|(a, (_, fresh))| matches!(a, Expr::LocalGet(id) if id == fresh)); + if !already { + for (_, fresh) in cap_args { + args.push(Expr::LocalGet(*fresh)); + } + } + } + } + if let Expr::Closure { body, captures, .. 
} = expr { + for stmt in body.iter_mut() { + append_new_args_stmt(stmt, class_name, cap_args, skip_if_present); + } + let mut refs = Vec::new(); + let mut visited = std::collections::HashSet::new(); + for stmt in body.iter() { + crate::analysis::collect_local_refs_stmt(stmt, &mut refs, &mut visited); + } + for (_, fresh) in cap_args { + if refs.contains(fresh) && !captures.contains(fresh) { + captures.push(*fresh); + } + } + return; + } + crate::walker::walk_expr_children_mut(expr, &mut |child| { + append_new_args_expr(child, class_name, cap_args, skip_if_present) + }); +} + +/// Statement-level driver for [`append_new_args_expr`]. +pub(crate) fn append_new_args_stmt( + stmt: &mut Stmt, + class_name: &str, + cap_args: &[(LocalId, LocalId)], + skip_if_present: bool, +) { + match stmt { + Stmt::Let { init, .. } => { + if let Some(e) = init { + append_new_args_expr(e, class_name, cap_args, skip_if_present); + } + } + Stmt::Expr(e) | Stmt::Throw(e) => { + append_new_args_expr(e, class_name, cap_args, skip_if_present) + } + Stmt::Return(opt) => { + if let Some(e) = opt { + append_new_args_expr(e, class_name, cap_args, skip_if_present); + } + } + Stmt::If { + condition, + then_branch, + else_branch, + } => { + append_new_args_expr(condition, class_name, cap_args, skip_if_present); + for s in then_branch { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + if let Some(eb) = else_branch { + for s in eb { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + } + Stmt::While { condition, body } | Stmt::DoWhile { body, condition } => { + append_new_args_expr(condition, class_name, cap_args, skip_if_present); + for s in body { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + Stmt::For { + init, + condition, + update, + body, + } => { + if let Some(s) = init { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + if let Some(e) = condition { + append_new_args_expr(e, class_name, cap_args, 
skip_if_present); + } + if let Some(e) = update { + append_new_args_expr(e, class_name, cap_args, skip_if_present); + } + for s in body { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + Stmt::Labeled { body, .. } => { + append_new_args_stmt(body, class_name, cap_args, skip_if_present) + } + Stmt::Try { + body, + catch, + finally, + } => { + for s in body { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + if let Some(c) = catch { + for s in &mut c.body { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + if let Some(fb) = finally { + for s in fb { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + } + Stmt::Switch { + discriminant, + cases, + } => { + append_new_args_expr(discriminant, class_name, cap_args, skip_if_present); + for c in cases { + if let Some(t) = &mut c.test { + append_new_args_expr(t, class_name, cap_args, skip_if_present); + } + for s in &mut c.body { + append_new_args_stmt(s, class_name, cap_args, skip_if_present); + } + } + } + Stmt::Break + | Stmt::Continue + | Stmt::LabeledBreak(_) + | Stmt::LabeledContinue(_) + | Stmt::PreallocateBoxes(_) => {} + } +} diff --git a/crates/perry-hir/src/lower_decl/class_decl.rs b/crates/perry-hir/src/lower_decl/class_decl.rs index 4d319bb5f6..84a68be62f 100644 --- a/crates/perry-hir/src/lower_decl/class_decl.rs +++ b/crates/perry-hir/src/lower_decl/class_decl.rs @@ -1108,6 +1108,7 @@ pub fn lower_class_decl( &mut setters, &mut computed_members, &mut constructor, + &mut static_methods, ); // Phase 4.1: register each method's and getter's return type so @@ -1594,6 +1595,7 @@ pub fn lower_class_from_ast( &mut setters, &mut computed_members, &mut constructor, + &mut static_methods, ); Ok(Class { diff --git a/crates/perry-hir/src/lower_decl/class_members.rs b/crates/perry-hir/src/lower_decl/class_members.rs index 4614ef7627..c31b55faa8 100644 --- a/crates/perry-hir/src/lower_decl/class_members.rs +++ 
b/crates/perry-hir/src/lower_decl/class_members.rs @@ -174,7 +174,7 @@ pub fn lower_constructor( // touch only params (not `this`), so they stay at the very top. if let Some(super_pos) = body .iter() - .position(|s| matches!(s, Stmt::Expr(Expr::SuperCall(_)))) + .position(|s| matches!(s, Stmt::Expr(Expr::SuperCall(_) | Expr::SuperCallSpread(_)))) { let tail = body.split_off(super_pos + 1); body.extend(assignments); diff --git a/crates/perry-hir/src/stable_hash/expr.rs b/crates/perry-hir/src/stable_hash/expr.rs index 167cea3749..ab1fa2ab09 100644 --- a/crates/perry-hir/src/stable_hash/expr.rs +++ b/crates/perry-hir/src/stable_hash/expr.rs @@ -50,6 +50,7 @@ impl SH for Expr { Expr::Logical { op, left, right } => { tag(h, 17); op.hash(h); left.as_ref().hash(h); right.as_ref().hash(h); } Expr::Call { callee, args, type_args, } => { tag(h, 18); callee.as_ref().hash(h); args.hash(h); type_args.hash(h); } Expr::CallSpread { callee, args, type_args, } => { tag(h, 19); callee.as_ref().hash(h); args.hash(h); type_args.hash(h); } + Expr::SuperCallSpread(args) => { tag(h, 12240); for a in args { match a { CallArg::Expr(e) | CallArg::Spread(e) => e.hash(h), } } } Expr::PodLayoutSizeOf { ty } => { tag(h, 12001); ty.hash(h); } Expr::PodLayoutAlignOf { ty } => { tag(h, 12002); ty.hash(h); } Expr::PodLayoutOffsetOf { ty, field_path } => { tag(h, 12003); ty.hash(h); field_path.hash(h); } @@ -627,6 +628,8 @@ impl SH for Expr { Expr::TaggedTemplateStrings { site_id, cooked, raw } => { tag(h, 445); site_id.hash(h); cooked.hash(h); raw.hash(h); } Expr::TemplateRaw(e) => { tag(h, 446); e.as_ref().hash(h); } Expr::RegisterClassParentDynamic { class_name, parent_expr, } => { tag(h, 447); class_name.hash(h); parent_expr.as_ref().hash(h); } + Expr::RegisterClassCaptures { class_name, captures } => { tag(h, 12241); class_name.hash(h); for c in captures { c.hash(h); } } + Expr::ClassCaptureValue { class_name, index } => { tag(h, 12242); class_name.hash(h); index.hash(h); } 
Expr::RegisterClassStaticSymbol { class_name, key_expr, value_expr, } => { tag(h, 12025); class_name.hash(h); key_expr.as_ref().hash(h); value_expr.as_ref().hash(h); } Expr::RegisterClassComputedMethod { class_name, key_expr, method_name, is_static, param_count, has_rest } => { tag(h, 12233); class_name.hash(h); key_expr.as_ref().hash(h); method_name.hash(h); is_static.hash(h); param_count.hash(h); has_rest.hash(h); } Expr::RegisterClassComputedAccessor { class_name, key_expr, getter_name, setter_name, is_static } => { tag(h, 12234); class_name.hash(h); key_expr.as_ref().hash(h); getter_name.hash(h); setter_name.hash(h); is_static.hash(h); } diff --git a/crates/perry-hir/src/stable_hash/module.rs b/crates/perry-hir/src/stable_hash/module.rs index 08b90b8b5c..32e17a8644 100644 --- a/crates/perry-hir/src/stable_hash/module.rs +++ b/crates/perry-hir/src/stable_hash/module.rs @@ -130,6 +130,7 @@ impl SH for Import { type_only, is_dynamic, is_dynamic_target, + is_deferred_require, } = self; source.hash(h); specifiers.hash(h); @@ -139,6 +140,7 @@ impl SH for Import { type_only.hash(h); is_dynamic.hash(h); is_dynamic_target.hash(h); + is_deferred_require.hash(h); } } diff --git a/crates/perry-hir/src/stable_hash/tests.rs b/crates/perry-hir/src/stable_hash/tests.rs index 9c17904db1..87e1c96dfc 100644 --- a/crates/perry-hir/src/stable_hash/tests.rs +++ b/crates/perry-hir/src/stable_hash/tests.rs @@ -251,6 +251,7 @@ fn module_metadata_affects_hash() { type_only: false, is_dynamic: false, is_dynamic_target: false, + is_deferred_require: false, }); assert_ne!(base_hash, hash_module(&m_imp)); diff --git a/crates/perry-hir/src/walker/expr_mut.rs b/crates/perry-hir/src/walker/expr_mut.rs index d5364617a1..a3c313c0e2 100644 --- a/crates/perry-hir/src/walker/expr_mut.rs +++ b/crates/perry-hir/src/walker/expr_mut.rs @@ -579,6 +579,12 @@ where Expr::RegisterClassParentDynamic { parent_expr, .. } => { f(parent_expr); } + Expr::RegisterClassCaptures { captures, .. 
} => { + for c in captures { + f(c); + } + } + Expr::ClassCaptureValue { .. } => {} Expr::RegisterClassStaticSymbol { key_expr, value_expr, @@ -853,6 +859,13 @@ where } } } + Expr::SuperCallSpread(args) => { + for a in args { + match a { + CallArg::Expr(e) | CallArg::Spread(e) => f(e), + } + } + } Expr::ArraySpread(elements) => { for el in elements { match el { diff --git a/crates/perry-hir/src/walker/expr_ref.rs b/crates/perry-hir/src/walker/expr_ref.rs index a1280d98a9..038abcffcb 100644 --- a/crates/perry-hir/src/walker/expr_ref.rs +++ b/crates/perry-hir/src/walker/expr_ref.rs @@ -580,6 +580,12 @@ where Expr::RegisterClassParentDynamic { parent_expr, .. } => { f(parent_expr); } + Expr::RegisterClassCaptures { captures, .. } => { + for c in captures { + f(c); + } + } + Expr::ClassCaptureValue { .. } => {} Expr::RegisterClassStaticSymbol { key_expr, value_expr, @@ -850,6 +856,13 @@ where } } } + Expr::SuperCallSpread(args) => { + for a in args { + match a { + CallArg::Expr(e) | CallArg::Spread(e) => f(e), + } + } + } Expr::ArraySpread(elements) => { for el in elements { match el { diff --git a/crates/perry-parser/src/lib.rs b/crates/perry-parser/src/lib.rs index 99c1639ce1..8d87ab0f6e 100644 --- a/crates/perry-parser/src/lib.rs +++ b/crates/perry-parser/src/lib.rs @@ -353,6 +353,73 @@ fn looks_like_es_module(source: &str) -> bool { Some(end) } + // A `/` starts a regex literal (not division) when the preceding token + // cannot end an expression: an operator/punctuator, start of input, or a + // keyword like `return`. Regex literals may contain unescaped quote chars + // (e.g. picomatch's `/(^[*!]|[/()[\]{}"])/`), which would desync the + // string-state scan below if skipped as ordinary code. + fn regex_can_start_here(bytes: &[u8], slash_at: usize) -> bool { + let mut i = slash_at; + while i > 0 { + i -= 1; + match bytes[i] { + b' ' | b'\t' | b'\r' | b'\n' => continue, + b'=' | b'(' | b',' | b':' | b'[' | b'!' | b'&' | b'|' | b'?' 
| b'{' | b'}' + | b';' | b'+' | b'-' | b'*' | b'%' | b'~' | b'^' | b'<' | b'>' => return true, + c if is_ident(c) => { + let end = i + 1; + let mut start = end; + while start > 0 && is_ident(bytes[start - 1]) { + start -= 1; + } + return matches!( + &bytes[start..end], + b"return" + | b"typeof" + | b"instanceof" + | b"in" + | b"of" + | b"case" + | b"do" + | b"else" + | b"void" + | b"delete" + | b"throw" + | b"new" + | b"yield" + | b"await" + ); + } + _ => return false, + } + } + true + } + + // Returns the index just past the closing `/`, or None if no regex + // terminator is found on this line (then it was division after all). + fn skip_regex_literal(bytes: &[u8], slash_at: usize) -> Option { + let mut i = slash_at + 1; + let mut in_class = false; + while i < bytes.len() { + match bytes[i] { + b'\\' => i += 2, + b'\n' => return None, + b'[' => { + in_class = true; + i += 1; + } + b']' => { + in_class = false; + i += 1; + } + b'/' if !in_class => return Some(i + 1), + _ => i += 1, + } + } + None + } + let bytes = source.as_bytes(); let mut i = 0; let mut state = State::Code; @@ -368,6 +435,11 @@ fn looks_like_es_module(source: &str) -> bool { } else if bytes[i] == b'/' && bytes.get(i + 1) == Some(&b'*') { state = State::BlockComment; i += 2; + } else if bytes[i] == b'/' && regex_can_start_here(bytes, i) { + match skip_regex_literal(bytes, i) { + Some(end) => i = end, + None => i += 1, + } } else { if prev_allows_module_item(bytes, i) { if let Some(end) = next_after_keyword(bytes, i, b"export") { @@ -707,6 +779,26 @@ pub fn swc_span_to_span(swc_span: swc_common::Span, file_id: FileId) -> Span { mod tests { use super::*; + #[test] + fn test_looks_like_es_module_survives_regex_with_quote() { + // Regression: picomatch's bundled source contains a regex literal with + // an unescaped `"` inside a character class. 
The module-detection scan + // must not enter string state there, or a trailing `export` (appended + // by the CJS wrap) is missed and the file parses as a Script. + let source = "const re = /(^[*!]|[/()[\\]{}\"])/;\nconst x = \"ok\";\nexport default x;\n"; + let module = parse_typescript(source, "vendored.js").unwrap(); + assert_eq!(module.body.len(), 3); + } + + #[test] + fn test_division_not_treated_as_regex() { + // `a / b` must not be consumed as a regex literal that would swallow + // the following string quote. + let source = "const a = 1, b = 2;\nconst c = a / b; const s = \"x\";\nexport default c;\n"; + let module = parse_typescript(source, "math.js").unwrap(); + assert_eq!(module.body.len(), 4); + } + #[test] fn test_parse_simple_function() { let source = r#" diff --git a/crates/perry-runtime/src/event_target.rs b/crates/perry-runtime/src/event_target.rs index aa8f4e3c62..567f150330 100644 --- a/crates/perry-runtime/src/event_target.rs +++ b/crates/perry-runtime/src/event_target.rs @@ -217,6 +217,21 @@ fn construct_event( if event.is_null() { return std::ptr::null_mut(); } + init_event_fields(event, type_value, options, constructor_name, detail); + event +} + +/// Shared Event field/method initialization, applied either to a freshly +/// allocated Event (`construct_event`) or to an existing subclass instance +/// (`js_event_subclass_init` — `super(type, options)` from +/// `class X extends Event`). +fn init_event_fields( + event: *mut ObjectHeader, + type_value: f64, + options: f64, + constructor_name: &[u8], + detail: Option, +) { let type_ptr = string_from_value(type_value); set_event_field( event, @@ -263,15 +278,71 @@ fn construct_event( "stopImmediatePropagation", event_stop_immediate_propagation_thunk, ); - event } +/// `super(type, options)` from a user `class X extends Event` / +/// `extends CustomEvent`: initialize the standard Event fields and methods +/// onto the EXISTING subclass instance (`this`) instead of allocating a new +/// Event. 
The subclass's own class id stays on the header — the +/// `Subclass → Event` registry edge registered at class-definition time +/// keeps `instanceof Event` and dispatch acceptance working. +#[no_mangle] +pub extern "C" fn js_event_subclass_init( + this_value: f64, + type_value: f64, + options: f64, + argc: u32, + is_custom: u32, +) -> f64 { + let Some(event) = value_as_ptr::(this_value) else { + return undefined_value(); + }; + if argc == 0 { + throw_missing_arg("type"); + } + // `class X extends CustomEvent` must initialize as a CustomEvent: the + // `constructor` field resolves to the CustomEvent global and `detail` is + // read off the options bag (mirroring the direct `new CustomEvent(...)` + // path). Plain `extends Event` keeps `b"Event"` and no `detail`. + if is_custom != 0 { + let detail = unsafe { option_detail(options) }; + init_event_fields(event, type_value, options, b"CustomEvent", Some(detail)); + } else { + init_event_fields(event, type_value, options, b"Event", None); + } + undefined_value() +} + +/// Keepalive anchor for the auto-optimize whole-program build — +/// `js_event_subclass_init` is a generated-code-only callee. +#[used] +static KEEP_JS_EVENT_SUBCLASS_INIT: extern "C" fn(f64, f64, f64, u32, u32) -> f64 = + js_event_subclass_init; + fn is_event_instance(event: *const ObjectHeader) -> bool { if event.is_null() { return false; } let class_id = unsafe { (*event).class_id }; - class_id == CLASS_ID_EVENT || class_id == CLASS_ID_CUSTOM_EVENT + if class_id == CLASS_ID_EVENT || class_id == CLASS_ID_CUSTOM_EVENT { + return true; + } + // A user subclass (`class CloseEvent extends Event`, e.g. the `ws` + // package's WebSocket events) carries its own class id; walk the + // registered parent chain looking for the Event base. 
+ let mut cur = class_id; + for _ in 0..64 { + match crate::object::get_parent_class_id(cur) { + Some(parent) if parent != 0 && parent != cur => { + if parent == CLASS_ID_EVENT || parent == CLASS_ID_CUSTOM_EVENT { + return true; + } + cur = parent; + } + _ => return false, + } + } + false } /// `new Event(type, options?)`. diff --git a/crates/perry-runtime/src/gc/mod.rs b/crates/perry-runtime/src/gc/mod.rs index b60a617167..d5516a92a5 100644 --- a/crates/perry-runtime/src/gc/mod.rs +++ b/crates/perry-runtime/src/gc/mod.rs @@ -111,24 +111,7 @@ fn gc_collect_minor_with_trigger(trigger: GcTriggerSnapshot) -> GcCollectOutcome let current_rss_bytes = crate::process::get_rss_bytes(); let evacuation_policy_allowed = gen_gc_evacuate_enabled(); let force_evacuation = gc_force_evacuate_enabled(); - // #5029: old-page defrag (C4b old-gen compaction) is skipped on cycles - // that run the conservative native-stack scan. Conservative stack words - // cannot be rewritten after a move, and per-object CONS_PINNED only - // protects DIRECT discoveries — the stress suite demonstrated a moved - // old object whose remaining referrer was not rewritten (clone shape - // lookups through it returned recycled memory). Until every such - // referrer surface is registered for rewrite, moving old objects is only - // sound when all roots are precise. Copying minors (the steady-state - // path) never run the conservative scan, so defrag keeps operating - // there via its own policy. 
- let conservative_scan_this_cycle = matches!( - roots::conservative_stack_scan_decision(), - roots::ConservativeStackScanDecision::Scan - ); - let old_page_selection = if evacuation_policy_allowed - && old_to_young_tracking_complete() - && !conservative_scan_this_cycle - { + let old_page_selection = if evacuation_policy_allowed && old_to_young_tracking_complete() { select_old_page_defrag_pages(force_evacuation) } else { OldPageDefragSelection::default() @@ -412,6 +395,7 @@ pub fn gc_init() { // singletons store heap pointers in TLS caches; keep them live and rewrite // them if a copying collection moves their backing allocations. gc_register_mutable_root_scanner(crate::object::scan_native_callable_export_roots_mut); + gc_register_mutable_root_scanner(crate::object::scan_class_capture_value_roots_mut); gc_register_mutable_root_scanner(crate::node_vm::scan_vm_roots_mut); gc_register_mutable_root_scanner(crate::tls::scan_tls_roots_mut); gc_register_mutable_root_scanner(crate::process::scan_process_finalization_roots_mut); diff --git a/crates/perry-runtime/src/node_submodules/mod.rs b/crates/perry-runtime/src/node_submodules/mod.rs index aa0ca62e69..91c8828991 100644 --- a/crates/perry-runtime/src/node_submodules/mod.rs +++ b/crates/perry-runtime/src/node_submodules/mod.rs @@ -985,6 +985,13 @@ fn submodule_has_default_object(submod_key: &str) -> bool { | "stream_consumers" | "stream_web" | "test_reporters" + // `const nodeTimers = require('node:timers')` (Next.js's + // fast-set-immediate extension) — without a default object the + // binding read the TAG_TRUE sentinel, so member reads were + // undefined and the `nodeTimers.setImmediate = patched` + // monkey-patch threw at module init. 
+ | "timers" + | "timers_promises" ) } diff --git a/crates/perry-runtime/src/object/class_constructors.rs b/crates/perry-runtime/src/object/class_constructors.rs index 444e5d5979..dc313e31aa 100644 --- a/crates/perry-runtime/src/object/class_constructors.rs +++ b/crates/perry-runtime/src/object/class_constructors.rs @@ -65,6 +65,221 @@ fn lookup_class_constructor(class_id: u32) -> Option<(usize, u32)> { .copied() } +thread_local! { + /// Decl-site snapshots of a function-nested class DECLARATION's captured + /// outer locals, keyed by class_id. Filled by the codegen-emitted + /// `js_class_register_capture_values` call at the class's source-order + /// declaration position (parallel to `js_register_class_parent_dynamic`), + /// consumed by `replay_registered_class_constructor` so dynamic + /// construction of the class VALUE (`exports.C = C; new mod.C()` — the + /// webpack / vendored-zod bundle pattern) fills the synthesized + /// `__perry_cap_` ctor params. Re-running the enclosing function + /// overwrites the snapshot (last-definition-wins) — exact for the + /// run-once module-factory pattern these bundles use; class EXPRESSIONS + /// keep their per-evaluation `__perry_ctor_caps` snapshot instead. + static CLASS_CAPTURE_VALUES: std::cell::RefCell>> = + std::cell::RefCell::new(HashMap::new()); +} + +/// Codegen FFI: snapshot `len` capture values for `class_id`. See +/// [`CLASS_CAPTURE_VALUES`]. +/// +/// # Safety +/// `values_ptr` must point at `len` readable f64 slots. 
+#[no_mangle] +pub unsafe extern "C" fn js_class_register_capture_values( + class_id: u32, + values_ptr: *const f64, + len: usize, +) { + if class_id == 0 || values_ptr.is_null() { + return; + } + let mut values = Vec::with_capacity(len); + for i in 0..len { + values.push((*values_ptr.add(i)).to_bits()); + } + CLASS_CAPTURE_VALUES.with(|m| { + m.borrow_mut().insert(class_id, values); + }); +} + +/// Keepalive anchor for the auto-optimize whole-program build — +/// `js_class_register_capture_values` is a generated-code-only callee. +#[used] +static KEEP_JS_CLASS_REGISTER_CAPTURE_VALUES: unsafe extern "C" fn(u32, *const f64, usize) = + js_class_register_capture_values; + +/// GC root scan for the capture-value snapshots (registered alongside the +/// other runtime mutable-root scanners in `gc::mod`). +pub fn scan_class_capture_value_roots_mut(visitor: &mut crate::gc::RuntimeRootVisitor<'_>) { + CLASS_CAPTURE_VALUES.with(|m| { + let mut m = m.borrow_mut(); + for values in m.values_mut() { + for bits in values.iter_mut() { + visitor.visit_nanbox_u64_slot(bits); + } + } + }); +} + +/// The decl-site capture snapshot for `class_id`, if one was registered. +fn class_capture_values(class_id: u32) -> Option> { + CLASS_CAPTURE_VALUES.with(|m| m.borrow().get(&class_id).cloned()) +} + +/// Codegen FFI: read one slot of a class's decl-site capture snapshot — +/// STATIC method prologue rebinds (statics have no instance to carry the +/// `__perry_cap_*` fields). Absent snapshot/slot reads `undefined`. +#[no_mangle] +pub extern "C" fn js_class_capture_value(class_id: u32, index: u32) -> f64 { + CLASS_CAPTURE_VALUES.with(|m| { + m.borrow() + .get(&class_id) + .and_then(|v| v.get(index as usize).copied()) + .map(f64::from_bits) + .unwrap_or(f64::from_bits(crate::value::TAG_UNDEFINED)) + }) +} + +/// Keepalive anchor (generated-code-only callee). 
+#[used] +static KEEP_JS_CLASS_CAPTURE_VALUE: extern "C" fn(u32, u32) -> f64 = js_class_capture_value; + +/// `super(...spread)` — invoke the closest registered ancestor constructor +/// of `child_cid` on the EXISTING `this`, with args from the materialized +/// `args_array` (dynamic count; the inline-super path needs a static arg +/// list). The ancestor's trailing `__perry_cap_*` params are filled from +/// its decl-site snapshot, mirroring `replay_registered_class_constructor`. +/// +/// # Safety +/// `this_value`/`args_array` must be valid NaN-boxed heap pointers. +#[no_mangle] +pub unsafe extern "C" fn js_super_construct_apply( + child_cid: u32, + this_value: f64, + args_array: f64, +) -> f64 { + let undef = f64::from_bits(crate::value::TAG_UNDEFINED); + let this_raw = (this_value.to_bits() & crate::value::POINTER_MASK) as i64; + if std::env::var_os("PERRY_SUPER_DEBUG").is_some() { + eprintln!( + "super_apply child={} this_bits={:#x} args_bits={:#x}", + child_cid, + this_value.to_bits(), + args_array.to_bits() + ); + } + if this_raw == 0 { + return undef; + } + let arr = + (args_array.to_bits() & crate::value::POINTER_MASK) as *const crate::array::ArrayHeader; + let mut cur = crate::object::get_parent_class_id(child_cid).unwrap_or(0); + let mut depth = 0usize; + while cur != 0 && depth < 64 { + if let Some((ctor_ptr, total_params)) = lookup_class_constructor(cur) { + if std::env::var_os("PERRY_SUPER_DEBUG").is_some() { + eprintln!( + "super_apply resolved ancestor cid={} total={}", + cur, total_params + ); + } + let caps = class_capture_values(cur).unwrap_or_default(); + let user_params = (total_params as usize).saturating_sub(caps.len()); + let n = if arr.is_null() { + 0 + } else { + crate::array::js_array_length(arr) + } as usize; + let mut final_args: Vec = Vec::with_capacity(total_params as usize); + for i in 0..user_params { + if i < n { + final_args.push(crate::array::js_array_get_f64(arr, i as u32)); + } else { + final_args.push(undef); + } + } + for 
bits in &caps { + final_args.push(f64::from_bits(*bits)); + } + let _ = call_vtable_method( + ctor_ptr, + this_raw, + final_args.as_ptr(), + final_args.len(), + total_params, + false, + false, + ); + return undef; + } + let next = crate::object::get_parent_class_id(cur).unwrap_or(0); + if next == cur { + break; + } + cur = next; + depth += 1; + } + undef +} + +/// Keepalive anchor (generated-code-only callee). +#[used] +static KEEP_JS_SUPER_CONSTRUCT_APPLY: unsafe extern "C" fn(u32, f64, f64) -> f64 = + js_super_construct_apply; + +/// Append the spread of `value` to `target` (array handle), handling BOTH +/// real arrays AND array-likes (Perry's `arguments` object is an +/// ObjectHeader with "0".."n-1" + "length" props — `super(...arguments)` +/// spreads it). Returns the (possibly reallocated) target handle. +/// +/// # Safety +/// `target` must be a valid ArrayHeader pointer. +#[no_mangle] +pub unsafe extern "C" fn js_array_push_spread_any( + target: *mut crate::array::ArrayHeader, + value: f64, +) -> *mut crate::array::ArrayHeader { + let jv = crate::value::JSValue::from_bits(value.to_bits()); + if !jv.is_pointer() && !jv.is_string() { + return target; + } + let raw = (value.to_bits() & crate::value::POINTER_MASK) as *const u8; + if raw.is_null() { + return target; + } + // Real array → bulk append. + let as_arr = crate::array::clean_arr_ptr(raw as *const crate::array::ArrayHeader); + if !as_arr.is_null() { + return crate::array::js_array_push_spread_f64(target, as_arr); + } + // Array-like object (arguments): read `length`, copy indexed props. 
+ let obj = raw as *const ObjectHeader; + let len_key = crate::string::js_string_from_bytes(b"length".as_ptr(), 6); + let len_v = crate::object::js_object_get_field_by_name(obj, len_key); + let len_f = f64::from_bits(len_v.bits()); + if !len_f.is_finite() || len_f < 0.0 { + return target; + } + let n = len_f as u32; + let mut cur = target; + for i in 0..n { + let idx = i.to_string(); + let key = crate::string::js_string_from_bytes(idx.as_ptr(), idx.len() as u32); + let v = crate::object::js_object_get_field_by_name(obj, key); + cur = crate::array::js_array_push_f64(cur, f64::from_bits(v.bits())); + } + cur +} + +/// Keepalive anchor (generated-code-only callee). +#[used] +static KEEP_JS_ARRAY_PUSH_SPREAD_ANY: unsafe extern "C" fn( + *mut crate::array::ArrayHeader, + f64, +) -> *mut crate::array::ArrayHeader = js_array_push_spread_any; + /// #1787: replay a class expression's constructor on a freshly-allocated /// instance. `classobj_value` is the NaN-boxed heap class object the `new` /// callee resolved to; `class_cid` is its (template) class_id; `inst` is the @@ -105,7 +320,20 @@ pub(crate) unsafe fn replay_class_object_constructor( (std::ptr::null(), 0) }; - let user_params = (total_params as usize).saturating_sub(n_caps as usize); + // A class DECLARATION reached as a heap class object (webpack interop: + // `t["default"] = PQueue` read back cross-module) has no per-evaluation + // `__perry_ctor_caps` array — fall back to the decl-site snapshot + // (CLASS_CAPTURE_VALUES), exactly like the ClassRef replay path. Without + // this, the trailing `__perry_cap_*` ctor params read the USER args + // (p-queue's `new PQueue({...})` left `i.default` undefined and + // `new e.queueClass` threw "undefined is not a constructor"). 
+ let snapshot_caps: Vec = if n_caps == 0 { + class_capture_values(class_cid).unwrap_or_default() + } else { + Vec::new() + }; + let effective_caps = (n_caps as usize).max(snapshot_caps.len()); + let user_params = (total_params as usize).saturating_sub(effective_caps); let undef = f64::from_bits(crate::value::TAG_UNDEFINED); let mut final_args: Vec = Vec::with_capacity(total_params as usize); for i in 0..user_params { @@ -118,6 +346,9 @@ pub(crate) unsafe fn replay_class_object_constructor( for j in 0..n_caps { final_args.push(crate::array::js_array_get_f64(caps_arr, j)); } + for bits in &snapshot_caps { + final_args.push(f64::from_bits(*bits)); + } let _ = call_vtable_method( ctor_ptr, inst as i64, @@ -145,15 +376,24 @@ pub(crate) unsafe fn replay_registered_class_constructor( return; }; + // A function-nested class declaration may carry a decl-site capture + // snapshot (see CLASS_CAPTURE_VALUES). The ctor's trailing + // `__perry_cap_` params are filled from it; user args fill the rest. 
+ let caps = class_capture_values(class_cid).unwrap_or_default(); + let user_params = (total_params as usize).saturating_sub(caps.len()); + let undef = f64::from_bits(crate::value::TAG_UNDEFINED); let mut final_args: Vec = Vec::with_capacity(total_params as usize); - for i in 0..total_params as usize { + for i in 0..user_params { if !args_ptr.is_null() && i < args_len { final_args.push(*args_ptr.add(i)); } else { final_args.push(undef); } } + for bits in &caps { + final_args.push(f64::from_bits(*bits)); + } let _ = call_vtable_method( ctor_ptr, inst as i64, diff --git a/crates/perry-runtime/src/object/class_registry.rs b/crates/perry-runtime/src/object/class_registry.rs index 993de20828..a87e7140f9 100644 --- a/crates/perry-runtime/src/object/class_registry.rs +++ b/crates/perry-runtime/src/object/class_registry.rs @@ -39,6 +39,24 @@ fn is_non_constructable_builtin_function_value(value: f64) -> bool { super::native_module::builtin_closure_is_non_constructable_value(value) } +/// True when `value` is a bound native-module method/export closure +/// (`BOUND_METHOD_FUNC_PTR` trampoline — what a `require('stream').Writable` +/// property read produces). These represent real Node classes/functions and +/// must be accepted as `extends` targets. +fn is_bound_native_method_closure_value(value: f64) -> bool { + // Gate on the native-module metadata, not the raw BOUND_METHOD_FUNC_PTR + // trampoline: reified `Function.prototype.{bind,call,apply}` values + // (`reify_function_method_value`) share that trampoline but are NOT native + // constructors, so matching the sentinel alone would let `class X extends + // obj.method {}` skip the spec-required TypeError and silently stay + // parentless. A real native-module export carries a non-empty module name. + unsafe { + super::native_module::bound_native_callable_module_and_method(value) + .map(|(module, _)| !module.is_empty()) + .unwrap_or(false) + } +} + fn throw_non_constructable_builtin_function() -> ! 
{ super::object_ops::throw_object_type_error(b"Function is not a constructor") } @@ -295,6 +313,28 @@ pub(crate) fn class_parent_closure(class_id: u32) -> Option { .and_then(|g| g.as_ref().and_then(|m| m.get(&class_id).copied())) } +/// Walk the class parent chain looking for a registered parent-closure edge. +/// `super()` dispatch needs this because the instance's class_id is the +/// MOST-DERIVED class, while the closure-parent edge is keyed by the class +/// that directly `extends ` — possibly an ancestor. +pub(crate) fn parent_closure_in_chain(class_id: u32) -> Option { + let mut cid = class_id; + let mut depth = 0u32; + while depth < 32 && cid != 0 { + if let Some(addr) = class_parent_closure(cid) { + return Some(addr); + } + match get_parent_class_id(cid) { + Some(p) if p != 0 && p != cid => { + cid = p; + depth += 1; + } + _ => break, + } + } + None +} + /// Reverse lookup: which declared class's `.prototype` is this heap object? /// Used by `Object.getOwnPropertyDescriptor(C.prototype, name)` to surface /// vtable accessors as own properties of the prototype object. Linear scan — @@ -1598,7 +1638,11 @@ pub unsafe extern "C" fn js_new_function_construct( || jv.is_any_string() || jv.is_bigint() { - super::object_ops::throw_object_type_error(b"is not a constructor"); + let desc = unsafe { super::object_ops::describe_value_for_type_error(func_value) }; + super::object_ops::throw_object_type_error_with_suffix( + &format!("{desc} "), + "is not a constructor", + ); } } // `new (new String(""))` / `new (new Number(1))` — a boxed primitive WRAPPER @@ -4301,6 +4345,38 @@ pub extern "C" fn js_register_class_parent(class_id: u32, parent_class_id: u32) /// recursive helper that returns its receiver can't create a cycle. #[no_mangle] pub extern "C" fn js_register_class_parent_dynamic(class_id: u32, parent_value: f64) { + // A globalThis builtin constructor closure is a valid superclass + // (`class CloseEvent extends Event` — the `ws` package's WebSocket + // events). 
Resolve it through the same name table the dynamic + // `instanceof` path uses and register the edge when the builtin has a + // runtime class id, so subclass instances satisfy `instanceof Event` + // and Event-shaped dispatch gates. Builtins without a class id keep the + // parentless baseline (no throw — they ARE constructors). + if let Some(name) = identify_global_builtin_constructor(parent_value) { + let parent_cid = super::instanceof::global_builtin_constructor_class_id(name); + if parent_cid != 0 && parent_cid != class_id { + register_class(class_id, parent_cid); + } + // A dynamic subclass that resolves its parent through this builtin + // branch must still record the fetch-parent kind so `new X()` attaches + // the native Request/Response handle — the bookkeeping below this + // early return would otherwise be skipped. + match name { + "Request" => super::register_fetch_parent_kind(class_id, 1), + "Response" => super::register_fetch_parent_kind(class_id, 2), + _ => {} + } + return; + } + // A bound native-module export (`const { Writable } = require('stream'); + // class Receiver extends Writable` — the `ws` package's shape) is a real + // Node constructor even though Perry models it as a BOUND_METHOD closure. + // Keep the parentless baseline rather than mis-throwing; native-parent + // method inheritance is handled by codegen's extends_name machinery, not + // by this registry edge. + if is_bound_native_method_closure_value(parent_value) { + return; + } // Spec: a non-`null` superclass that is not a constructor throws a TypeError // at class-definition time (before any `.prototype` access). (Test262 // subclass/superclass-* and definition/invalid-extends.) 
diff --git a/crates/perry-runtime/src/object/field_get_set.rs b/crates/perry-runtime/src/object/field_get_set.rs index 2adf3103e5..23594f5572 100644 --- a/crates/perry-runtime/src/object/field_get_set.rs +++ b/crates/perry-runtime/src/object/field_get_set.rs @@ -2681,6 +2681,11 @@ fn reified_function_method_name(name: &str) -> Option<&'static [u8]> { "call" => Some(b"call"), "apply" => Some(b"apply"), "isPrototypeOf" => Some(b"isPrototypeOf"), + // `fn.toString` read as a VALUE (`original.toString.bind(original)` — + // Next.js's unhandled-rejection extension preserves patched-function + // toString this way). Previously read back `undefined`, so the + // subsequent `.bind` threw "Bind must be called on a function". + "toString" => Some(b"toString"), _ => None, } } diff --git a/crates/perry-runtime/src/object/field_set_by_name.rs b/crates/perry-runtime/src/object/field_set_by_name.rs index 0f06136a9b..e16f604e10 100644 --- a/crates/perry-runtime/src/object/field_set_by_name.rs +++ b/crates/perry-runtime/src/object/field_set_by_name.rs @@ -617,6 +617,18 @@ pub extern "C" fn js_object_set_field_by_name( super::set_buffer_pool_size(value); return; } + // CommonJS module exports are MUTABLE in Node: monkey-patching + // like Next.js's `require('node:timers').setImmediate = patched` + // must store the override (read back via `vt_get_own_field`) + // instead of falling through to the frozen-object throw. + if !module_name.is_empty() && property_name != "__module__" { + super::native_module::native_namespace_prop_override_store( + &module_name, + property_name, + value, + ); + return; + } } // Refs #486 (hono): class setter dispatch. 
JS spec: a `set X(...)` diff --git a/crates/perry-runtime/src/object/global_this.rs b/crates/perry-runtime/src/object/global_this.rs index 602602108e..017f58a73b 100644 --- a/crates/perry-runtime/src/object/global_this.rs +++ b/crates/perry-runtime/src/object/global_this.rs @@ -460,8 +460,49 @@ pub unsafe extern "C" fn js_fetch_or_value_super( undef } _ => { + // `class PQ extends t {}` nested inside another function (webpack/ + // ncc inner modules — next/dist/compiled/p-queue extending + // eventemitter3): HIR lowers the heritage Ident at class-DECL + // scope, but codegen re-emits that expression inside the + // constructor, where the captured slot index is unrelated, so + // `parent_val` arrives stale (undefined). The decl-site + // `js_register_class_parent_dynamic` call DID see the live value + // and recorded it in CLASS_PARENT_CLOSURES — prefer that + // registration whenever `parent_val` isn't actually callable, so + // the parent function body still runs with `this` bound (sets + // `this._events` etc.). A valid closure / class-object parent + // value keeps the existing direct-dispatch path untouched. + let mut callee = parent_val; + let bits = parent_val.to_bits(); + const POINTER_TAG: u64 = 0x7FFD_0000_0000_0000; + const TAG_MASK: u64 = 0xFFFF_0000_0000_0000; + const PTR_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; + let usable = if bits & TAG_MASK == POINTER_TAG { + let p = (bits & PTR_MASK) as usize; + // A real callability test: a closure, or a per-evaluation class + // OBJECT (constructor). The prior `class_id != 0` accepted any + // pointer-tagged object with a class id — including non-callable + // instances — so a stale captured slot holding one of those + // skipped the `parent_closure_in_chain` recovery below and + // dispatched `js_native_call_value` on a non-function. 
+ crate::closure::is_closure_ptr(p) + || super::class_registry::is_class_object_ptr(p as *const u8) + } else { + // INT32-tagged ClassRefs route through the static super paths + // before reaching here; anything else (undefined / a stale + // numeric slot) is not a constructor. + bits & TAG_MASK == 0x7FFE_0000_0000_0000 + }; + if !usable { + if let Some(obj) = subclass_this_object_ptr(this_box) { + let cid = crate::object::js_object_get_class_id(obj); + if let Some(addr) = super::class_registry::parent_closure_in_chain(cid) { + callee = f64::from_bits(POINTER_TAG | addr as u64); + } + } + } let prev = crate::object::js_implicit_this_set(this_box); - let r = crate::closure::js_native_call_value(parent_val, args_ptr, args_len); + let r = crate::closure::js_native_call_value(callee, args_ptr, args_len); crate::object::js_implicit_this_set(prev); r } diff --git a/crates/perry-runtime/src/object/instanceof.rs b/crates/perry-runtime/src/object/instanceof.rs index a68632f773..c7e0de1fa3 100644 --- a/crates/perry-runtime/src/object/instanceof.rs +++ b/crates/perry-runtime/src/object/instanceof.rs @@ -269,59 +269,7 @@ pub extern "C" fn js_instanceof_dynamic(value: f64, type_ref: f64) -> f64 { } _ => {} } - let class_id = match name { - // Reference-type global constructors used as runtime *values* - // (e.g. `Function.prototype[Symbol.hasInstance].call(Map, m)`, or a - // dynamic `x instanceof ctorVar`). These mirror the synthetic ids - // the compile-time `instanceof` operator emits — see - // perry-codegen/src/expr/instance_misc1.rs — which `js_instanceof` - // resolves via the per-type registries (#3662). `Array`/`Object`/ - // `Date` carry their own coercion thunks rather than the shared - // noop thunk; #4102 added those thunks to the - // `identify_global_builtin_constructor` allow-list so the dynamic / - // reflective path now resolves them here just like the literal-RHS - // operator does at compile time. 
- "Map" => 0xFFFF0022, - "Set" => 0xFFFF0023, - "RegExp" => 0xFFFF0021, - "ArrayBuffer" => 0xFFFF0025, - "Array" => 0xFFFF0024, - "Object" => 0xFFFF0050, - "Function" => CLASS_ID_FUNCTION, - "Number" => 0xFFFF00D0, - "String" => 0xFFFF00D1, - "Boolean" => 0xFFFF00D2, - "BigInt" => 0xFFFF00D3, - "Symbol" => 0xFFFF00D4, - "Date" => 0xFFFF0020, - "Error" => crate::error::CLASS_ID_ERROR, - "TypeError" => crate::error::CLASS_ID_TYPE_ERROR, - "RangeError" => crate::error::CLASS_ID_RANGE_ERROR, - "ReferenceError" => crate::error::CLASS_ID_REFERENCE_ERROR, - "SyntaxError" => crate::error::CLASS_ID_SYNTAX_ERROR, - "EvalError" => crate::error::CLASS_ID_EVAL_ERROR, - "URIError" => crate::error::CLASS_ID_URI_ERROR, - "AggregateError" => crate::error::CLASS_ID_AGGREGATE_ERROR, - "Promise" => CLASS_ID_PROMISE, - "Navigator" => crate::navigator::NAVIGATOR_CLASS_ID, - "TextEncoderStream" => crate::object::CLASS_ID_TEXT_ENCODER_STREAM, - "TextDecoderStream" => crate::object::CLASS_ID_TEXT_DECODER_STREAM, - "CompressionStream" => crate::object::CLASS_ID_COMPRESSION_STREAM, - "DecompressionStream" => crate::object::CLASS_ID_DECOMPRESSION_STREAM, - "Event" => crate::event_target::CLASS_ID_EVENT, - "CustomEvent" => crate::event_target::CLASS_ID_CUSTOM_EVENT, - "DOMException" => crate::event_target::CLASS_ID_DOM_EXCEPTION, - // TypedArray constructors used as runtime *values* (a dynamic - // `x instanceof TA` where `TA` is a variable — e.g. test262's - // `testWithTypedArrayConstructors`). Mirrors the per-kind synthetic - // ids the compile-time `instanceof Float64Array` operator resolves. 
- "Int8Array" | "Uint8Array" | "Uint8ClampedArray" | "Int16Array" | "Uint16Array" - | "Int32Array" | "Uint32Array" | "Float16Array" | "Float32Array" | "Float64Array" - | "BigInt64Array" | "BigUint64Array" => crate::typedarray::kind_for_name(name) - .map(crate::typedarray::class_id_for_kind) - .unwrap_or(0), - _ => 0, - }; + let class_id = global_builtin_constructor_class_id(name); if class_id != 0 { return js_instanceof(value, class_id); } @@ -333,6 +281,71 @@ pub extern "C" fn js_instanceof_dynamic(value: f64, type_ref: f64) -> f64 { f64::from_bits(TAG_FALSE) }; } + return js_instanceof_dynamic_tail(value, type_ref); +} + +/// Runtime class id for a globalThis built-in constructor *name*. +/// +/// Reference-type global constructors used as runtime values (e.g. +/// `Function.prototype[Symbol.hasInstance].call(Map, m)`, or a dynamic +/// `x instanceof ctorVar`). These mirror the synthetic ids the compile-time +/// `instanceof` operator emits — see perry-codegen/src/expr/instance_misc1.rs +/// — which `js_instanceof` resolves via the per-type registries (#3662). +/// `Array`/`Object`/`Date` carry their own coercion thunks rather than the +/// shared noop thunk; #4102 added those thunks to the +/// `identify_global_builtin_constructor` allow-list so the dynamic / +/// reflective path resolves them just like the literal-RHS operator does at +/// compile time. Also consulted by `js_register_class_parent_dynamic` so a +/// user `class X extends Event` registers the `X → Event` chain edge. +/// Returns 0 for names without a runtime class id. 
+pub(crate) fn global_builtin_constructor_class_id(name: &str) -> u32 { + match name { + "Map" => 0xFFFF0022, + "Set" => 0xFFFF0023, + "RegExp" => 0xFFFF0021, + "ArrayBuffer" => 0xFFFF0025, + "Array" => 0xFFFF0024, + "Object" => 0xFFFF0050, + "Function" => CLASS_ID_FUNCTION, + "Number" => 0xFFFF00D0, + "String" => 0xFFFF00D1, + "Boolean" => 0xFFFF00D2, + "BigInt" => 0xFFFF00D3, + "Symbol" => 0xFFFF00D4, + "Date" => 0xFFFF0020, + "Error" => crate::error::CLASS_ID_ERROR, + "TypeError" => crate::error::CLASS_ID_TYPE_ERROR, + "RangeError" => crate::error::CLASS_ID_RANGE_ERROR, + "ReferenceError" => crate::error::CLASS_ID_REFERENCE_ERROR, + "SyntaxError" => crate::error::CLASS_ID_SYNTAX_ERROR, + "EvalError" => crate::error::CLASS_ID_EVAL_ERROR, + "URIError" => crate::error::CLASS_ID_URI_ERROR, + "AggregateError" => crate::error::CLASS_ID_AGGREGATE_ERROR, + "Promise" => CLASS_ID_PROMISE, + "Navigator" => crate::navigator::NAVIGATOR_CLASS_ID, + "TextEncoderStream" => crate::object::CLASS_ID_TEXT_ENCODER_STREAM, + "TextDecoderStream" => crate::object::CLASS_ID_TEXT_DECODER_STREAM, + "CompressionStream" => crate::object::CLASS_ID_COMPRESSION_STREAM, + "DecompressionStream" => crate::object::CLASS_ID_DECOMPRESSION_STREAM, + "Event" => crate::event_target::CLASS_ID_EVENT, + "CustomEvent" => crate::event_target::CLASS_ID_CUSTOM_EVENT, + "DOMException" => crate::event_target::CLASS_ID_DOM_EXCEPTION, + // TypedArray constructors used as runtime *values* (a dynamic + // `x instanceof TA` where `TA` is a variable — e.g. test262's + // `testWithTypedArrayConstructors`). Mirrors the per-kind synthetic + // ids the compile-time `instanceof Float64Array` operator resolves. 
+ "Int8Array" | "Uint8Array" | "Uint8ClampedArray" | "Int16Array" | "Uint16Array" + | "Int32Array" | "Uint32Array" | "Float16Array" | "Float32Array" | "Float64Array" + | "BigInt64Array" | "BigUint64Array" => crate::typedarray::kind_for_name(name) + .map(crate::typedarray::class_id_for_kind) + .unwrap_or(0), + _ => 0, + } +} + +#[inline] +fn js_instanceof_dynamic_tail(value: f64, type_ref: f64) -> f64 { + use crate::value::TAG_FALSE; if crate::node_submodules::is_diagnostics_bounded_channel_constructor_value(type_ref) { return if crate::node_submodules::diagnostics_bounded_channel_is_instance_value(value) { f64::from_bits(crate::value::TAG_TRUE) diff --git a/crates/perry-runtime/src/object/native_module.rs b/crates/perry-runtime/src/object/native_module.rs index 7b1d9b2b92..718ea16bb8 100644 --- a/crates/perry-runtime/src/object/native_module.rs +++ b/crates/perry-runtime/src/object/native_module.rs @@ -36,6 +36,32 @@ thread_local! { static MODULE_CJS_GLOBAL_PATHS_VALUE: Cell = const { Cell::new(0) }; static NATIVE_MODULE_NAMESPACES: RefCell> = RefCell::new(HashMap::new()); + /// User overrides of native-module namespace properties, keyed + /// `"{module}\0{prop}"`. CommonJS module exports are MUTABLE in Node — + /// monkey-patching like Next.js's + /// `require('node:timers').setImmediate = patched` must store and win + /// subsequent property reads instead of throwing read-only. + static NATIVE_NAMESPACE_PROP_OVERRIDES: RefCell> = + RefCell::new(HashMap::new()); +} + +/// Store a user override for a native-module namespace property +/// (`require('node:timers').setImmediate = fn`). Wins subsequent reads via +/// `vt_get_own_field`. +pub(crate) fn native_namespace_prop_override_store(module: &str, prop: &str, value: f64) { + NATIVE_NAMESPACE_PROP_OVERRIDES.with(|m| { + m.borrow_mut() + .insert(format!("{module}\0{prop}"), value.to_bits()); + }); +} + +/// Read back a stored native-namespace property override, if any. 
+pub(crate) fn native_namespace_prop_override_get(module: &str, prop: &str) -> Option { + NATIVE_NAMESPACE_PROP_OVERRIDES.with(|m| { + m.borrow() + .get(&format!("{module}\0{prop}")) + .map(|bits| f64::from_bits(*bits)) + }) } fn bound_native_method_length(name: &str) -> Option { @@ -59,6 +85,12 @@ pub fn scan_native_callable_export_roots_mut(visitor: &mut crate::gc::RuntimeRoo visitor.visit_nanbox_u64_slot(value_bits); } }); + NATIVE_NAMESPACE_PROP_OVERRIDES.with(|cache| { + let mut cache = cache.borrow_mut(); + for value_bits in cache.values_mut() { + visitor.visit_nanbox_u64_slot(value_bits); + } + }); NATIVE_MODULE_ACCESSOR_EXPORTS.with(|cache| { let mut cache = cache.borrow_mut(); for value_bits in cache.values_mut() { @@ -2936,6 +2968,18 @@ pub(crate) fn native_module_enumerable_keys(module_name: &str) -> Option<&'stati VM_NAMESPACE_KEYS }), "vm.constants" => Some(VM_CONSTANTS_KEYS), + // Plain `timers` was missing — `require('node:timers').setImmediate` + // read undefined (Next.js's fast-set-immediate extension reads and + // patches it at module init). + "timers" => Some(&[ + b"setTimeout", + b"clearTimeout", + b"setInterval", + b"clearInterval", + b"setImmediate", + b"clearImmediate", + b"promises", + ]), "timers/promises" => Some(&[b"setTimeout", b"setImmediate", b"setInterval", b"scheduler"]), "readline/promises" => Some(&[b"Interface", b"Readline", b"createInterface"]), "zlib" => Some(&[b"codes"]), @@ -4545,6 +4589,23 @@ fn attach_module_cjs_constructor_statics(closure_addr: usize) { bound_native_callable_export_value("module", name), ); } + // `Module.prototype` — Node's require-hook pattern (Next.js): + // `const mod = require('module'); const orig = mod.prototype.require; + // mod.prototype.require = function(request) {…}`. Expose a plain object + // carrying a `require` method so the read+patch round-trips; the patch + // is inert under AOT compilation (Perry resolves modules at compile + // time), but startup must not throw on the access. 
+ let proto = js_object_alloc(0, 1); + native_set_field( + proto, + "require", + bound_native_callable_export_value("module", "_load"), + ); + crate::closure::closure_set_dynamic_prop( + closure_addr, + "prototype", + crate::value::js_nanbox_pointer(proto as i64), + ); } fn native_color_tuple(open: i32, close: i32) -> f64 { @@ -4753,6 +4814,21 @@ pub(crate) fn set_bound_native_closure_name( let ptr = crate::string::js_string_from_bytes(name.as_ptr(), name.len() as u32); let name_value = f64::from_bits(JSValue::string_ptr(ptr).bits()); crate::closure::closure_set_dynamic_prop(closure as usize, "name", name_value); + // Spec: a function's `name` property is { writable:false, enumerable:false, + // configurable:true }. Storing it as a plain dynamic prop left it ENUMERABLE + // by default, so `for (k in Buffer)` yielded "name" — even though + // `getOwnPropertyDescriptor(Buffer,'name').enumerable` correctly reported + // false via the function-name special case. The inconsistency broke + // safe-buffer's `copyProps(Buffer, SafeBuffer)` (`for (k in Buffer) + // SafeBuffer[k] = Buffer[k]`): it copied "name" onto SafeBuffer, whose own + // `name` is read-only, throwing `Cannot assign to read only property 'name'` + // in strict mode (jsonwebtoken → Next.js). Pin the proper descriptor so + // enumeration matches reflection. + crate::object::set_property_attrs( + closure as usize, + "name".to_string(), + crate::object::PropertyAttrs::new(false, false, true), + ); } thread_local! { @@ -8440,6 +8516,11 @@ unsafe fn vt_get_own_field( } let property_name = std::str::from_utf8(std::slice::from_raw_parts(key_ptr, key_len)).unwrap_or(""); + // A user override (`require('node:timers').setImmediate = patched`) + // wins all built-in resolution below — CJS exports are mutable in Node. 
+ if let Some(value) = native_namespace_prop_override_get(&module_name, property_name) { + return Some(JSValue::from_bits(value.to_bits())); + } if matches!( module_name, "process" | "process.namespace" | "process.default" diff --git a/crates/perry-runtime/src/object/reflect_support.rs b/crates/perry-runtime/src/object/reflect_support.rs index 018ae51f2e..d6a3f6b08e 100644 --- a/crates/perry-runtime/src/object/reflect_support.rs +++ b/crates/perry-runtime/src/object/reflect_support.rs @@ -87,6 +87,25 @@ pub(crate) fn obj_value_has_own_key(value: f64, key: f64) -> bool { }; return super::has_own_helpers::closure_own_key_present(obj_addr, &key_name); } + // Native-module namespaces (console, fs, …) expose their members as + // VIRTUAL keys — dispatch tables, not keys_array entries. Mirror the + // `js_object_get_own_property_descriptor` arm so a redefinition like + // `Object.defineProperty(console, 'error', { value })` (Next.js + // patches console methods this way, repeatedly) is treated as + // redefining an EXISTING property — absent descriptor attributes then + // retain the property's writable/enumerable/configurable=true + // defaults instead of collapsing to the new-property `false`s (which + // made the SECOND patch throw `Cannot redefine property`). 
+ if (*obj).class_id == super::native_module::NATIVE_MODULE_CLASS_ID { + if let (Some(module_name), Some(key_name)) = ( + super::native_module::read_native_module_name(obj), + key_to_rust_string(key), + ) { + if super::native_module::native_module_has_enumerable_key(&module_name, &key_name) { + return true; + } + } + } let key_str = crate::builtins::js_string_coerce(key); if key_str.is_null() { return false; diff --git a/crates/perry-runtime/src/proxy.rs b/crates/perry-runtime/src/proxy.rs index 64650d5c98..efdf0cb0dc 100644 --- a/crates/perry-runtime/src/proxy.rs +++ b/crates/perry-runtime/src/proxy.rs @@ -1107,14 +1107,6 @@ fn ordinary_set_with_receiver(target: f64, key: f64, value: f64, receiver: f64) return ok; } - // #5054 fast path: the spec walk below probes own_set_descriptor on the - // target, which ends in a LINEAR keys_array scan — so every dynamic - // `obj[key] = v` was O(own-key-count) and building a wide dynamic object - // quadratic (10k props ~ 12s). When nothing the walk models can apply — - // plain GC_TYPE_OBJECT receiver written as itself, no property/accessor - // descriptor ever installed in the process (monotonic global), no class - // machinery (class_id 0), no recorded setPrototypeOf target, extensible, - // string key — the write reduces to the ordinary data-property store. // #5054 fast path: the spec walk below probes own_set_descriptor on the // target, which ends in a LINEAR keys_array scan — so every dynamic // `obj[key] = v` was O(own-key-count) and building a wide dynamic object @@ -1161,6 +1153,38 @@ fn ordinary_set_with_receiver(target: f64, key: f64, value: f64, receiver: f64) } } + // CommonJS native-module namespaces are MUTABLE in Node — monkey-patching + // like Next.js's `require('node:timers').setImmediate = patched` must + // store the override (read back through the namespace vtable's + // `get_own_field`) rather than reporting the built-in member + // non-writable and throwing under strict mode. 
+ { + let jv = crate::value::JSValue::from_bits(target.to_bits()); + if jv.is_pointer() { + let obj = extract_pointer(target.to_bits()) as *const crate::object::ObjectHeader; + if !obj.is_null() && unsafe { (*obj).class_id } == crate::object::NATIVE_MODULE_CLASS_ID + { + let module_name = unsafe { crate::object::get_module_name_from_namespace(target) }; + if let (false, Some(prop)) = + (module_name.is_empty(), property_key_to_rust_string(key)) + { + if prop != "__module__" { + if module_name == "buffer.Buffer" && prop == "poolSize" { + crate::object::set_buffer_pool_size(value); + } else { + crate::object::native_namespace_prop_override_store( + module_name, + &prop, + value, + ); + } + return true; + } + } + } + } + } + let mut current = target; for _ in 0..64 { // Integer-Indexed exotic [[Set]] (§10.4.5.5): a typed array in the diff --git a/crates/perry-transform/src/inline/mod.rs b/crates/perry-transform/src/inline/mod.rs index 85d8f13a94..9a09c2b66f 100644 --- a/crates/perry-transform/src/inline/mod.rs +++ b/crates/perry-transform/src/inline/mod.rs @@ -493,6 +493,7 @@ pub fn inline_functions( type_only: false, is_dynamic: false, is_dynamic_target: false, + is_deferred_require: false, }); } } diff --git a/crates/perry/src/commands/compile.rs b/crates/perry/src/commands/compile.rs index 40871f099d..f503319387 100644 --- a/crates/perry/src/commands/compile.rs +++ b/crates/perry/src/commands/compile.rs @@ -2207,7 +2207,10 @@ pub fn run_with_parse_cache( } }; for import in &hir_module.imports { - if import.is_dynamic || import.type_only { + // `is_deferred_require`: a function-local `require('S')` + // (lazy in Node). S must NOT chain into this module's init + // — it inits only when the require shim is actually called. 
+ if import.is_dynamic || import.type_only || import.is_deferred_require { continue; } if let Some(resolved) = &import.resolved_path { diff --git a/crates/perry/src/commands/compile/bootstrap.rs b/crates/perry/src/commands/compile/bootstrap.rs index e108d0b301..e06e18d383 100644 --- a/crates/perry/src/commands/compile/bootstrap.rs +++ b/crates/perry/src/commands/compile/bootstrap.rs @@ -505,6 +505,7 @@ mod js_runtime_gate_tests { type_only: false, is_dynamic: false, is_dynamic_target: false, + is_deferred_require: false, }); let mut package = empty_module("pkg"); diff --git a/crates/perry/src/commands/compile/cjs_wrap/detect.rs b/crates/perry/src/commands/compile/cjs_wrap/detect.rs index 81bd4a3ba6..e6cfc6c58e 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/detect.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/detect.rs @@ -21,21 +21,226 @@ use super::*; /// `ImportExportInScript`. The guard below short-circuits the wrap when a /// top-level `import`/`export` statement is detected. pub(in crate::commands::compile) fn is_commonjs(source: &str) -> bool { + // An empty (or whitespace-only) file is a valid CJS module exporting + // `{}` — marker packages like react's `client-only` ship a 0-byte + // index.js whose default import must resolve to the empty exports + // object, which only the wrap provides. + if source.trim().is_empty() { + return true; + } + // ALL token scans run on comment/string-stripped source. Real packages + // defeat raw-text scans in both directions: Next.js's + // `setup-node-env.external.js` has the word "import " in a header + // comment (which flipped the `require(` arm), and `next/dist/build/ + // utils.js` GENERATES an ESM server.js inside a template literal whose + // column-0 `import path from 'node:path'` line made `has_top_level_esm` + // classify the (thoroughly CJS) file as ESM — its bare `exports` then + // threw a ReferenceError at module init. 
+ let stripped = strip_comments_and_strings(source); // ESM-at-the-top wins: a top-level `import`/`export` makes this an // ES module regardless of CJS patterns appearing deeper in the file. - if has_top_level_esm(source) { + if has_top_level_esm(&stripped) { return false; } - source.contains("module.exports") - || source.contains("exports.") + if stripped.contains("module.exports") + || stripped.contains("exports.") // Issue #4872: tsc-compiled type-only modules (nestjs dist // `*.interface.js`) contain ONLY the interop marker // `Object.defineProperty(exports, "__esModule", { value: true });` // — no `exports.X =`, no `require(`. Without this arm they fall // through to the ESM pipeline, where the bare `exports` identifier // throws a ReferenceError at module init. - || source.contains("defineProperty(exports,") - || (source.contains("require(") && !source.contains("import ")) + || stripped.contains("defineProperty(exports,") + { + return true; + } + stripped.contains("require(") && !stripped.contains("import ") +} + +/// Replace comment bodies and string/template-literal contents with spaces +/// so token scans (`require(`, `import `) only see real code. Same scanner +/// shape as `looks_like_es_module` in perry-parser, including the +/// regex-literal tracking — a regex containing an unescaped quote (e.g. +/// `/['"]/` in vendored minified bundles like comment-json) would otherwise +/// desync the string state and mask the rest of the file, hiding a trailing +/// `module.exports = …`. +pub(crate) fn strip_comments_and_strings(source: &str) -> String { + #[derive(Clone, Copy, PartialEq, Eq)] + enum State { + Code, + Str(u8), + LineComment, + BlockComment, + } + + fn is_ident(b: u8) -> bool { + b == b'_' || b == b'$' || b.is_ascii_alphanumeric() + } + + // A `/` starts a regex literal (not division) when the preceding token + // cannot end an expression. Mirrors perry-parser's heuristic. 
+ fn regex_can_start_here(bytes: &[u8], slash_at: usize) -> bool { + let mut i = slash_at; + while i > 0 { + i -= 1; + match bytes[i] { + b' ' | b'\t' | b'\r' | b'\n' => continue, + b'=' | b'(' | b',' | b':' | b'[' | b'!' | b'&' | b'|' | b'?' | b'{' | b'}' + | b';' | b'+' | b'-' | b'*' | b'%' | b'~' | b'^' | b'<' | b'>' => return true, + c if is_ident(c) => { + let end = i + 1; + let mut start = end; + while start > 0 && is_ident(bytes[start - 1]) { + start -= 1; + } + return matches!( + &bytes[start..end], + b"return" + | b"typeof" + | b"instanceof" + | b"in" + | b"of" + | b"case" + | b"do" + | b"else" + | b"void" + | b"delete" + | b"throw" + | b"new" + | b"yield" + | b"await" + ); + } + _ => return false, + } + } + true + } + + // Returns the index just past the closing `/`, or None if no regex + // terminator is found on this line (then it was division after all). + fn skip_regex_literal(bytes: &[u8], slash_at: usize) -> Option { + let mut i = slash_at + 1; + let mut in_class = false; + while i < bytes.len() { + match bytes[i] { + b'\\' => i += 2, + b'\n' => return None, + b'[' => { + in_class = true; + i += 1; + } + b']' => { + in_class = false; + i += 1; + } + b'/' if !in_class => return Some(i + 1), + _ => i += 1, + } + } + None + } + + let bytes = source.as_bytes(); + let mut out = vec![b' '; bytes.len()]; + let mut state = State::Code; + let mut i = 0; + // Open `${…}` template interpolations: each entry is the `{`-nesting + // depth inside that interpolation. The interpolation body is real code + // (left unmasked) and may itself contain nested template literals — + // next/dist/build/utils.js generates server.js via + // `` `${moduleType ? `import …` : `const …`}` `` and a non-nesting + // scanner ends the outer template at the first INNER backtick, + // unmasking the generated `import` lines. 
+ let mut template_interp_depth: Vec = Vec::new(); + while i < bytes.len() { + match state { + State::Code => { + if bytes[i] == b'\'' || bytes[i] == b'"' || bytes[i] == b'`' { + state = State::Str(bytes[i]); + i += 1; + } else if bytes[i] == b'/' && bytes.get(i + 1) == Some(&b'/') { + state = State::LineComment; + i += 2; + } else if bytes[i] == b'/' && bytes.get(i + 1) == Some(&b'*') { + state = State::BlockComment; + i += 2; + } else if bytes[i] == b'/' && regex_can_start_here(bytes, i) { + // Regex literal: mask its body (it may contain quotes) + // but keep scanning code after it. + match skip_regex_literal(bytes, i) { + Some(end) => i = end, + None => { + out[i] = bytes[i]; + i += 1; + } + } + } else if bytes[i] == b'{' { + if let Some(depth) = template_interp_depth.last_mut() { + *depth += 1; + } + out[i] = bytes[i]; + i += 1; + } else if bytes[i] == b'}' { + match template_interp_depth.last_mut() { + Some(0) => { + // Close of a `${…}` — resume the template literal. + template_interp_depth.pop(); + state = State::Str(b'`'); + i += 1; + } + Some(depth) => { + *depth -= 1; + out[i] = bytes[i]; + i += 1; + } + None => { + out[i] = bytes[i]; + i += 1; + } + } + } else { + out[i] = bytes[i]; + i += 1; + } + } + State::Str(quote) => { + if bytes[i] == b'\\' { + i += 2; + } else if quote == b'`' && bytes[i] == b'$' && bytes.get(i + 1) == Some(&b'{') { + // `${` — interpolation body is code (and may nest). 
+ template_interp_depth.push(0); + state = State::Code; + i += 2; + } else { + if bytes[i] == quote { + state = State::Code; + } + i += 1; + } + } + State::LineComment => { + if bytes[i] == b'\n' { + state = State::Code; + out[i] = b'\n'; + } + i += 1; + } + State::BlockComment => { + if bytes[i] == b'*' && bytes.get(i + 1) == Some(&b'/') { + state = State::Code; + i += 2; + } else { + i += 1; + } + } + } + } + // SAFETY-free: `out` is pure ASCII spaces plus bytes copied verbatim + // from `source` at their original positions, so it remains valid UTF-8 + // except where a multi-byte char was partially masked — use lossy + // conversion to stay safe. + String::from_utf8_lossy(&out).into_owned() } /// Returns true if `source` contains an unindented `import ` / `import{` / diff --git a/crates/perry/src/commands/compile/cjs_wrap/extract_exports.rs b/crates/perry/src/commands/compile/cjs_wrap/extract_exports.rs index 77d9ed8e9c..8b77e578a0 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/extract_exports.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/extract_exports.rs @@ -355,8 +355,15 @@ pub fn extract_exports_from_source(source: &str) -> Vec { }; // Shape 1: `exports.X = ...` / `module.exports.X = ...` + // The boundary class excludes `.` so `e.exports.X = …` (a property write on + // some OTHER object — e.g. a webpack/ncc inner module's own exports param, + // as in next/dist/compiled/p-queue's `e.exports.TimeoutError = TimeoutError`) + // is NOT mistaken for a named export of the outer bundle. A false positive + // here makes the wrap emit `export const X = _cjs.X;` at module scope, + // which shadows the inner binding of the same name during lowering and + // turns every inner reference to it into `undefined`. 
let dot_re = regex::Regex::new( - r"(?:^|[^A-Za-z0-9_$])(?:module\.)?exports\.([A-Za-z_$][A-Za-z0-9_$]*)\s*=", + r"(?:^|[^A-Za-z0-9_$.])(?:module\.)?exports\.([A-Za-z_$][A-Za-z0-9_$]*)\s*=", ) .unwrap(); for cap in dot_re.captures_iter(source) { diff --git a/crates/perry/src/commands/compile/cjs_wrap/extract_requires.rs b/crates/perry/src/commands/compile/cjs_wrap/extract_requires.rs index 73c7a93540..3bd4f72592 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/extract_requires.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/extract_requires.rs @@ -179,3 +179,156 @@ pub fn identifier_is_reassigned(source: &str, name: &str) -> bool { } false } + +/// Next.js lazy-require classification (single forward pass). Returns the set +/// of specifiers whose EVERY `require('')` call site is lexically inside +/// a FUNCTION body — never at module top level, and never inside a top-level +/// control-flow block that runs at module load. Node loads such a module +/// lazily (only when the enclosing function runs), so Perry must not eager-init +/// it. +/// +/// Conservative by construction: a spec with any top-level call site (including +/// top-level `if`/`for`/`try` blocks, which execute during module evaluation) +/// is excluded and keeps the default eager behavior. A misclassification is +/// self-correcting at runtime — the require shim triggers the target's init +/// when `require()` is actually called — so this only governs eager-init-loop +/// membership. +/// +/// Brace/paren scanning runs on a comment/string/regex-masked copy (same +/// length, code structure preserved) so literal braces never corrupt the scope +/// stack. Call-site offsets + specifiers come from the original source. +pub fn function_local_specs(source: &str) -> std::collections::HashSet { + use std::collections::{HashMap, HashSet}; + + // (offset, spec) for every static `require('')` call, in source order. 
+ let re = regex::Regex::new(r#"require\s*\(\s*['"]([^'"]+)['"]\s*\)"#).unwrap(); + let sbytes = source.as_bytes(); + let mut sites: Vec<(usize, &str)> = Vec::new(); + for cap in re.captures_iter(source) { + let m0 = cap.get(0).unwrap(); + // Skip member-access matches (`foo.require('x')`). + let mut p = m0.start(); + while p > 0 && (sbytes[p - 1] as char).is_whitespace() { + p -= 1; + } + if p > 0 && sbytes[p - 1] == b'.' { + continue; + } + sites.push((m0.start(), cap.get(1).unwrap().as_str())); + } + if sites.is_empty() { + return HashSet::new(); + } + + let masked = super::detect::strip_comments_and_strings(source); + let mbytes = masked.as_bytes(); + let is_ident = |c: u8| c == b'_' || c == b'$' || c.is_ascii_alphanumeric(); + let control_keywords = ["if", "for", "while", "switch", "catch", "with", "else"]; + + #[derive(PartialEq)] + enum Scope { + Function, + Block, + } + let mut scopes: Vec = Vec::new(); + // spec → (seen any site, all sites so far in-function). + let mut state: HashMap<&str, (bool, bool)> = HashMap::new(); + let mut next_site = 0usize; + let in_function = |scopes: &[Scope]| scopes.iter().any(|s| *s == Scope::Function); + + let mut i = 0usize; + while i < mbytes.len() { + // Record any require site at this offset before processing the char. + while next_site < sites.len() && sites[next_site].0 == i { + let (_, spec) = sites[next_site]; + let here = in_function(&scopes); + let e = state.entry(spec).or_insert((false, true)); + e.0 = true; + e.1 = e.1 && here; + next_site += 1; + } + match mbytes[i] { + b'{' => { + let mut p = i; + while p > 0 && (mbytes[p - 1] as char).is_whitespace() { + p -= 1; + } + let kind = if p >= 2 && &masked[p - 2..p] == "=>" { + Scope::Function + } else if p > 0 && mbytes[p - 1] == b')' { + let head = matched_open_head(&masked, mbytes, p - 1, &is_ident); + if control_keywords.iter().any(|k| *k == head) { + Scope::Block + } else { + // `function f(...) {`, method `m(...) {`, arrow + // `(...) 
=> {` (caught above), IIFE `(...)(...) {`… + Scope::Function + } + } else { + Scope::Block + }; + scopes.push(kind); + } + b'}' => { + scopes.pop(); + } + _ => {} + } + i += 1; + } + // Any sites at EOF offset (defensive). + while next_site < sites.len() { + let (_, spec) = sites[next_site]; + let e = state.entry(spec).or_insert((false, true)); + e.0 = true; + e.1 = e.1 && in_function(&scopes); + next_site += 1; + } + + state + .into_iter() + .filter_map(|(spec, (seen, all_in_fn))| { + if seen && all_in_fn { + Some(spec.to_string()) + } else { + None + } + }) + .collect() +} + +/// Given the index of a `)` in the masked source, walk back to its matching +/// `(` and return the identifier/keyword immediately before that `(`. +fn matched_open_head( + masked: &str, + mbytes: &[u8], + close_paren: usize, + is_ident: &impl Fn(u8) -> bool, +) -> String { + let mut depth = 0i32; + let mut i = close_paren; + loop { + match mbytes[i] { + b')' => depth += 1, + b'(' => { + depth -= 1; + if depth == 0 { + let mut p = i; + while p > 0 && (mbytes[p - 1] as char).is_whitespace() { + p -= 1; + } + let end = p; + while p > 0 && is_ident(mbytes[p - 1]) { + p -= 1; + } + return masked[p..end].to_string(); + } + } + _ => {} + } + if i == 0 { + return String::new(); + } + i -= 1; + } +} diff --git a/crates/perry/src/commands/compile/cjs_wrap/hoist_classes.rs b/crates/perry/src/commands/compile/cjs_wrap/hoist_classes.rs index 5378581e66..93aac6563a 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/hoist_classes.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/hoist_classes.rs @@ -248,11 +248,14 @@ pub fn extract_top_level_class_decls(source: &str) -> (String, Vec, Stri continue; }; - // Match optional leading whitespace. 
- let mut p = line_start; - while p < bytes.len() && (bytes[p] == b' ' || bytes[p] == b'\t') { - p += 1; - } + // Column-0 only: an indented `class` is (almost always) nested inside + // a function — `function mod() {\n const f = ...;\n class Event2 { + // constructor(t) { this[f] = t; } }\n}` (the `ws` package's event + // classes have this shape). Hoisting a nested class out of the IIFE + // severs its closure over the enclosing function's locals, turning + // `f` into a ReferenceError at runtime. The #2310 let/const/var + // guard below can't catch those — it only collects TOP-LEVEL names. + let p = line_start; if p + 6 <= bytes.len() && &bytes[p..p + 6] == b"class " { // Skip past "class ". diff --git a/crates/perry/src/commands/compile/cjs_wrap/mod.rs b/crates/perry/src/commands/compile/cjs_wrap/mod.rs index c7930a9837..c9277873df 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/mod.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/mod.rs @@ -50,7 +50,7 @@ pub(self) use extract_exports::{ }; pub(self) use extract_requires::{ extract_export_star_specs, extract_require_aliases_with_ranges, extract_require_specifiers, - identifier_is_reassigned, + function_local_specs, identifier_is_reassigned, }; pub(self) use hoist_classes::{ extract_top_level_class_decls, rewrite_module_exports_class_expression, @@ -96,6 +96,76 @@ mod tests { assert!(!is_commonjs("import x from 'foo'; export const y = 1;")); } + #[test] + fn require_only_file_with_import_word_in_comment_is_cjs() { + // Next.js `setup-node-env.external.js`: pure side-effect requires, + // but the header comment contains the word "import". The comment + // must not flip classification to ESM. 
+ let src = r#"// This is a minimal import that initializes the node environment +"use strict"; +if (process.env.NEXT_RUNTIME !== 'edge') { + require('next/dist/server/node-environment'); +} +"#; + assert!( + is_commonjs(src), + "comment text must not defeat require( arm" + ); + } + + #[test] + fn template_literal_esm_codegen_is_still_cjs() { + // next/dist/build/utils.js writes an ESM server.js via a template + // literal whose column-0 `import path from 'node:path'` line must + // not flip this CJS file to the ESM pipeline. + let src = "\"use strict\";\nObject.defineProperty(exports, \"__esModule\", { value: true });\nexports.write = function() {\n return `performance.mark('next-start');\nimport path from 'node:path'\nimport module from 'node:module'\n`;\n};\n"; + assert!( + is_commonjs(src), + "template-literal import must not defeat CJS detection" + ); + } + + #[test] + fn nested_template_interpolation_stays_masked() { + // next/dist/build/utils.js shape: an outer template whose `${…}` + // interpolation contains NESTED templates with column-0 `import` + // lines. The whole construct must stay masked as string content. + let src = "\"use strict\";\nexports.write = (m) => {\n return `${m ? `x\nimport path from 'node:path'\n` : `const path = require('path')`}\nrest`;\n};\n"; + assert!( + is_commonjs(src), + "nested template import lines must not defeat CJS detection" + ); + } + + #[test] + fn regex_with_quote_does_not_mask_trailing_module_exports() { + // comment-json's bundle shape: regex literals containing quotes + // followed by the real `module.exports=` tail. The stripper must + // track regex literals or the tail is masked as string content. + let src = "const e = s.split(/['\"]/);\nvar i = make();\nmodule.exports = i;\n"; + assert!( + is_commonjs(src), + "regex with quote must not hide module.exports" + ); + } + + #[test] + fn require_in_string_only_is_not_cjs() { + // `require(` appearing only inside a string literal is not evidence + // of CommonJS. 
+ let src = "const msg = \"call require('x') yourself\";\nconsole.log(msg);\n"; + assert!(!is_commonjs(src)); + } + + #[test] + fn empty_file_is_cjs() { + // Marker packages (react's `client-only`) ship a 0-byte index.js; + // its default import must resolve to the wrap's empty exports + // object, so empty/whitespace-only sources count as CommonJS. + assert!(is_commonjs("")); + assert!(is_commonjs(" \n\t\n")); + } + #[test] fn issue_851_rollup_hybrid_esm_with_inner_cjs_is_esm() { // Rollup-bundled output (vitest's `dist/chunks/*.js` shape): @@ -1195,6 +1265,34 @@ module.exports = SafeBuffer;"#; assert!(names.contains(&"version".to_string())); } + #[test] + fn extract_exports_skips_inner_module_exports_param() { + // next/dist/compiled/p-queue: webpack/ncc inner modules write to their + // OWN exports object (`e.exports.X = …`), which is not a named export + // of the outer bundle. Pre-fix the dot-boundary regex matched it, the + // wrap emitted `export const TimeoutError = _cjs.TimeoutError;` at + // module scope, and that const shadowed the inner class binding — + // every inner reference to `TimeoutError` became undefined. 
+ let src = "var mods = { 816: (e, t, n) => {\n\ + class TimeoutError extends Error {}\n\ + const pTimeout = (p) => p;\n\ + e.exports = pTimeout;\n\ + e.exports.str = 'hello';\n\ + e.exports.TimeoutError = TimeoutError;\n\ + }};\n\ + exports.real = 1;\n\ + module.exports.alsoReal = 2;\n"; + let names = extract_exports_from_source(src); + assert!( + !names.contains(&"TimeoutError".to_string()), + "`e.exports.X` is an inner module's exports, not ours: {:?}", + names + ); + assert!(!names.contains(&"str".to_string()), "got: {:?}", names); + assert!(names.contains(&"real".to_string())); + assert!(names.contains(&"alsoReal".to_string())); + } + #[test] fn wrap_pino_shape_parses_cleanly() { // Issue #845 — pino sub-bug: end-to-end check that a pino-shaped diff --git a/crates/perry/src/commands/compile/cjs_wrap/wrap.rs b/crates/perry/src/commands/compile/cjs_wrap/wrap.rs index 31d1c470a7..10b1c47fb2 100644 --- a/crates/perry/src/commands/compile/cjs_wrap/wrap.rs +++ b/crates/perry/src/commands/compile/cjs_wrap/wrap.rs @@ -118,6 +118,16 @@ pub(in crate::commands::compile) fn wrap_commonjs_for_target( } true }; + // Next.js lazy-require: specifiers whose every `require('S')` call site is + // inside a function body (lazy in Node). Computed up front because it also + // suppresses alias ADOPTION below — a function-local `const dep = + // require('S')` is a function-scoped const, not a module binding, and + // adopting it would hoist `import dep from 'S'` to module scope (eager). We + // instead keep the synthetic binding and rename it `_lazyreq_N` so the + // target stays `Deferred` and inits only when the shim's + // `return _lazyreq_N` runs (i.e. when the function actually calls require). 
+ let lazy_specs = function_local_specs(source); + let mut import_local_names: Vec = require_specs .iter() .enumerate() @@ -129,6 +139,10 @@ pub(in crate::commands::compile) fn wrap_commonjs_for_target( if !alias_is_safe(alias) { continue; } + if lazy_specs.contains(spec) { + // Don't adopt a function-local alias — keep it lazy (see above). + continue; + } if import_local_names.iter().any(|n| n == alias) { continue; } @@ -142,6 +156,17 @@ pub(in crate::commands::compile) fn wrap_commonjs_for_target( chosen_alias_per_spec.insert(spec.clone()); } + // Rename the surviving synthetic bindings for function-local specs so + // `collect_modules` can tag the import `is_deferred_require` by name and + // codegen can fire `__init()` at the shim read site. + if !lazy_specs.is_empty() { + for (i, spec) in require_specs.iter().enumerate() { + if import_local_names[i] == format!("_req_{i}") && lazy_specs.contains(spec) { + import_local_names[i] = format!("_lazyreq_{i}"); + } + } + } + // #1721: ranges of `const = require()` lines whose alias we // ADOPTED as the import local name above (`import_local_names[idx] == alias`). // The synthetic `require` returns that name, and the hoisted `import ` @@ -182,12 +207,90 @@ pub(in crate::commands::compile) fn wrap_commonjs_for_target( .collect::>() .join("\n"); + // An UNRESOLVABLE adopted specifier (`require('@opentelemetry/api')` + // with only Next's vendored copy on disk) leaves its hoisted import + // binding as the boolean TRUE sentinel at runtime. Returning that from + // the shim defeats the ubiquitous try/require-fallback pattern — Node + // throws MODULE_NOT_FOUND and the catch loads the vendored copy, but + // the shim handed back `true` and the catch never ran. 
Guard such an + // entry with a throw — but ONLY when a call site of that specifier + // sits inside a `try` block: a BARE top-level require of a pruned + // build-only module (`require('next/dist/compiled/browserslist')` in + // get-supported-browsers.js) must keep the silent sentinel, because + // Perry initializes every collected module eagerly while Node never + // loads that file at all — a throw there kills startup. (A real module + // default-exporting a boolean would mis-trip the guard; no such + // package shape has been observed.) let require_cases = require_specs .iter() .zip(import_local_names.iter()) - .map(|(spec, local)| format!(" if (specifier === '{}') return {};", spec, local)) + .map(|(spec, local)| { + if require_site_in_try(source, spec) { + format!( + " if (specifier === '{spec}') {{ if (typeof {local} === 'boolean') \ + throw __perry_cjs_require_error('error', 'MODULE_NOT_FOUND', \ + \"Cannot find module '{spec}'\"); return {local}; }}" + ) + } else { + format!(" if (specifier === '{}') return {};", spec, local) + } + }) .collect::>() .join("\n"); + // Heuristic: is any `require('')` call site lexically inside a + // `try { … }` block? Reverse brace-depth scan from the call offset to + // the nearest unmatched `{`, checking whether `try` precedes it. + // String/comment contexts are not stripped — a false positive only + // turns the silent sentinel into a (more Node-faithful) throw. + fn require_site_in_try(source: &str, spec: &str) -> bool { + let needle_sq = format!("require('{}')", spec); + let needle_dq = format!("require(\"{}\")", spec); + let bytes = source.as_bytes(); + let mut search = 0usize; + loop { + let hit = source[search..] + .find(&needle_sq) + .or_else(|| source[search..].find(&needle_dq)); + let Some(rel) = hit else { return false }; + let at = search + rel; + // Walk backwards to the nearest unmatched `{`, repeatedly: each + // enclosing block is checked for a preceding `try`. 
+ let mut depth = 0i32; + let mut i = at; + while i > 0 { + i -= 1; + match bytes[i] { + b'}' => depth += 1, + b'{' => { + if depth > 0 { + depth -= 1; + } else { + // Enclosing block opener — does `try` precede it? + let mut j = i; + while j > 0 + && (bytes[j - 1] == b' ' + || bytes[j - 1] == b'\t' + || bytes[j - 1] == b'\r' + || bytes[j - 1] == b'\n') + { + j -= 1; + } + if j >= 3 + && &bytes[j - 3..j] == b"try" + && (j == 3 || !bytes[j - 4].is_ascii_alphanumeric()) + { + return true; + } + // Keep walking outward (this block wasn't a try). + } + } + _ => {} + } + } + search = at + 1; + } + } + let require_resolve_cases = require_specs .iter() .map(|spec| format!(" if (specifier === '{}') return '{}';", spec, spec)) diff --git a/crates/perry/src/commands/compile/collect_modules.rs b/crates/perry/src/commands/compile/collect_modules.rs index 710c9d9e2c..ef3c440f08 100644 --- a/crates/perry/src/commands/compile/collect_modules.rs +++ b/crates/perry/src/commands/compile/collect_modules.rs @@ -33,6 +33,7 @@ use super::{ mod create_require_transform; mod crypto_ns; mod dynamic_glob; +mod native_addon; mod parse_error; #[cfg(test)] mod tests; @@ -40,6 +41,7 @@ mod tests; use create_require_transform::transform_create_require_literal_requires; use crypto_ns::module_uses_global_crypto_namespace; use dynamic_glob::expand_dynamic_import_glob; +use native_addon::refuse_compile_package_native_addon; use parse_error::annotate_parse_error; const MAX_CROSS_MODULE_INLINE_PRIOR_MODULES: usize = 128; @@ -214,150 +216,6 @@ pub(super) fn known_node_submodule_key(source: &str) -> Option<&'static str> { } } -fn nearest_package_root(path: &std::path::Path) -> Option { - let mut dir = path.parent(); - while let Some(candidate) = dir { - if candidate.join("package.json").exists() { - return Some(candidate.to_path_buf()); - } - dir = candidate.parent(); - } - None -} - -fn package_root_for_compile_package( - ctx: &CompilationContext, - path: &std::path::Path, -) -> Option { - 
ctx.compile_package_dirs - .values() - .filter(|dir| path.starts_with(dir)) - .max_by_key(|dir| dir.components().count()) - .cloned() - .or_else(|| nearest_package_root(path)) -} - -fn package_name_from_package_json(package_root: &std::path::Path) -> Option { - let package_json = fs::read_to_string(package_root.join("package.json")).ok()?; - let parsed = serde_json::from_str::(&package_json).ok()?; - parsed - .get("name") - .and_then(|name| name.as_str()) - .map(str::to_string) -} - -fn find_node_addon_file(dir: &std::path::Path, max_depth: usize) -> Option { - if max_depth == 0 { - return None; - } - let Ok(entries) = fs::read_dir(dir) else { - return None; - }; - for entry in entries.flatten() { - let path = entry.path(); - let file_name = entry.file_name(); - let file_name = file_name.to_string_lossy(); - if file_name == "node_modules" || file_name == ".git" { - continue; - } - if path.is_file() && path.extension().and_then(|ext| ext.to_str()) == Some("node") { - return Some(path); - } - if path.is_dir() { - if let Some(found) = find_node_addon_file(&path, max_depth - 1) { - return Some(found); - } - } - } - None -} - -fn node_addon_marker(package_root: &std::path::Path) -> Option<(&'static str, String)> { - let binding_gyp = package_root.join("binding.gyp"); - if binding_gyp.exists() { - return Some(("binding.gyp", binding_gyp.display().to_string())); - } - let prebuilds = package_root.join("prebuilds"); - if prebuilds.is_dir() { - return Some(("prebuilds/", prebuilds.display().to_string())); - } - let package_json_path = package_root.join("package.json"); - if let Ok(package_json) = fs::read_to_string(&package_json_path) { - if let Ok(parsed) = serde_json::from_str::(&package_json) { - if parsed - .get("gypfile") - .and_then(|value| value.as_bool()) - .unwrap_or(false) - { - return Some(( - "package.json gypfile", - package_json_path.display().to_string(), - )); - } - if package_json_dependency_uses_native_addon_loader(&parsed, "node-gyp-build") - || 
package_json_dependency_uses_native_addon_loader(&parsed, "bindings") - { - return Some(( - "native addon loader dependency", - package_json_path.display().to_string(), - )); - } - } - } - if let Some(node_file) = find_node_addon_file(package_root, 5) { - return Some(("*.node", node_file.display().to_string())); - } - None -} - -fn package_json_dependency_uses_native_addon_loader( - package_json: &serde_json::Value, - loader_name: &str, -) -> bool { - ["dependencies", "optionalDependencies"] - .iter() - .any(|section| { - package_json - .get(section) - .and_then(|deps| deps.as_object()) - .is_some_and(|deps| deps.contains_key(loader_name)) - }) -} - -fn refuse_compile_package_native_addon( - ctx: &mut CompilationContext, - canonical: &std::path::Path, -) -> Result<()> { - let Some(package_root) = package_root_for_compile_package(ctx, canonical) else { - return Ok(()); - }; - if !ctx - .checked_compile_package_native_addon_roots - .insert(package_root.clone()) - { - return Ok(()); - } - if has_perry_native_library(&package_root) { - return Ok(()); - } - let Some((marker, marker_path)) = node_addon_marker(&package_root) else { - return Ok(()); - }; - let package_name = package_name_from_package_json(&package_root) - .unwrap_or_else(|| package_root.display().to_string()); - anyhow::bail!( - "package `{}` is in `perry.compilePackages` but uses a Node native addon ({}) at {}.\n\ - Perry cannot load Node `.node` / N-API addons inside a native Perry binary. 
\ - Remove `{}` from `perry.compilePackages`, choose a pure JS/TS package, \ - or replace the native boundary with a Perry native binding \ - (`perry.nativeLibrary` / perry-ffi).", - package_name, - marker, - marker_path, - package_name - ); -} - /// Collect all modules to compile (transitive closure of imports) pub(super) fn collect_modules( entry_path: &PathBuf, @@ -1012,10 +870,20 @@ fn collect_module_one( worker_path_sets.push(set); } perry_hir::Resolution::Unresolved(reason) => { - dyn_errors.push(format!( - "worker_threads Worker in module {}: {}", - module_name, reason - )); + // Real-world packages (e.g. Next.js build-time worker + // pools) construct Workers on paths that are never hit + // when the compiled program runs. Warn and let codegen + // lower this WorkerNew to a runtime throw instead of + // failing the whole compile. Push an empty set to keep + // the fill pass aligned with resolved siblings. + if matches!(format, OutputFormat::Text) { + eprintln!( + " Warning: worker_threads Worker in module {}: {} — \ + this Worker will throw if constructed at runtime", + module_name, reason + ); + } + worker_path_sets.push(Vec::new()); } } } @@ -1081,6 +949,7 @@ fn collect_module_one( type_only: false, is_dynamic: true, is_dynamic_target: false, + is_deferred_require: false, }); } @@ -1541,6 +1410,38 @@ fn collect_module_one( } } + // Next.js lazy-require: the CJS→ESM wrap names a binding `_lazyreq_N` when + // every `require('S')` call site is inside a function body (lazy in Node). + // Tag the import so `classify_eager_modules` leaves the target Deferred — + // matching Node, which only loads such a module when the enclosing function + // runs (e.g. jsonwebtoken, required only inside Next.js's request handlers). + // The require shim triggers the target's `__init` on first `require()`, so + // an over-eager classification is self-correcting at runtime. 
Limited to + // Perry-compiled (`NativeCompiled`) targets — native stdlib / V8 modules + // have their own init paths. + if was_cjs_wrapped { + for import in &mut hir_module.imports { + if import.type_only + || import.is_dynamic + || import.is_native + || import.module_kind != perry_hir::ModuleKind::NativeCompiled + { + continue; + } + let is_lazy = import.specifiers.iter().any(|s| { + let local = match s { + perry_hir::ImportSpecifier::Default { local } => local, + perry_hir::ImportSpecifier::Namespace { local } => local, + perry_hir::ImportSpecifier::Named { local, .. } => local, + }; + local.starts_with("_lazyreq_") + }); + if is_lazy { + import.is_deferred_require = true; + } + } + } + // Process re-exports for export in &hir_module.exports { let source = match export { diff --git a/crates/perry/src/commands/compile/collect_modules/native_addon.rs b/crates/perry/src/commands/compile/collect_modules/native_addon.rs new file mode 100644 index 0000000000..d5de61317c --- /dev/null +++ b/crates/perry/src/commands/compile/collect_modules/native_addon.rs @@ -0,0 +1,162 @@ +//! Compile-package Node native-addon detection. +//! +//! Extracted from `collect_modules.rs` (file-size cap). A package listed in +//! `perry.compilePackages` must be pure JS/TS — Perry cannot load Node +//! `.node` / N-API addons inside a native binary. These helpers locate the +//! package root for a resolved file and probe it for native-addon markers +//! (`binding.gyp`, `prebuilds/`, `gypfile`, `node-gyp-build`/`bindings` +//! loader deps, or a stray `*.node`), so `refuse_compile_package_native_addon` +//! can fail the compile with an actionable message instead of silently +//! emitting a broken binary. + +use anyhow::Result; +use std::fs; +use std::path::PathBuf; + +// Parent (`collect_modules`) private imports are visible to this child module. 
+use super::has_perry_native_library; +use super::CompilationContext; + +fn nearest_package_root(path: &std::path::Path) -> Option { + let mut dir = path.parent(); + while let Some(candidate) = dir { + if candidate.join("package.json").exists() { + return Some(candidate.to_path_buf()); + } + dir = candidate.parent(); + } + None +} + +fn package_root_for_compile_package( + ctx: &CompilationContext, + path: &std::path::Path, +) -> Option { + ctx.compile_package_dirs + .values() + .filter(|dir| path.starts_with(dir)) + .max_by_key(|dir| dir.components().count()) + .cloned() + .or_else(|| nearest_package_root(path)) +} + +fn package_name_from_package_json(package_root: &std::path::Path) -> Option { + let package_json = fs::read_to_string(package_root.join("package.json")).ok()?; + let parsed = serde_json::from_str::(&package_json).ok()?; + parsed + .get("name") + .and_then(|name| name.as_str()) + .map(str::to_string) +} + +fn find_node_addon_file(dir: &std::path::Path, max_depth: usize) -> Option { + if max_depth == 0 { + return None; + } + let Ok(entries) = fs::read_dir(dir) else { + return None; + }; + for entry in entries.flatten() { + let path = entry.path(); + let file_name = entry.file_name(); + let file_name = file_name.to_string_lossy(); + if file_name == "node_modules" || file_name == ".git" { + continue; + } + if path.is_file() && path.extension().and_then(|ext| ext.to_str()) == Some("node") { + return Some(path); + } + if path.is_dir() { + if let Some(found) = find_node_addon_file(&path, max_depth - 1) { + return Some(found); + } + } + } + None +} + +fn node_addon_marker(package_root: &std::path::Path) -> Option<(&'static str, String)> { + let binding_gyp = package_root.join("binding.gyp"); + if binding_gyp.exists() { + return Some(("binding.gyp", binding_gyp.display().to_string())); + } + let prebuilds = package_root.join("prebuilds"); + if prebuilds.is_dir() { + return Some(("prebuilds/", prebuilds.display().to_string())); + } + let package_json_path = 
package_root.join("package.json"); + if let Ok(package_json) = fs::read_to_string(&package_json_path) { + if let Ok(parsed) = serde_json::from_str::(&package_json) { + if parsed + .get("gypfile") + .and_then(|value| value.as_bool()) + .unwrap_or(false) + { + return Some(( + "package.json gypfile", + package_json_path.display().to_string(), + )); + } + if package_json_dependency_uses_native_addon_loader(&parsed, "node-gyp-build") + || package_json_dependency_uses_native_addon_loader(&parsed, "bindings") + { + return Some(( + "native addon loader dependency", + package_json_path.display().to_string(), + )); + } + } + } + if let Some(node_file) = find_node_addon_file(package_root, 5) { + return Some(("*.node", node_file.display().to_string())); + } + None +} + +fn package_json_dependency_uses_native_addon_loader( + package_json: &serde_json::Value, + loader_name: &str, +) -> bool { + ["dependencies", "optionalDependencies"] + .iter() + .any(|section| { + package_json + .get(section) + .and_then(|deps| deps.as_object()) + .is_some_and(|deps| deps.contains_key(loader_name)) + }) +} + +pub(super) fn refuse_compile_package_native_addon( + ctx: &mut CompilationContext, + canonical: &std::path::Path, +) -> Result<()> { + let Some(package_root) = package_root_for_compile_package(ctx, canonical) else { + return Ok(()); + }; + if !ctx + .checked_compile_package_native_addon_roots + .insert(package_root.clone()) + { + return Ok(()); + } + if has_perry_native_library(&package_root) { + return Ok(()); + } + let Some((marker, marker_path)) = node_addon_marker(&package_root) else { + return Ok(()); + }; + let package_name = package_name_from_package_json(&package_root) + .unwrap_or_else(|| package_root.display().to_string()); + anyhow::bail!( + "package `{}` is in `perry.compilePackages` but uses a Node native addon ({}) at {}.\n\ + Perry cannot load Node `.node` / N-API addons inside a native Perry binary. 
\ + Remove `{}` from `perry.compilePackages`, choose a pure JS/TS package, \ + or replace the native boundary with a Perry native binding \ + (`perry.nativeLibrary` / perry-ffi).", + package_name, + marker, + marker_path, + package_name + ); +} diff --git a/crates/perry/src/commands/compile/init_order.rs b/crates/perry/src/commands/compile/init_order.rs index ad37c46184..106544e94a 100644 --- a/crates/perry/src/commands/compile/init_order.rs +++ b/crates/perry/src/commands/compile/init_order.rs @@ -48,7 +48,7 @@ pub(super) fn classify_eager_modules(ctx: &mut CompilationContext, entry_path: & let static_targets: Vec = module .imports .iter() - .filter(|i| !i.is_dynamic && !i.type_only) + .filter(|i| !i.is_dynamic && !i.type_only && !i.is_deferred_require) .filter_map(|i| i.resolved_path.as_ref().map(PathBuf::from)) .collect(); let reexport_sources: Vec = module @@ -136,7 +136,10 @@ pub(super) fn topo_sort_non_entry_modules( // (transitively reached via the same phony edge chain), // so tracer's top-level `Context.Reference()(...)` ran // against an uninitialized Context global and threw. - if import.type_only { + // `is_deferred_require`: a function-local `require('S')` is not an + // init-order edge — S inits lazily when the require shim runs, not + // as part of this module's eager init. + if import.type_only || import.is_deferred_require { continue; } if let Some(ref resolved) = import.resolved_path { diff --git a/crates/perry/src/commands/compile/resolve.rs b/crates/perry/src/commands/compile/resolve.rs index 2cd742f014..6cb9c04f0e 100644 --- a/crates/perry/src/commands/compile/resolve.rs +++ b/crates/perry/src/commands/compile/resolve.rs @@ -408,28 +408,57 @@ pub(super) fn resolve_with_extensions(base: &Path) -> Option { return Some(base.to_path_buf()); } - // Try with extensions in order of preference (TS before JS) + // Try with extensions in order of preference (TS before JS). 
+ // + // Node module resolution APPENDS the extension to the full specifier + // (`./stream-ops.web` -> `./stream-ops.web.js`); it never strips a dotted + // segment that isn't a real module extension. `Path::with_extension` + // REPLACES the last `.foo` segment, so on `stream-ops.web` it produces + // `stream-ops.js` — which, in Next.js's app-render dir, is the *requiring* + // module itself (`stream-ops.js` requires `./stream-ops.web`). Returning it + // makes the module self-require and its re-export getters recurse forever + // (`exports.chainStreams` -> `self.chainStreams` -> ... stack overflow). + // + // So: always try the APPEND form first. Only fall back to the REPLACE form + // when the specifier already ends in a recognized module extension — that + // path exists purely for Perry's TS-over-JS preference (`./foo.js` whose + // `.js` was pruned but `./foo.ts` is present), never to swap an arbitrary + // filename segment like `.web`. + let base_ext_is_module = base + .extension() + .and_then(|e| e.to_str()) + .map(|e| { + matches!( + e, + "js" | "mjs" | "cjs" | "ts" | "tsx" | "mts" | "cts" | "json" | "node" + ) + }) + .unwrap_or(false); + let path_str = base.to_string_lossy().to_string(); for ext in all_extensions { - let with_ext = base.with_extension(ext.trim_start_matches('.')); - if with_ext.exists() && with_ext.is_file() { - return Some(with_ext); - } - - // Also try adding extension to full path (for paths like ./foo.js) - let path_str = base.to_string_lossy(); - let with_ext = PathBuf::from(format!("{}{}", path_str, ext)); - if with_ext.exists() && with_ext.is_file() { - // If we found a JS file, check for TS equivalent first + // APPEND: `./stream-ops.web` + `.js` -> `./stream-ops.web.js`. + let appended = PathBuf::from(format!("{}{}", path_str, ext)); + if appended.exists() && appended.is_file() { + // If we landed on a JS file, prefer a co-located TS source. 
if matches!(ext, ".js" | ".mjs" | ".cjs") { - let stem_str = path_str.to_string(); for ts_ext in ts_extensions { - let ts_path = PathBuf::from(format!("{}{}", stem_str, ts_ext)); + let ts_path = PathBuf::from(format!("{}{}", path_str, ts_ext)); if ts_path.exists() && ts_path.is_file() { return Some(ts_path); } } } - return Some(with_ext); + return Some(appended); + } + + // REPLACE: only safe when the specifier already carries a real module + // extension (e.g. `./foo.js` -> `./foo.ts`). Skipped for `.web`-style + // dotted filenames so we never resolve to a sibling module. + if base_ext_is_module { + let replaced = base.with_extension(ext.trim_start_matches('.')); + if replaced.exists() && replaced.is_file() { + return Some(replaced); + } } } @@ -465,7 +494,13 @@ pub(super) fn resolve_package_entry(package_dir: &Path, subpath: Option<&str>) - }; if let Some(exports) = pkg.get("exports") { - if let Some(entry) = resolve_exports(exports, &export_key) { + // Try every condition branch in priority order and take the first + // target that exists on disk. A single-winner pick breaks under + // Next.js standalone output: its file tracing prunes the package + // files the build didn't load, so `@swc/helpers`' `import` target + // (`esm/*.js`) is absent while the `default` target (`cjs/*.cjs`) + // is present — Node resolves the latter at require time. + for entry in resolve_exports_candidates(exports, &export_key) { let entry_path = package_dir.join(&entry); if entry_path.exists() { return Some(entry_path); @@ -656,6 +691,60 @@ fn resolve_subpath_import(import_source: &str, importer_path: &Path) -> Option

Vec { + const CONDITIONS: &[&str] = &["perry", "import", "module", "default", "require", "node"]; + fn collect(value: &serde_json::Value, subpath: &str, out: &mut Vec) { + match value { + serde_json::Value::String(s) => { + if !out.contains(s) { + out.push(s.clone()); + } + } + serde_json::Value::Object(map) => { + if let Some(entry) = map.get(subpath) { + collect(entry, subpath, out); + return; + } + for (key, entry) in map.iter() { + if key.contains('*') { + let parts: Vec<&str> = key.splitn(2, '*').collect(); + if parts.len() == 2 { + let (prefix, suffix) = (parts[0], parts[1]); + if subpath.starts_with(prefix) && subpath.ends_with(suffix) { + let matched = &subpath[prefix.len()..subpath.len() - suffix.len()]; + let mut templates = Vec::new(); + collect(entry, subpath, &mut templates); + for template in templates { + let resolved = template.replace('*', matched); + if !out.contains(&resolved) { + out.push(resolved); + } + } + } + } + } + } + for condition in CONDITIONS { + if let Some(entry) = map.get(*condition) { + collect(entry, subpath, out); + } + } + } + _ => {} + } + } + let mut out = Vec::new(); + collect(exports, subpath, &mut out); + out +} + fn canonical_existing_declaration(path: PathBuf) -> Option { if path.exists() && is_declaration_file(&path) { Some(path.canonicalize().unwrap_or(path)) diff --git a/crates/perry/src/commands/compile/resolve/tests.rs b/crates/perry/src/commands/compile/resolve/tests.rs index 49dbc2e32d..1bc0faa426 100644 --- a/crates/perry/src/commands/compile/resolve/tests.rs +++ b/crates/perry/src/commands/compile/resolve/tests.rs @@ -1327,6 +1327,40 @@ mod manifest_parse_tests { assert!(msg.contains("backends.vulkan.available"), "got: {msg}"); assert!(msg.contains("expected boolean"), "got: {msg}"); } + + #[test] + fn dotted_specifier_appends_not_replaces_extension() { + // Next.js app-render dir: `stream-ops.js` and `stream-ops.web.js` + // coexist, and `stream-ops.js` does `require("./stream-ops.web")`. 
+ // `Path::with_extension("js")` REPLACES `.web` → `stream-ops.js` (the + // requiring file itself); resolving to it makes the module self-require + // and its re-export getters recurse forever. The resolver must APPEND: + // `./stream-ops.web` → `./stream-ops.web.js`. + let dir = tempfile::tempdir().expect("tempdir"); + let root = dir.path(); + std::fs::write(root.join("stream-ops.js"), "// requiring module\n").expect("write"); + std::fs::write(root.join("stream-ops.web.js"), "// the real target\n").expect("write"); + + let resolved = resolve_with_extensions(&root.join("stream-ops.web")).expect("must resolve"); + assert_eq!( + resolved, + root.join("stream-ops.web.js"), + "must append `.js` to the full specifier, not strip `.web`" + ); + } + + #[test] + fn pruned_js_specifier_still_falls_back_to_ts_via_replace() { + // The REPLACE path is retained for Perry's TS-over-JS preference: a + // `require("./foo.js")` whose `.js` was pruned but whose `./foo.ts` + // source is present must still resolve to the TS file. + let dir = tempfile::tempdir().expect("tempdir"); + let root = dir.path(); + std::fs::write(root.join("foo.ts"), "export const x = 1;\n").expect("write"); + + let resolved = resolve_with_extensions(&root.join("foo.js")).expect("must resolve"); + assert_eq!(resolved, root.join("foo.ts")); + } } #[cfg(test)] @@ -1695,3 +1729,42 @@ mod subpath_imports_tests { ); } } + +#[cfg(test)] +mod exports_candidates_tests { + use crate::commands::compile::resolve::resolve_exports_candidates; + + #[test] + fn pruned_import_target_falls_back_to_default() { + // @swc/helpers shape under Next.js standalone output: file tracing + // prunes esm/, so the `import` condition target is absent on disk and + // the resolver must surface `default` (cjs) as a later candidate. 
+ let exports: serde_json::Value = serde_json::json!({ + ".": { "import": "./esm/index.js", "default": "./cjs/index.cjs" }, + "./_/_interop_require_default": { + "import": "./esm/_interop_require_default.js", + "default": "./cjs/_interop_require_default.cjs" + } + }); + let candidates = resolve_exports_candidates(&exports, "./_/_interop_require_default"); + assert_eq!( + candidates, + vec![ + "./esm/_interop_require_default.js".to_string(), + "./cjs/_interop_require_default.cjs".to_string(), + ] + ); + } + + #[test] + fn wildcard_candidates_expand_star() { + let exports: serde_json::Value = serde_json::json!({ + "./cjs/*": { "import": "./esm/*.js", "default": "./cjs/*.cjs" } + }); + let candidates = resolve_exports_candidates(&exports, "./cjs/foo"); + assert_eq!( + candidates, + vec!["./esm/foo.js".to_string(), "./cjs/foo.cjs".to_string()] + ); + } +}