diff --git a/crates/perry-hir/src/lower/context.rs b/crates/perry-hir/src/lower/context.rs index 97585c7ae..6b840eefd 100644 --- a/crates/perry-hir/src/lower/context.rs +++ b/crates/perry-hir/src/lower/context.rs @@ -145,6 +145,7 @@ impl LoweringContext { mixin_funcs: HashMap::new(), anon_shape_classes: HashMap::new(), forward_class_names: std::collections::HashSet::new(), + module_class_decl_names: std::collections::HashSet::new(), next_anon_shape_id: 0, class_method_return_types: Vec::new(), class_captures: Vec::new(), diff --git a/crates/perry-hir/src/lower/expr_call/regex_string.rs b/crates/perry-hir/src/lower/expr_call/regex_string.rs index 0d37c09ca..0c4d00933 100644 --- a/crates/perry-hir/src/lower/expr_call/regex_string.rs +++ b/crates/perry-hir/src/lower/expr_call/regex_string.rs @@ -25,16 +25,32 @@ pub(super) fn try_regex_string_methods( if let ast::MemberProp::Ident(method_ident) = &member.prop { let m = method_ident.sym.as_ref(); if (m == "test" || m == "exec") && args.len() == 1 { - // Check if the object is a regex literal or a local assigned to a regex + // Check if the object is a regex literal or a local assigned to a regex. + // + // CRITICAL (#A — semver/minimatch source-compile): do NOT + // treat `x.test(arg)` as a RegExp test merely because `x` is + // an `Any`/`Unknown`/untyped local. `.test()` is also a + // common INSTANCE method name (semver's `Comparator.test` / + // `Range.test`, etc.). An imported class instance is typed + // `Any` at the call site, so the old `Any | Unknown | + // unwrap_or(true)` heuristic mis-lowered `comparator.test(v)` + // into `js_regexp_test(comparator-coerced-to-string, ...)`, + // silently returning a bogus boolean and never running the + // real method body — breaking `semver.satisfies` / + // `minimatch(...)`. The runtime already routes a genuine + // RegExp receiver's `.test()`/`.exec()` through the dynamic + // method-dispatch path (`dispatch_regex_receiver_method` in + // `js_native_call_method`, #1731), so falling through to a + // normal method call is correct for BOTH a regex value + // (runtime regex dispatch) and a class instance (instance + // method). Only take the codegen fast path when we have + // positive evidence the receiver is a regex. let is_regex_obj = match member.obj.as_ref() { ast::Expr::Lit(ast::Lit::Regex(_)) => true, ast::Expr::Ident(ident) => ctx .lookup_local_type(ident.sym.as_ref()) - .map(|ty| { - matches!(ty, Type::Any | Type::Unknown) - || matches!(ty, Type::Named(n) if n == "RegExp") - }) - .unwrap_or(true), + .map(|ty| matches!(ty, Type::Named(n) if n == "RegExp")) + .unwrap_or(false), _ => false, }; if is_regex_obj { @@ -70,17 +86,31 @@ pub(super) fn try_regex_string_methods( && args.len() == 1 { let is_match_all = method_ident.sym.as_ref() == "matchAll"; - // Check if the argument is a regex literal or a local holding a regex + // Check if the argument is a regex literal or a local holding a regex. + // + // CRITICAL (#A — minimatch source-compile): do NOT assume + // an `Any`/`Unknown`/untyped ARG is a regex (which would + // treat the RECEIVER as a string and lower to + // `Expr::StringMatch`). `.match()` is also a common + // INSTANCE method name — minimatch's `Minimatch.match` is + // invoked as `new Minimatch(pat).match(p)` inside the + // top-level `minimatch(p, pat)` arrow, where `p` is an + // untyped param. The old `Any | Unknown | None => true` + // heuristic mis-lowered that into + // `StringMatch(new Minimatch(pat), p)`, so `minimatch(...)` + // returned `null` instead of the boolean match result. + // The runtime already routes a genuine string receiver's + // `.match(regex)` through dynamic dispatch (#519/#510), so + // falling through to a normal method call is correct for + // BOTH a string and a class instance. Only take the codegen + // fast path with positive evidence the arg is a regex. let arg_is_regex = match call.args.first().map(|a| a.expr.as_ref()) { Some(ast::Expr::Lit(ast::Lit::Regex(_))) => true, Some(ast::Expr::Ident(ident)) => { - match ctx.lookup_local_type(ident.sym.as_ref()) { - // Known regex local - Some(Type::Named(n)) if n == "RegExp" => true, - // Unknown type — assume could be regex - Some(Type::Any) | Some(Type::Unknown) | None => true, - _ => false, - } + matches!( + ctx.lookup_local_type(ident.sym.as_ref()), + Some(Type::Named(n)) if n == "RegExp" + ) } _ => false, }; diff --git a/crates/perry-hir/src/lower/lower_expr.rs b/crates/perry-hir/src/lower/lower_expr.rs index 6d3523a11..1b437ec94 100644 --- a/crates/perry-hir/src/lower/lower_expr.rs +++ b/crates/perry-hir/src/lower/lower_expr.rs @@ -2352,6 +2352,40 @@ fn lower_expr_impl(ctx: &mut LoweringContext, expr: &ast::Expr) -> Result // back up. ast::Expr::Class(class_expr) => { let ident_name = class_expr.ident.as_ref().map(|i| i.sym.to_string()); + // A NAMED class EXPRESSION used as a VALUE whose name collides + // with an existing module-scope class — a TOP-LEVEL `class X` + // declaration OR an imported class binding — must NOT reuse that + // class's name / ClassId. Per JS spec a class-expression's name + // binds only inside its own body, so the two are distinct + // classes. Reusing the id silently overwrote the real class with + // the (often nearly empty) nested expression. minimatch's + // `defaults()` returns + // `Object.assign(m, { Minimatch: class Minimatch extends + // orig.Minimatch {…}, AST: class AST extends orig.AST {…} })` + // — `Minimatch` collides with the top-level `export class + // Minimatch` (caught via `module_class_decl_names`), and `AST` + // collides with the IMPORTED `import { AST } from './ast.js'` + // (caught via `lookup_class`, since named class imports are + // registered too). Both nested expressions hijacked the real + // class id: `new Minimatch(pattern)` built a body-less instance, + // and `AST.fromGlob(...)` inside `Minimatch.parse` dispatched to + // the wrong (empty) class. Rename the colliding expression to a + // fresh unique name so it gets its own ClassId; the value + // position (object property / `new` site) holds the resulting + // ClassRef directly, so the original name is not needed at module + // scope. The `current_class` guard avoids renaming the rare + // self-referential `class C { … new C() … }` expression form. + let ident_name = match ident_name { + Some(n) + if (ctx.module_class_decl_names.contains(&n) + || ctx.lookup_class(&n).is_some() + || ctx.lookup_imported_func(&n).is_some()) + && ctx.current_class.as_deref() != Some(n.as_str()) => + { + Some(format!("{}__class_expr_{}", n, ctx.fresh_class())) + } + other => other, + }; let synthetic_name = ident_name.unwrap_or_else(|| { if !anonymous_class_has_static_name_member(&class_expr.class) { if let Some(name) = ctx.assignment_inferred_name.as_ref() { diff --git a/crates/perry-hir/src/lower/lower_module_fn.rs b/crates/perry-hir/src/lower/lower_module_fn.rs index e2860b416..1b90347b1 100644 --- a/crates/perry-hir/src/lower/lower_module_fn.rs +++ b/crates/perry-hir/src/lower/lower_module_fn.rs @@ -735,6 +735,11 @@ pub fn lower_module_full( _ => None, }; if let Some((name, cd)) = class_decl { + // Record this as a real top-level class DECLARATION so a + // same-named nested class EXPRESSION (minimatch's + // `defaults()` → `{ Minimatch: class Minimatch extends … }`) + // doesn't hijack its ClassId in `lower_class_from_ast`. + ctx.module_class_decl_names.insert(name.clone()); if ctx.lookup_class(&name).is_none() { let id = ctx.fresh_class(); ctx.register_class(name.clone(), id); diff --git a/crates/perry-hir/src/lower/lowering_context.rs b/crates/perry-hir/src/lower/lowering_context.rs index df9af3dfd..e0e07a222 100644 --- a/crates/perry-hir/src/lower/lowering_context.rs +++ b/crates/perry-hir/src/lower/lowering_context.rs @@ -531,6 +531,19 @@ pub struct LoweringContext { /// call dispatched into `Object.create`. Scoped save/restore in /// `lower_fn_body_block_stmt`. pub(crate) forward_class_names: std::collections::HashSet, + /// Names of TOP-LEVEL `class X { … }` declarations in the module being + /// lowered (populated by the module pre-pass). A NAMED class EXPRESSION + /// nested in a function body — e.g. minimatch's `defaults()` returns + /// `Object.assign(m, { Minimatch: class Minimatch extends orig.Minimatch + /// {…} })` — whose name collides with one of these must NOT reuse the + /// top-level class's ClassId / module-scope registration: per JS spec a + /// class-expression's name binds only inside its own body. Reusing the id + /// silently overwrote the real exported class with the (nearly empty) + /// nested expression, so `new Minimatch(...)` produced a body-less + /// instance (every field/method undefined). This set lets + /// `lower_class_from_ast` detect that collision and allocate a fresh, + /// uniquely-named class instead. + pub(crate) module_class_decl_names: std::collections::HashSet, /// Counter for generating anon-class names (`__AnonShape_N`). // #854: initialized in `new` but unread — anon-shape classes are now named // by content-addressed FNV hash (see `synthesize_anon_shape_class`), not by diff --git a/crates/perry-hir/tests/instance_test_method_not_regex.rs b/crates/perry-hir/tests/instance_test_method_not_regex.rs new file mode 100644 index 000000000..6d600a6d7 --- /dev/null +++ b/crates/perry-hir/tests/instance_test_method_not_regex.rs @@ -0,0 +1,117 @@ +//! Batch-3 functional-correctness fix (semver / minimatch source-compile): +//! `receiver.test(arg)` on an `Any`/`Unknown`/untyped local must NOT be +//! lowered to the RegExp `Expr::RegExpTest` codegen fast path. `.test()` is +//! also a common INSTANCE method name (semver's `Comparator.test` / +//! `Range.test`), and an imported class instance is typed `Any` at the call +//! site. The old heuristic (`Type::Any | Type::Unknown | unwrap_or(true)`) +//! mis-lowered `comparator.test(v)` to `js_regexp_test(comparator-as-string)`, +//! silently returning a bogus boolean and never running the real method body — +//! so `semver.satisfies(...)` always returned `false`. +//! +//! The runtime already routes a genuine RegExp receiver's `.test()`/`.exec()` +//! through dynamic method dispatch (`dispatch_regex_receiver_method`, #1731), +//! so falling through to a normal method call is correct for BOTH a regex +//! value and a class instance. A regex *literal* receiver still takes the fast +//! path. + +use perry_diagnostics::SourceCache; +use perry_hir::{lower_module, Module}; +use perry_parser::parse_typescript_with_cache; + +fn lower(src: &str) -> Module { + let mut cache = SourceCache::new(); + let parsed = parse_typescript_with_cache(src, "/tmp/instance_test_method.ts", &mut cache) + .expect("parse failed"); + lower_module(&parsed.module, "test", "/tmp/instance_test_method.ts").expect("lower failed") +} + +/// True if any function/init body in the module debug-prints a `RegExpTest` +/// node. Using the Debug rendering keeps the test independent of the internal +/// walker API surface. +fn module_has_regexp_test(module: &Module) -> bool { + format!("{:#?}", module).contains("RegExpTest") +} + +#[test] +fn untyped_receiver_test_call_is_not_regexp_test() { + // `c` is an untyped local (the `as any` mirror of an imported class + // instance). `c.test(10)` must lower to a normal method call, not a + // RegExpTest fast path. + let src = r#" + const c: any = makeComparator(); + const r = c.test(10); + "#; + let module = lower(src); + assert!( + !module_has_regexp_test(&module), + "`c.test(10)` on an untyped local must NOT lower to RegExpTest" + ); +} + +#[test] +fn member_receiver_test_call_is_not_regexp_test() { + // A member-access receiver (`this.set[i].test(f)` shape) must also fall + // through to dynamic dispatch. + let src = r#" + function f(obj: any, file: any) { + return obj.matcher.test(file); + } + "#; + let module = lower(src); + assert!( + !module_has_regexp_test(&module), + "`obj.matcher.test(file)` must NOT lower to RegExpTest" + ); +} + +#[test] +fn regex_literal_receiver_test_call_still_uses_fast_path() { + // A regex *literal* receiver has positive evidence and keeps the fast path. + let src = r#" + const hit = /foo/.test("foobar"); + "#; + let module = lower(src); + assert!( + module_has_regexp_test(&module), + "`/foo/.test(...)` (regex literal) should still lower to RegExpTest" + ); +} + +/// True if any body debug-prints a `StringMatch`/`StringMatchAll` node. +fn module_has_string_match(module: &Module) -> bool { + let dbg = format!("{:#?}", module); + dbg.contains("StringMatch") +} + +#[test] +fn chained_new_dot_match_is_not_string_match() { + // minimatch's `minimatch(p, pat)` arrow returns + // `new Minimatch(pat).match(p)` — a chained `new X(arg).match(arg)` where + // `arg` is an untyped param. `.match()` here is an INSTANCE method, not + // `String.prototype.match(regex)`. The old heuristic (untyped arg ⇒ + // regex) lowered it to `StringMatch(new Minimatch(pat), p)`, so the call + // returned `null` instead of the boolean match result. + let src = r#" + function f(p: any, pat: any) { + return new Matcher(pat).match(p); + } + "#; + let module = lower(src); + assert!( + !module_has_string_match(&module), + "`new Matcher(pat).match(p)` must NOT lower to StringMatch" + ); +} + +#[test] +fn string_match_regex_literal_still_uses_fast_path() { + // A regex *literal* arg keeps the StringMatch fast path. + let src = r#" + const m = "abc123".match(/\d+/); + "#; + let module = lower(src); + assert!( + module_has_string_match(&module), + "`\"...\".match(/\\d+/)` (regex literal arg) should still lower to StringMatch" + ); +} diff --git a/crates/perry-hir/tests/nested_class_expr_name_collision.rs b/crates/perry-hir/tests/nested_class_expr_name_collision.rs new file mode 100644 index 000000000..41e7a1d3b --- /dev/null +++ b/crates/perry-hir/tests/nested_class_expr_name_collision.rs @@ -0,0 +1,92 @@ +//! Batch-3 functional-correctness fix (minimatch source-compile): a NAMED +//! class EXPRESSION nested in a function body whose name collides with a +//! TOP-LEVEL class DECLARATION in the same module must NOT reuse the +//! declaration's ClassId / module-scope registration. +//! +//! minimatch's `defaults()` returns +//! `Object.assign(m, { Minimatch: class Minimatch extends orig.Minimatch +//! { constructor(){ super(...) } static defaults(){…} } })` +//! — a near-empty nested class expression that happens to share the name of +//! the real top-level `export class Minimatch { …18 fields, ~10 methods… }`. +//! Per JS spec a class-expression's name binds only inside its own body, so +//! the two are distinct classes. Perry reused the top-level class's ClassId +//! for the nested expression, silently overwriting the real exported class +//! with the body-less nested one — `new Minimatch(pattern)` then produced an +//! instance whose every field/method was `undefined`. +//! +//! The fix renames the colliding class expression to a fresh unique name so +//! it gets its own ClassId; the value position (object property / `new` site) +//! holds the resulting ClassRef directly. + +use perry_diagnostics::SourceCache; +use perry_hir::{lower_module, Module}; +use perry_parser::parse_typescript_with_cache; + +fn lower(src: &str) -> Module { + let src = src.to_string(); + std::thread::Builder::new() + .stack_size(32 * 1024 * 1024) + .spawn(move || { + let mut cache = SourceCache::new(); + let parsed = + parse_typescript_with_cache(&src, "/tmp/nested_class_collision.ts", &mut cache) + .expect("parse failed"); + lower_module(&parsed.module, "test", "/tmp/nested_class_collision.ts") + .expect("lower failed") + }) + .expect("spawn") + .join() + .expect("lower thread panicked") +} + +/// Find the lowered `Class` whose name is exactly `name` (not a synthetic +/// `__class_expr_N` rename). +fn find_class<'a>(module: &'a Module, name: &str) -> Option<&'a perry_hir::Class> { + module.classes.iter().find(|c| c.name == name) +} + +#[test] +fn nested_class_expr_does_not_overwrite_toplevel_declaration() { + // The nested `class Thing` appears in file order BEFORE the real + // top-level `class Thing` — exactly the minimatch shape (the nested + // expression in `defaults()` precedes `export class Minimatch`). + let src = r#" + const reg: any = (p: any) => p; + export const defaults = (def: any) => { + const orig: any = reg; + const m: any = (p: any) => orig(p); + return Object.assign(m, { + Thing: class Thing extends orig.Thing { + constructor(x: any) { super(x); } + static defaults() { return null; } + }, + }); + }; + export class Thing { + a; + set; + constructor(x: any) { + this.a = x; + this.set = [1, 2]; + } + greet() { return "hi"; } + make() { return this.set.length; } + } + "#; + let module = lower(src); + + // The real top-level `Thing` must survive with its full body — at least + // one real method (`greet`/`make`) and its declared fields. The nested + // expression (constructor + a single static) must NOT have clobbered it. + let thing = find_class(&module, "Thing").expect("top-level `Thing` class must exist"); + let method_names: Vec<&str> = thing.methods.iter().map(|m| m.name.as_str()).collect(); + assert!( + method_names.contains(&"greet") && method_names.contains(&"make"), + "top-level `Thing` must keep its real methods (got methods: {:?})", + method_names + ); + assert!( + thing.fields.iter().any(|f| f.name == "a") && thing.fields.iter().any(|f| f.name == "set"), + "top-level `Thing` must keep its declared fields" + ); +}