Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/perry-hir/src/lower/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ impl LoweringContext {
mixin_funcs: HashMap::new(),
anon_shape_classes: HashMap::new(),
forward_class_names: std::collections::HashSet::new(),
module_class_decl_names: std::collections::HashSet::new(),
next_anon_shape_id: 0,
class_method_return_types: Vec::new(),
class_captures: Vec::new(),
Expand Down
58 changes: 44 additions & 14 deletions crates/perry-hir/src/lower/expr_call/regex_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,32 @@ pub(super) fn try_regex_string_methods(
if let ast::MemberProp::Ident(method_ident) = &member.prop {
let m = method_ident.sym.as_ref();
if (m == "test" || m == "exec") && args.len() == 1 {
// Check if the object is a regex literal or a local assigned to a regex
// Check if the object is a regex literal or a local assigned to a regex.
//
// CRITICAL (#A — semver/minimatch source-compile): do NOT
// treat `x.test(arg)` as a RegExp test merely because `x` is
// an `Any`/`Unknown`/untyped local. `.test()` is also a
// common INSTANCE method name (semver's `Comparator.test` /
// `Range.test`, etc.). An imported class instance is typed
// `Any` at the call site, so the old `Any | Unknown |
// unwrap_or(true)` heuristic mis-lowered `comparator.test(v)`
// into `js_regexp_test(comparator-coerced-to-string, ...)`,
// silently returning a bogus boolean and never running the
// real method body — breaking `semver.satisfies` /
// `minimatch(...)`. The runtime already routes a genuine
// RegExp receiver's `.test()`/`.exec()` through the dynamic
// method-dispatch path (`dispatch_regex_receiver_method` in
// `js_native_call_method`, #1731), so falling through to a
// normal method call is correct for BOTH a regex value
// (runtime regex dispatch) and a class instance (instance
// method). Only take the codegen fast path when we have
// positive evidence the receiver is a regex.
let is_regex_obj = match member.obj.as_ref() {
ast::Expr::Lit(ast::Lit::Regex(_)) => true,
ast::Expr::Ident(ident) => ctx
.lookup_local_type(ident.sym.as_ref())
.map(|ty| {
matches!(ty, Type::Any | Type::Unknown)
|| matches!(ty, Type::Named(n) if n == "RegExp")
})
.unwrap_or(true),
.map(|ty| matches!(ty, Type::Named(n) if n == "RegExp"))
.unwrap_or(false),
_ => false,
};
if is_regex_obj {
Expand Down Expand Up @@ -70,17 +86,31 @@ pub(super) fn try_regex_string_methods(
&& args.len() == 1
{
let is_match_all = method_ident.sym.as_ref() == "matchAll";
// Check if the argument is a regex literal or a local holding a regex
// Check if the argument is a regex literal or a local holding a regex.
//
// CRITICAL (#A — minimatch source-compile): do NOT assume
// an `Any`/`Unknown`/untyped ARG is a regex (which would
// treat the RECEIVER as a string and lower to
// `Expr::StringMatch`). `.match()` is also a common
// INSTANCE method name — minimatch's `Minimatch.match` is
// invoked as `new Minimatch(pat).match(p)` inside the
// top-level `minimatch(p, pat)` arrow, where `p` is an
// untyped param. The old `Any | Unknown | None => true`
// heuristic mis-lowered that into
// `StringMatch(new Minimatch(pat), p)`, so `minimatch(...)`
// returned `null` instead of the boolean match result.
// The runtime already routes a genuine string receiver's
// `.match(regex)` through dynamic dispatch (#519/#510), so
// falling through to a normal method call is correct for
// BOTH a string and a class instance. Only take the codegen
// fast path with positive evidence the arg is a regex.
let arg_is_regex = match call.args.first().map(|a| a.expr.as_ref()) {
Some(ast::Expr::Lit(ast::Lit::Regex(_))) => true,
Some(ast::Expr::Ident(ident)) => {
match ctx.lookup_local_type(ident.sym.as_ref()) {
// Known regex local
Some(Type::Named(n)) if n == "RegExp" => true,
// Unknown type — assume could be regex
Some(Type::Any) | Some(Type::Unknown) | None => true,
_ => false,
}
matches!(
ctx.lookup_local_type(ident.sym.as_ref()),
Some(Type::Named(n)) if n == "RegExp"
)
}
_ => false,
};
Expand Down
34 changes: 34 additions & 0 deletions crates/perry-hir/src/lower/lower_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2352,6 +2352,40 @@ fn lower_expr_impl(ctx: &mut LoweringContext, expr: &ast::Expr) -> Result<Expr>
// back up.
ast::Expr::Class(class_expr) => {
let ident_name = class_expr.ident.as_ref().map(|i| i.sym.to_string());
// A NAMED class EXPRESSION used as a VALUE whose name collides
// with an existing module-scope class — a TOP-LEVEL `class X`
// declaration OR an imported class binding — must NOT reuse that
// class's name / ClassId. Per JS spec a class-expression's name
// binds only inside its own body, so the two are distinct
// classes. Reusing the id silently overwrote the real class with
// the (often nearly empty) nested expression. minimatch's
// `defaults()` returns
// `Object.assign(m, { Minimatch: class Minimatch extends
// orig.Minimatch {…}, AST: class AST extends orig.AST {…} })`
// — `Minimatch` collides with the top-level `export class
// Minimatch` (caught via `module_class_decl_names`), and `AST`
// collides with the IMPORTED `import { AST } from './ast.js'`
// (caught via `lookup_class`, since named class imports are
// registered too). Both nested expressions hijacked the real
// class id: `new Minimatch(pattern)` built a body-less instance,
// and `AST.fromGlob(...)` inside `Minimatch.parse` dispatched to
// the wrong (empty) class. Rename the colliding expression to a
// fresh unique name so it gets its own ClassId; the value
// position (object property / `new` site) holds the resulting
// ClassRef directly, so the original name is not needed at module
// scope. The `current_class` guard avoids renaming the rare
// self-referential `class C { … new C() … }` expression form.
let ident_name = match ident_name {
Some(n)
if (ctx.module_class_decl_names.contains(&n)
|| ctx.lookup_class(&n).is_some()
|| ctx.lookup_imported_func(&n).is_some())
&& ctx.current_class.as_deref() != Some(n.as_str()) =>
Comment on lines +2380 to +2383

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail
rg -n "lookup_imported_func|register_imported_func" crates/perry-hir/src/lower -g '*.rs'
rg -n "lookup_class|register_class" crates/perry-hir/src/lower -g '*.rs'
rg -n "lookup_imported_class|register_imported_class" crates/perry-hir/src/lower -g '*.rs'

Repository: PerryTS/perry

Length of output: 16118


🏁 Script executed:

sed -n '2370,2390p' crates/perry-hir/src/lower/lower_expr.rs | cat -n

Repository: PerryTS/perry

Length of output: 1455


🏁 Script executed:

# Check how imported items are registered, especially classes
sed -n '280,340p' crates/perry-hir/src/lower/module_decl.rs | cat -n

Repository: PerryTS/perry

Length of output: 4550


🏁 Script executed:

# Look for context on how imported classes flow through the system
rg -n "imported.*class|class.*import" crates/perry-hir/src/lower/lower_expr.rs -A 2 -B 2

Repository: PerryTS/perry

Length of output: 1272


🏁 Script executed:

rg -n "class_expr|CollisionDetect" crates/perry-hir/src/lower/tests.rs -A 3 -B 3

Repository: PerryTS/perry

Length of output: 39


🏁 Script executed:

# Check if there are any tests that exercise the imported function case for class expressions
rg -n "new.*\(class|class.*expression" crates/perry-hir/src/lower/tests.rs -A 2 -B 2

Repository: PerryTS/perry

Length of output: 39


🏁 Script executed:

# Look for documentation or comments about why lookup_imported_func might be needed
sed -n '2350,2400p' crates/perry-hir/src/lower/lower_expr.rs | cat -n

Repository: PerryTS/perry

Length of output: 3540


Remove overly broad collision check from class expression renaming logic.

The comment at line 2368 explicitly states that imported class bindings are caught via lookup_class (since "named class imports are registered too"). Checking ctx.lookup_imported_func(&n) is unnecessary and over-broad—it triggers renaming on collisions with imported functions, not just classes. This changes behavior for non-class import name collisions when it shouldn't. Restrict the predicate to class bindings only.

Suggested diff
-                    if (ctx.module_class_decl_names.contains(&n)
-                        || ctx.lookup_class(&n).is_some()
-                        || ctx.lookup_imported_func(&n).is_some())
+                    if (ctx.module_class_decl_names.contains(&n)
+                        || ctx.lookup_class(&n).is_some())
                         && ctx.current_class.as_deref() != Some(n.as_str()) =>
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@crates/perry-hir/src/lower/lower_expr.rs` around lines 2380 - 2383, The
condition check at lines 2380-2383 includes an overly broad collision check with
ctx.lookup_imported_func(&n).is_some() that is unnecessary and causes unintended
renaming behavior. According to the comment at line 2368, imported class
bindings are already caught via ctx.lookup_class(), so the imported function
check should be removed. Remove the ctx.lookup_imported_func(&n).is_some()
portion from the logical OR condition, keeping only the checks for
ctx.module_class_decl_names.contains(&n), ctx.lookup_class(&n).is_some(), and
the current_class comparison to restrict the predicate to class bindings only.

{
Some(format!("{}__class_expr_{}", n, ctx.fresh_class()))
}
other => other,
};
let synthetic_name = ident_name.unwrap_or_else(|| {
if !anonymous_class_has_static_name_member(&class_expr.class) {
if let Some(name) = ctx.assignment_inferred_name.as_ref() {
Expand Down
5 changes: 5 additions & 0 deletions crates/perry-hir/src/lower/lower_module_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,11 @@ pub fn lower_module_full(
_ => None,
};
if let Some((name, cd)) = class_decl {
// Record this as a real top-level class DECLARATION so a
// same-named nested class EXPRESSION (minimatch's
// `defaults()` → `{ Minimatch: class Minimatch extends … }`)
// doesn't hijack its ClassId in `lower_class_from_ast`.
ctx.module_class_decl_names.insert(name.clone());
if ctx.lookup_class(&name).is_none() {
let id = ctx.fresh_class();
ctx.register_class(name.clone(), id);
Expand Down
13 changes: 13 additions & 0 deletions crates/perry-hir/src/lower/lowering_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,19 @@ pub struct LoweringContext {
/// call dispatched into `Object.create`. Scoped save/restore in
/// `lower_fn_body_block_stmt`.
pub(crate) forward_class_names: std::collections::HashSet<String>,
/// Names of TOP-LEVEL `class X { … }` declarations in the module being
/// lowered (populated by the module pre-pass). A NAMED class EXPRESSION
/// nested in a function body — e.g. minimatch's `defaults()` returns
/// `Object.assign(m, { Minimatch: class Minimatch extends orig.Minimatch
/// {…} })` — whose name collides with one of these must NOT reuse the
/// top-level class's ClassId / module-scope registration: per JS spec a
/// class-expression's name binds only inside its own body. Reusing the id
/// silently overwrote the real exported class with the (nearly empty)
/// nested expression, so `new Minimatch(...)` produced a body-less
/// instance (every field/method undefined). This set lets
/// `lower_class_from_ast` detect that collision and allocate a fresh,
/// uniquely-named class instead.
pub(crate) module_class_decl_names: std::collections::HashSet<String>,
/// Counter for generating anon-class names (`__AnonShape_N`).
// #854: initialized in `new` but unread — anon-shape classes are now named
// by content-addressed FNV hash (see `synthesize_anon_shape_class`), not by
Expand Down
117 changes: 117 additions & 0 deletions crates/perry-hir/tests/instance_test_method_not_regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
//! Batch-3 functional-correctness fix (semver / minimatch source-compile):
//! `receiver.test(arg)` on an `Any`/`Unknown`/untyped local must NOT be
//! lowered to the RegExp `Expr::RegExpTest` codegen fast path. `.test()` is
//! also a common INSTANCE method name (semver's `Comparator.test` /
//! `Range.test`), and an imported class instance is typed `Any` at the call
//! site. The old heuristic (`Type::Any | Type::Unknown | unwrap_or(true)`)
//! mis-lowered `comparator.test(v)` to `js_regexp_test(comparator-as-string)`,
//! silently returning a bogus boolean and never running the real method body —
//! so `semver.satisfies(...)` always returned `false`.
//!
//! The runtime already routes a genuine RegExp receiver's `.test()`/`.exec()`
//! through dynamic method dispatch (`dispatch_regex_receiver_method`, #1731),
//! so falling through to a normal method call is correct for BOTH a regex
//! value and a class instance. A regex *literal* receiver still takes the fast
//! path.

use perry_diagnostics::SourceCache;
use perry_hir::{lower_module, Module};
use perry_parser::parse_typescript_with_cache;

fn lower(src: &str) -> Module {
let mut cache = SourceCache::new();
let parsed = parse_typescript_with_cache(src, "/tmp/instance_test_method.ts", &mut cache)
.expect("parse failed");
lower_module(&parsed.module, "test", "/tmp/instance_test_method.ts").expect("lower failed")
}

/// True if any function/init body in the module debug-prints a `RegExpTest`
/// node. Using the Debug rendering keeps the test independent of the internal
/// walker API surface.
fn module_has_regexp_test(module: &Module) -> bool {
format!("{:#?}", module).contains("RegExpTest")
}

#[test]
fn untyped_receiver_test_call_is_not_regexp_test() {
// `c` is an untyped local (the `as any` mirror of an imported class
// instance). `c.test(10)` must lower to a normal method call, not a
// RegExpTest fast path.
let src = r#"
const c: any = makeComparator();
const r = c.test(10);
"#;
let module = lower(src);
assert!(
!module_has_regexp_test(&module),
"`c.test(10)` on an untyped local must NOT lower to RegExpTest"
);
}

#[test]
fn member_receiver_test_call_is_not_regexp_test() {
// A member-access receiver (`this.set[i].test(f)` shape) must also fall
// through to dynamic dispatch.
let src = r#"
function f(obj: any, file: any) {
return obj.matcher.test(file);
}
"#;
let module = lower(src);
assert!(
!module_has_regexp_test(&module),
"`obj.matcher.test(file)` must NOT lower to RegExpTest"
);
}

#[test]
fn regex_literal_receiver_test_call_still_uses_fast_path() {
// A regex *literal* receiver has positive evidence and keeps the fast path.
let src = r#"
const hit = /foo/.test("foobar");
"#;
let module = lower(src);
assert!(
module_has_regexp_test(&module),
"`/foo/.test(...)` (regex literal) should still lower to RegExpTest"
);
}

/// True if any body debug-prints a `StringMatch`/`StringMatchAll` node.
fn module_has_string_match(module: &Module) -> bool {
let dbg = format!("{:#?}", module);
dbg.contains("StringMatch")
}

#[test]
fn chained_new_dot_match_is_not_string_match() {
// minimatch's `minimatch(p, pat)` arrow returns
// `new Minimatch(pat).match(p)` — a chained `new X(arg).match(arg)` where
// `arg` is an untyped param. `.match()` here is an INSTANCE method, not
// `String.prototype.match(regex)`. The old heuristic (untyped arg ⇒
// regex) lowered it to `StringMatch(new Minimatch(pat), p)`, so the call
// returned `null` instead of the boolean match result.
let src = r#"
function f(p: any, pat: any) {
return new Matcher(pat).match(p);
}
"#;
let module = lower(src);
assert!(
!module_has_string_match(&module),
"`new Matcher(pat).match(p)` must NOT lower to StringMatch"
);
}

#[test]
fn string_match_regex_literal_still_uses_fast_path() {
// A regex *literal* arg keeps the StringMatch fast path.
let src = r#"
const m = "abc123".match(/\d+/);
"#;
let module = lower(src);
assert!(
module_has_string_match(&module),
"`\"...\".match(/\\d+/)` (regex literal arg) should still lower to StringMatch"
);
}
92 changes: 92 additions & 0 deletions crates/perry-hir/tests/nested_class_expr_name_collision.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
//! Batch-3 functional-correctness fix (minimatch source-compile): a NAMED
//! class EXPRESSION nested in a function body whose name collides with a
//! TOP-LEVEL class DECLARATION in the same module must NOT reuse the
//! declaration's ClassId / module-scope registration.
//!
//! minimatch's `defaults()` returns
//! `Object.assign(m, { Minimatch: class Minimatch extends orig.Minimatch
//! { constructor(){ super(...) } static defaults(){…} } })`
//! — a near-empty nested class expression that happens to share the name of
//! the real top-level `export class Minimatch { …18 fields, ~10 methods… }`.
//! Per JS spec a class-expression's name binds only inside its own body, so
//! the two are distinct classes. Perry reused the top-level class's ClassId
//! for the nested expression, silently overwriting the real exported class
//! with the body-less nested one — `new Minimatch(pattern)` then produced an
//! instance whose every field/method was `undefined`.
//!
//! The fix renames the colliding class expression to a fresh unique name so
//! it gets its own ClassId; the value position (object property / `new` site)
//! holds the resulting ClassRef directly.

use perry_diagnostics::SourceCache;
use perry_hir::{lower_module, Module};
use perry_parser::parse_typescript_with_cache;

fn lower(src: &str) -> Module {
let src = src.to_string();
std::thread::Builder::new()
.stack_size(32 * 1024 * 1024)
.spawn(move || {
let mut cache = SourceCache::new();
let parsed =
parse_typescript_with_cache(&src, "/tmp/nested_class_collision.ts", &mut cache)
.expect("parse failed");
lower_module(&parsed.module, "test", "/tmp/nested_class_collision.ts")
.expect("lower failed")
})
.expect("spawn")
.join()
.expect("lower thread panicked")
}

/// Find the lowered `Class` whose name is exactly `name` (not a synthetic
/// `<name>__class_expr_N` rename).
fn find_class<'a>(module: &'a Module, name: &str) -> Option<&'a perry_hir::Class> {
module.classes.iter().find(|c| c.name == name)
}

#[test]
fn nested_class_expr_does_not_overwrite_toplevel_declaration() {
// The nested `class Thing` appears in file order BEFORE the real
// top-level `class Thing` — exactly the minimatch shape (the nested
// expression in `defaults()` precedes `export class Minimatch`).
let src = r#"
const reg: any = (p: any) => p;
export const defaults = (def: any) => {
const orig: any = reg;
const m: any = (p: any) => orig(p);
return Object.assign(m, {
Thing: class Thing extends orig.Thing {
constructor(x: any) { super(x); }
static defaults() { return null; }
},
});
};
export class Thing {
a;
set;
constructor(x: any) {
this.a = x;
this.set = [1, 2];
}
greet() { return "hi"; }
make() { return this.set.length; }
}
"#;
let module = lower(src);

// The real top-level `Thing` must survive with its full body — at least
// one real method (`greet`/`make`) and its declared fields. The nested
// expression (constructor + a single static) must NOT have clobbered it.
let thing = find_class(&module, "Thing").expect("top-level `Thing` class must exist");
let method_names: Vec<&str> = thing.methods.iter().map(|m| m.name.as_str()).collect();
assert!(
method_names.contains(&"greet") && method_names.contains(&"make"),
"top-level `Thing` must keep its real methods (got methods: {:?})",
method_names
);
assert!(
thing.fields.iter().any(|f| f.name == "a") && thing.fields.iter().any(|f| f.name == "set"),
"top-level `Thing` must keep its declared fields"
);
}
Loading