Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
af3410c
perf(hir): O(1) closure-capture analysis — incremental id_set + share…
Jun 16, 2026
5baf879
perf(hir): O(1) registry lookups — index native instances/modules/sta…
Jun 16, 2026
11731e5
perf(hir): fix exponential re-lowering of native-fluent method chains
Jun 16, 2026
0ddb979
fix(codegen): argless builtins ignore extra args instead of bailing
Jun 16, 2026
1e41fe7
fix(transform): async-generator catch with break/continue no longer d…
Jun 16, 2026
74f2988
fix(transform): labeled break/continue from a nested loop in an async…
Jun 16, 2026
1b99c43
fix(hir): labeled break to a labeled non-loop statement (if/switch)
Jun 17, 2026
b942603
fix(codegen): unique class-keys global per class — distinct names tha…
Jun 17, 2026
0c10e26
perf(codegen): outline per-new-site inline allocator (smaller IR + fa…
Jun 17, 2026
9e22074
perf(codegen): call shared constructor symbol instead of inlining at …
Jun 17, 2026
c367a0a
fix(codegen): injective function-symbol names (distinct names that sa…
Jun 17, 2026
dc9e096
fix(codegen): disambiguate same-named functions within a module
Jun 17, 2026
a5a7d6e
fix(codegen): disambiguate duplicate class-name method symbols by cla…
Jun 17, 2026
91f0275
fix(codegen): emit each function symbol once (dedup duplicate defines)
Jun 17, 2026
e1e5de7
perf(codegen): outline class-field-SET IC diamond behind PERRY_OUTLIN…
Jun 17, 2026
4335b17
perf(codegen): class-field-SET inline cache — fast-inline / slow-outl…
Jun 17, 2026
1a0a709
perf(codegen): outline class-field-SET guard-miss arm to one call (#5…
Jun 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/perry-codegen/src/codegen/artifacts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1640,6 +1640,7 @@ pub(super) fn emit_module_artifacts(c: ModuleArtifactsCtx<'_>) -> Result<()> {
&user_fn_wrapper_strict,
&user_fn_display_names,
&user_fn_source,
&super::helpers::duplicate_class_names(&hir.classes),
);

Ok(())
Expand Down
78 changes: 68 additions & 10 deletions crates/perry-codegen/src/codegen/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,14 @@ pub(crate) fn write_barriers_enabled() -> bool {
}

pub(super) fn scoped_fn_name(module_prefix: &str, hir_name: &str) -> String {
format!("perry_fn_{}__{}", module_prefix, sanitize(hir_name))
// Use the INJECTIVE sanitizer (same as scoped_static_method_name): plain
// `sanitize` maps every non-`[A-Za-z0-9_]` char to `_`, so distinct minified
// function names like `$Z5` and `_Z5` both became `perry_fn_<mod>___Z5` and
// clang rejected the module with "invalid redefinition of function". `func_names`
// is keyed by func id and every reference resolves through it, so changing the
// mangling here keeps all local-function call sites consistent. Byte-identical
// to `sanitize` for plain `[A-Za-z0-9_]` names (the overwhelming common case).
format!("perry_fn_{}__{}", module_prefix, sanitize_member(hir_name))
Comment on lines 111 to +119

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Make the member sanitizer collision-free before using it for function symbols.

sanitize_member is not actually injective: "$Z5" escapes to u__24_Z5, while a plain function named "u__24_Z5" stays u__24_Z5. That can still emit duplicate perry_fn_* definitions. Reserve the u_ namespace for escaped names, or switch to a length/tagged encoding, and add a regression for the plain-vs-escaped collision.

Possible localized fix
 pub(super) fn sanitize_member(name: &str) -> String {
     let is_plain = name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_');
-    if is_plain {
+    if is_plain && !name.starts_with("u_") {
         // Byte-identical to `sanitize` for plain names (incl. leading-digit fix).
         return sanitize(name);
     }
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@crates/perry-codegen/src/codegen/helpers.rs` around lines 111 - 119, The
scoped_fn_name function uses sanitize_member which is not injective and creates
collisions: escaped names like "$Z5" produce "u__24_Z5" while a plain function
named "u__24_Z5" stays the same, creating duplicate perry_fn_* definitions. Fix
sanitize_member to be collision-free by either reserving the "u_" namespace for
escaped names (transforming or rejecting plain names that start with "u_") or
implementing a length/tagged encoding scheme. Additionally, add a regression
test that verifies the collision between escaped "$Z5" and plain "u__24_Z5" is
prevented.

}

pub(super) fn scoped_static_method_name(
Expand Down Expand Up @@ -254,21 +261,72 @@ pub(super) fn collect_return_class(
}
}

/// Mangle a class method name into an LLVM symbol, scoped by module
/// prefix and class name.
/// Mangle a class method name into an LLVM symbol, scoped by module prefix and
/// class name, optionally disambiguated by the unique HIR class id.
///
/// `perry_method_<modprefix>__<class>__<method>`.
/// * `disambiguate == false` → `perry_method_<modprefix>__<class>__<method>`
/// (the historical, id-less form). Used for the overwhelming common case: a
/// class whose name is unique within its module. This form is also the ABI a
/// cross-module consumer reconstructs from import metadata (the consumer
/// can't always recover the source's HIR id), so EXPORTED classes — which are
/// always name-unique — must use it for symbols to resolve at link time.
///
/// * `disambiguate == true` → `perry_method_<modprefix>__<class>__c<id>__<method>`,
/// mirroring [`scoped_static_method_name`]. Reserved for the minified-bundle
/// pathology where two DISTINCT classes share a short name (`class j` reused
/// across scopes). Without the `c<id>` infix their methods mangle to the SAME
/// symbol (`perry_method_<mod>__j__getElementsByTagName`) and clang rejects
/// the module with `invalid redefinition of function`. The HIR class id is
/// unique per class, so the infix guarantees distinct symbols. Such
/// duplicate-named classes are necessarily non-exported (a module can't
/// export the same name twice), so the disambiguated form never needs to be
/// reconstructed cross-module.
///
/// Dispatch through the runtime VTABLE_REGISTRY (`js_register_class_method`) is
/// pointer-based and keyed by class id, so the symbol string only needs to be
/// unique — it is never parsed. Both branches must be chosen IDENTICALLY at the
/// definition site, every reference site, and the vtable-registration site for
/// a given class, or the symbols desync and the linker fails.
pub(super) fn scoped_method_name(
module_prefix: &str,
class_id: u32,
class_name: &str,
method_name: &str,
disambiguate: bool,
) -> String {
format!(
"perry_method_{}__{}__{}",
module_prefix,
sanitize_member(class_name),
sanitize_member(method_name)
)
if disambiguate {
format!(
"perry_method_{}__{}__c{}__{}",
module_prefix,
sanitize_member(class_name),
class_id,
sanitize_member(method_name)
)
} else {
format!(
"perry_method_{}__{}__{}",
module_prefix,
sanitize_member(class_name),
sanitize_member(method_name)
)
}
}

/// Collect every class name that is declared by two or more classes in
/// `classes`.
///
/// A class whose name lands in the returned set needs the class-id infix when
/// its methods are mangled (see [`scoped_method_name`]); a class whose name is
/// absent keeps the id-less form. Intended to be computed once per module and
/// shared by every site that mangles method symbols.
pub(super) fn duplicate_class_names(
    classes: &[perry_hir::Class],
) -> std::collections::HashSet<String> {
    // First pass: tally how many classes carry each name.
    let mut occurrences: HashMap<&str, u32> = HashMap::new();
    classes.iter().for_each(|class| {
        *occurrences.entry(class.name.as_str()).or_default() += 1;
    });

    // Second pass: keep only the names that were seen at least twice.
    let mut repeated = std::collections::HashSet::new();
    for (name, count) in occurrences {
        if count >= 2 {
            repeated.insert(name.to_owned());
        }
    }
    repeated
}

/// Sanitize a name for use in an LLVM symbol — replace anything that isn't
Expand Down
97 changes: 96 additions & 1 deletion crates/perry-codegen/src/codegen/method.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,34 @@ use crate::types::{LlvmType, DOUBLE, I64};
use super::helpers::scoped_static_method_name;
use super::opts::CrossModuleCtx;

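/// Replace the `__c<digits>__` class-id segment of a mangled method symbol with
/// `__c<class_id>__`. Symbols mangled in the id-carrying form
/// (`perry_method_<mod>__<class>__c<id>__<method>`, as produced by the
/// disambiguated branch of `scoped_method_name` and mirrored by
/// `scoped_static_method_name`) contain such a segment; symbols that don't
/// carry one (e.g. the id-less `perry_method_<mod>__<class>__<method>` form or
/// the standalone constructor `<prefix>__<class>_constructor`) are returned
/// unchanged. Only the FIRST `__c<digits>__` group is rewritten, so a method
/// whose own name happens to contain a `__c<digits>__` substring is left intact.
///
/// NOTE: the match is purely textual. The class-name component precedes the id
/// segment, so a class name that itself contains `__c<digits>__` would have
/// that occurrence rewritten instead of the real class-id segment.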
fn retarget_class_id_in_symbol(symbol: &str, class_id: u32) -> String {
// Find `__c` followed by one or more ASCII digits followed by `__`.
let bytes = symbol.as_bytes();
let mut i = 0usize;
while i + 3 < bytes.len() {
if &bytes[i..i + 3] == b"__c" {
let mut j = i + 3;
while j < bytes.len() && bytes[j].is_ascii_digit() {
j += 1;
}
// Require at least one digit AND a closing `__`.
if j > i + 3 && j + 1 < bytes.len() && &bytes[j..j + 2] == b"__" {
return format!("{}__c{}{}", &symbol[..i], class_id, &symbol[j..]);
Comment on lines +26 to +38

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Do not retarget the first __cN__ blindly.

A class name like A__c12__B produces perry_method_m__A__c12__B__c5__m; this helper rewrites the class-name segment and leaves the real class-id segment unchanged. That emits a symbol no reference site can reconstruct. Please retarget from structured inputs instead of parsing the symbol string, or add a parser/test that covers __c<digits>__ inside the class-name component.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@crates/perry-codegen/src/codegen/method.rs` around lines 26 - 38, The
retarget_class_id_in_symbol function blindly replaces the first occurrence of
the __cN__ pattern without distinguishing between patterns that are part of the
class-name component versus the actual class-id segment. This causes incorrect
retargeting when class names contain __cN__ patterns (e.g., A__c12__B). Either
refactor this function to accept structured inputs like the class name and ID
separately instead of parsing the full symbol string, or implement a more
sophisticated parser that correctly identifies and targets only the
__c<digits>__ pattern representing the actual class-id segment (which appears
after the class-name component) rather than any arbitrary occurrence within the
symbol string. Add tests that cover symbols with __c<digits>__ patterns embedded
in the class-name portion.

}
}
i += 1;
}
symbol.to_string()
}

fn node_stream_parent_kind(
classes: &HashMap<String, &perry_hir::Class>,
class: &perry_hir::Class,
Expand Down Expand Up @@ -65,7 +93,7 @@ pub(super) fn compile_method(
closure_rest_params: &HashMap<u32, usize>,
cross_module: &CrossModuleCtx,
) -> Result<()> {
let llvm_name = methods
let registry_name = methods
.get(&(class.name.clone(), method.name.clone()))
.cloned()
.ok_or_else(|| {
Expand All @@ -76,6 +104,26 @@ pub(super) fn compile_method(
)
})?;

// Pin the DEFINITION symbol to THIS class's unique HIR id.
//
// The dispatch registry (`methods`) is keyed by `(class_name, method_name)`,
// so two distinct local classes that share a minified name (`class j` reused
// across scopes — the 13MB-bundle pattern) collapse to ONE registry entry,
// whose `__c<id>__` infix carries only the LAST-seen class's id. But every
// class body is emitted here (artifacts.rs iterates `hir.classes`, which
// keeps each class with its own id), so without this both `j` bodies would
// define the SAME `perry_method_…__c<id>__…` symbol and clang would reject
// the module with `invalid redefinition of function`.
//
// `retarget_class_id_in_symbol` swaps the registry symbol's `__c<regid>__`
// segment for `__c<class.id>__`, leaving the class-name and method-name
// components (which the registry derived correctly — getters use the inner
// `f.name`, imports use the source prefix/name) untouched. Symbols without a
// `__c<digits>__` segment — chiefly the constructor (`<prefix>__<class>_
// constructor`) — are returned verbatim. For a uniquely-named class
// `regid == class.id`, so this is a no-op and the symbol stays byte-stable.
let llvm_name = retarget_class_id_in_symbol(&registry_name, class.id);

// Build the param list: (this, arg0, arg1, ...). All are doubles.
let mut params: Vec<(LlvmType, String)> = Vec::with_capacity(method.params.len() + 1);
params.push((DOUBLE, "%this_arg".to_string()));
Expand Down Expand Up @@ -821,3 +869,50 @@ pub(super) fn compile_static_method(
}
Ok(())
}

#[cfg(test)]
mod retarget_tests {
    use super::retarget_class_id_in_symbol as retarget;

    /// The id inside the `__c<digits>__` segment is swapped for the requested
    /// id, for both instance-method and static-method symbols.
    #[test]
    fn rewrites_class_id_segment() {
        let method = retarget("perry_method_cli_js__j__c12__getElementsByTagName", 11);
        assert_eq!(method, "perry_method_cli_js__j__c11__getElementsByTagName");

        let static_method = retarget("perry_static_mod_ts__x__c12__lex", 7);
        assert_eq!(static_method, "perry_static_mod_ts__x__c7__lex");
    }

    /// Retargeting to the id the symbol already carries yields the same text.
    #[test]
    fn unique_named_class_is_noop() {
        let symbol = "perry_method_mod_ts__Animal__c3__speak";
        let retargeted = retarget(symbol, 3);
        assert_eq!(retargeted, symbol);
    }

    /// A standalone constructor symbol has no `__c<id>__` segment, so it must
    /// come back verbatim regardless of the id passed in.
    #[test]
    fn constructor_symbol_unchanged() {
        let symbol = "constructor_recursion_ts__RecursiveCtor_constructor";
        let retargeted = retarget(symbol, 99);
        assert_eq!(retargeted, symbol);
    }

    /// Only the first `__c<digits>__` group (the class-id segment) changes; a
    /// later `__c5__` that is part of the method name stays as written.
    #[test]
    fn only_first_segment_rewritten() {
        let input = "perry_method_m_ts__C__c2__weird__c5__name";
        let expected = "perry_method_m_ts__C__c8__weird__c5__name";
        assert_eq!(retarget(input, 8), expected);
    }

    /// `__catch__` starts with `__c` but has no digits after it, so it is not
    /// treated as a class-id segment.
    #[test]
    fn no_segment_when_not_digits() {
        let symbol = "perry_method_m_ts__C__catch__handler";
        let retargeted = retarget(symbol, 4);
        assert_eq!(retargeted, symbol);
    }
}
Loading