-
-
Notifications
You must be signed in to change notification settings - Fork 132
perf(codegen): outline class-field-SET guard-miss arm to one call (#5334 lever A) #5336
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
af3410c
5baf879
11731e5
0ddb979
1e41fe7
74f2988
1b99c43
b942603
0c10e26
9e22074
c367a0a
dc9e096
a5a7d6e
91f0275
e1e5de7
4335b17
1a0a709
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,34 @@ use crate::types::{LlvmType, DOUBLE, I64}; | |
| use super::helpers::scoped_static_method_name; | ||
| use super::opts::CrossModuleCtx; | ||
|
|
||
| /// Replace the `__c<digits>__` class-id segment of a mangled method symbol with | ||
| /// `__c<class_id>__`. Method/static symbols built by `scoped_method_name` / | ||
| /// `scoped_static_method_name` always contain exactly one such segment | ||
| /// (`perry_method_<mod>__<class>__c<id>__<method>`); symbols that don't carry | ||
| /// one (e.g. the standalone constructor `<prefix>__<class>_constructor`) are | ||
| /// returned unchanged. Only the FIRST `__c<digits>__` group is rewritten so a | ||
| /// method whose own name happens to contain a `__c<digits>__` substring is left | ||
| /// intact. | ||
| fn retarget_class_id_in_symbol(symbol: &str, class_id: u32) -> String { | ||
| // Find `__c` followed by one or more ASCII digits followed by `__`. | ||
| let bytes = symbol.as_bytes(); | ||
| let mut i = 0usize; | ||
| while i + 3 < bytes.len() { | ||
| if &bytes[i..i + 3] == b"__c" { | ||
| let mut j = i + 3; | ||
| while j < bytes.len() && bytes[j].is_ascii_digit() { | ||
| j += 1; | ||
| } | ||
| // Require at least one digit AND a closing `__`. | ||
| if j > i + 3 && j + 1 < bytes.len() && &bytes[j..j + 2] == b"__" { | ||
| return format!("{}__c{}{}", &symbol[..i], class_id, &symbol[j..]); | ||
|
Comment on lines
+26
to
+38
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do not retarget the first A class name like 🤖 Prompt for AI Agents |
||
| } | ||
| } | ||
| i += 1; | ||
| } | ||
| symbol.to_string() | ||
| } | ||
|
|
||
| fn node_stream_parent_kind( | ||
| classes: &HashMap<String, &perry_hir::Class>, | ||
| class: &perry_hir::Class, | ||
|
|
@@ -65,7 +93,7 @@ pub(super) fn compile_method( | |
| closure_rest_params: &HashMap<u32, usize>, | ||
| cross_module: &CrossModuleCtx, | ||
| ) -> Result<()> { | ||
| let llvm_name = methods | ||
| let registry_name = methods | ||
| .get(&(class.name.clone(), method.name.clone())) | ||
| .cloned() | ||
| .ok_or_else(|| { | ||
|
|
@@ -76,6 +104,26 @@ pub(super) fn compile_method( | |
| ) | ||
| })?; | ||
|
|
||
| // Pin the DEFINITION symbol to THIS class's unique HIR id. | ||
| // | ||
| // The dispatch registry (`methods`) is keyed by `(class_name, method_name)`, | ||
| // so two distinct local classes that share a minified name (`class j` reused | ||
| // across scopes — the 13MB-bundle pattern) collapse to ONE registry entry, | ||
| // whose `__c<id>__` infix carries only the LAST-seen class's id. But every | ||
| // class body is emitted here (artifacts.rs iterates `hir.classes`, which | ||
| // keeps each class with its own id), so without this both `j` bodies would | ||
| // define the SAME `perry_method_…__c<id>__…` symbol and clang would reject | ||
| // the module with `invalid redefinition of function`. | ||
| // | ||
| // `retarget_class_id_in_symbol` swaps the registry symbol's `__c<regid>__` | ||
| // segment for `__c<class.id>__`, leaving the class-name and method-name | ||
| // components (which the registry derived correctly — getters use the inner | ||
| // `f.name`, imports use the source prefix/name) untouched. Symbols without a | ||
| // `__c<digits>__` segment — chiefly the constructor (`<prefix>__<class>_ | ||
| // constructor`) — are returned verbatim. For a uniquely-named class | ||
| // `regid == class.id`, so this is a no-op and the symbol stays byte-stable. | ||
| let llvm_name = retarget_class_id_in_symbol(®istry_name, class.id); | ||
|
|
||
| // Build the param list: (this, arg0, arg1, ...). All are doubles. | ||
| let mut params: Vec<(LlvmType, String)> = Vec::with_capacity(method.params.len() + 1); | ||
| params.push((DOUBLE, "%this_arg".to_string())); | ||
|
|
@@ -821,3 +869,50 @@ pub(super) fn compile_static_method( | |
| } | ||
| Ok(()) | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod retarget_tests { | ||
| use super::retarget_class_id_in_symbol; | ||
|
|
||
| #[test] | ||
| fn rewrites_class_id_segment() { | ||
| assert_eq!( | ||
| retarget_class_id_in_symbol("perry_method_cli_js__j__c12__getElementsByTagName", 11), | ||
| "perry_method_cli_js__j__c12__getElementsByTagName".replace("c12", "c11") | ||
| ); | ||
| assert_eq!( | ||
| retarget_class_id_in_symbol("perry_static_mod_ts__x__c12__lex", 7), | ||
| "perry_static_mod_ts__x__c7__lex" | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn unique_named_class_is_noop() { | ||
| let s = "perry_method_mod_ts__Animal__c3__speak"; | ||
| assert_eq!(retarget_class_id_in_symbol(s, 3), s); | ||
| } | ||
|
|
||
| #[test] | ||
| fn constructor_symbol_unchanged() { | ||
| // Standalone constructor carries no `__c<id>__` segment. | ||
| let s = "constructor_recursion_ts__RecursiveCtor_constructor"; | ||
| assert_eq!(retarget_class_id_in_symbol(s, 99), s); | ||
| } | ||
|
|
||
| #[test] | ||
| fn only_first_segment_rewritten() { | ||
| // A method name that itself contains `__c5__` must not be touched — | ||
| // only the class-id segment (the first `__c<digits>__`). | ||
| assert_eq!( | ||
| retarget_class_id_in_symbol("perry_method_m_ts__C__c2__weird__c5__name", 8), | ||
| "perry_method_m_ts__C__c8__weird__c5__name" | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn no_segment_when_not_digits() { | ||
| // `__catch__` looks superficially close but isn't `__c<digits>__`. | ||
| let s = "perry_method_m_ts__C__catch__handler"; | ||
| assert_eq!(retarget_class_id_in_symbol(s, 4), s); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Make the member sanitizer collision-free before using it for function symbols.
sanitize_memberis not actually injective:"$Z5"escapes tou__24_Z5, while a plain function named"u__24_Z5"staysu__24_Z5. That can still emit duplicateperry_fn_*definitions. Reserve theu_namespace for escaped names, or switch to a length/tagged encoding, and add a regression for the plain-vs-escaped collision.Possible localized fix
pub(super) fn sanitize_member(name: &str) -> String { let is_plain = name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_'); - if is_plain { + if is_plain && !name.starts_with("u_") { // Byte-identical to `sanitize` for plain names (incl. leading-digit fix). return sanitize(name); }🤖 Prompt for AI Agents