From 7fcd58c35ed985eeb5e56ee4494e9bbe0b9f9ab9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Mon, 15 Jun 2026 06:40:43 +0200 Subject: [PATCH 1/2] wip(events): route native EventEmitter use like a node:events import (#5140) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PARTIAL — not yet a complete fix. Investigation of the eventemitter3 link failure (undefined _js_event_emitter_* symbols) found three layers; this commit addresses two of them: 1. Routing: a program that constructs a native EventEmitter *by name* (e.g. eventemitter3's default export, local binding `EventEmitter`) now marks the build as using events — detected in collect_modules via the lowered `class_name: "EventEmitter"` token (ctx.uses_event_emitter, new field in types.rs, threaded into the optimized-libs cache key) — and inserts "events" into native_module_imports so the full node:events wiring fires (well-known archive routing + bundled-events + external-events-construct), exactly as a real `import 'events'` would. 2. Symbol retention: perry-ext-events is a non-tokio wrapper, so the auto-optimize linker uses its prebuilt standalone staticlib. Thin-LTO was internalizing and dropping the crate's entire #[no_mangle] C API from that staticlib (verified: lto=false restores the symbols; a #[used] address-anchor does NOT beat thin-LTO here). Mirroring the UI crates' profile (strip=false + codegen-units=16) keeps the exported symbols in target/release/libperry_ext_events.a. REMAINING (3rd layer, unsolved): even with (1) and (2), the eventemitter3 repro still fails to link. 
The well-known events archive IS resolved and added to OptimizedLibs.well_known_libs (the 'routing events -> libperry_ext_events.a' message prints), and the archive now contains the symbols, yet for a compilePackages program the archive does NOT appear on the final cc link line (confirmed via a cc shim: 1 occurrence for a real `import 'events'` program, 0 for the eventemitter3 program). So well_known_libs is being dropped from the link composition specifically on the compilePackages path. That last hop still needs tracing. No changelog/version bump. --- Cargo.toml | 14 +++++++++++ .../src/commands/compile/collect_modules.rs | 23 +++++++++++++++++++ .../src/commands/compile/optimized_libs.rs | 3 ++- crates/perry/src/commands/compile/types.rs | 9 ++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 754a3e22c..4066144cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -200,6 +200,20 @@ codegen-units = 16 [profile.release.package.perry-stdlib] opt-level = "s" # Optimize for size in stdlib +# perry-ext-events exports #[no_mangle] EventEmitter FFI symbols +# (js_event_emitter_*) that generated native code calls directly when a program +# constructs a native EventEmitter by name (e.g. eventemitter3's default +# export). It is a non-tokio wrapper, so the auto-optimize linker uses its +# prebuilt standalone staticlib rather than rebuilding it alongside a crate that +# references those symbols. Thin-LTO then internalizes and drops the whole C API +# from that staticlib, so the program fails to link with undefined +# _js_event_emitter_* symbols (#5140). Mirror the UI crates' settings (which +# export #[no_mangle] extern "C" symbols for the same reason): more codegen +# units + no strip keeps the exported C API in the staticlib. 
+[profile.release.package.perry-ext-events] +strip = false +codegen-units = 16 + [workspace.package] version = "0.5.1171" edition = "2021" diff --git a/crates/perry/src/commands/compile/collect_modules.rs b/crates/perry/src/commands/compile/collect_modules.rs index 4d36a3bf7..1802f6739 100644 --- a/crates/perry/src/commands/compile/collect_modules.rs +++ b/crates/perry/src/commands/compile/collect_modules.rs @@ -1884,6 +1884,29 @@ fn collect_module_finish( } } + // #5140 — detect native `EventEmitter` construction. The `EventEmitter` + // builtin-new path (`new EventEmitter()` / `EventEmitterAsyncResource`, + // routed by the local binding NAME — so it fires for `eventemitter3`'s + // default export too, not only `node:events`) emits `js_event_emitter_*` + // calls. Those helpers live in perry-stdlib's `events` module behind + // `bundled-events`; a program that uses native EventEmitter without + // importing `node:events` otherwise fails to link with undefined + // `_js_event_emitter_*` symbols. Match the lowered `Expr::New` token. + { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + if hir_debug.contains("class_name: \"EventEmitter\"") + || hir_debug.contains("class_name: \"EventEmitterAsyncResource\"") + { + ctx.uses_event_emitter = true; + // Treat native EventEmitter use exactly like a `node:events` import + // so the full events wiring fires: the perry-ext-events well-known + // archive (which defines `js_event_emitter_*`) is linked, the + // `bundled-events` feature is enabled, and the construct dispatcher + // is registered (`external-events-construct`). Idempotent — a set. + ctx.native_module_imports.insert("events".to_string()); + } + } + // Detect WHATWG URL API usage. 
The `url`+`idna` host-canonicalization // engine (~195 KB) is gated behind `perry-runtime/url-engine`; Perry's URL // parsing is otherwise hand-rolled, so a program with no URL API links none diff --git a/crates/perry/src/commands/compile/optimized_libs.rs b/crates/perry/src/commands/compile/optimized_libs.rs index b9c62b206..61b015558 100644 --- a/crates/perry/src/commands/compile/optimized_libs.rs +++ b/crates/perry/src/commands/compile/optimized_libs.rs @@ -670,13 +670,14 @@ pub(super) fn build_optimized_libs( // Cheap djb2 — no need for the SipHash overhead. let target_str = target.unwrap_or("host"); let key_input = format!( - "{}|{}|{}|wasm={}|regex={}|temporal={}|url={}|norm={}|seg={}|diag={}|dgram={}|v={}", + "{}|{}|{}|wasm={}|regex={}|temporal={}|ee={}|url={}|norm={}|seg={}|diag={}|dgram={}|v={}", feature_arg, panic_abort_safe, target_str, ctx.needs_wasm_runtime, ctx.uses_regex, ctx.uses_temporal, + ctx.uses_event_emitter, ctx.uses_url, ctx.uses_string_normalize, ctx.uses_intl_segmenter, diff --git a/crates/perry/src/commands/compile/types.rs b/crates/perry/src/commands/compile/types.rs index 1f6b791cd..58cca8551 100644 --- a/crates/perry/src/commands/compile/types.rs +++ b/crates/perry/src/commands/compile/types.rs @@ -555,6 +555,14 @@ pub struct CompilationContext { /// implementation — so a program using `Date` but never `Temporal.*` links /// none of this. pub uses_temporal: bool, + /// Whether codegen routes any construction to the native `EventEmitter` + /// (a `new EventEmitter()` / `EventEmitterAsyncResource`, regardless of + /// where the binding was imported from — e.g. `eventemitter3`'s default + /// export, whose local name is `EventEmitter`). The `js_event_emitter_*` + /// helpers live in perry-stdlib's `events` module behind `bundled-events`; + /// without this flag a program that uses native EventEmitter but never + /// imports `node:events` fails to link (#5140). 
+ pub uses_event_emitter: bool, /// Whether any TS module uses a WHATWG URL API (`new URL`, the hostname /// setter, `url.domainToASCII/Unicode`, legacy `url.resolve`, /// `URLSearchParams`, `URLPattern`). Gates `perry-runtime/url-engine` (the @@ -831,6 +839,7 @@ impl CompilationContext { uses_crypto_builtins: false, uses_regex: false, uses_temporal: false, + uses_event_emitter: false, uses_url: false, uses_string_normalize: false, uses_intl_segmenter: false, From 1faf256daa7047713a758afa7c7a5c5544c3230c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Mon, 15 Jun 2026 07:44:47 +0200 Subject: [PATCH 2/2] style(collect_modules): extract feature-detection into submodule (under 2000-line cap) The #5140 EventEmitter-detection block pushed collect_modules.rs to 2023 lines, over the 2000-line lint cap. Move the whole optional-feature detection sequence (fetch/wasm/crypto/regex/temporal/events/url/normalize/ diagnostics/dgram/readline/ioredis) into a new collect_modules/feature_detect.rs submodule and call it from collect_module_finish. 
--- .../src/commands/compile/collect_modules.rs | 228 +---------------- .../compile/collect_modules/feature_detect.rs | 241 ++++++++++++++++++ 2 files changed, 245 insertions(+), 224 deletions(-) create mode 100644 crates/perry/src/commands/compile/collect_modules/feature_detect.rs diff --git a/crates/perry/src/commands/compile/collect_modules.rs b/crates/perry/src/commands/compile/collect_modules.rs index 1802f6739..01be91422 100644 --- a/crates/perry/src/commands/compile/collect_modules.rs +++ b/crates/perry/src/commands/compile/collect_modules.rs @@ -33,13 +33,13 @@ use super::{ mod create_require_transform; mod crypto_ns; mod dynamic_glob; +mod feature_detect; mod native_addon; mod parse_error; #[cfg(test)] mod tests; use create_require_transform::transform_create_require_literal_requires; -use crypto_ns::module_uses_global_crypto_namespace; use dynamic_glob::expand_dynamic_import_glob; use native_addon::refuse_compile_package_native_addon; use parse_error::annotate_parse_error; @@ -1785,229 +1785,9 @@ fn collect_module_finish( transform_generators(&mut hir_module); } - // Detect fetch() usage — js_fetch_with_options lives in perry-stdlib - if hir_module.uses_fetch { - ctx.needs_stdlib = true; - ctx.uses_fetch = true; - } - - // Issue #76 — auto-link the wasmi host runtime when any module - // references `WebAssembly.*`. Without this the user has to remember - // `--enable-wasm-runtime`; with it the flag is only needed when they - // want to override the auto-detection (e.g. force-link for plugins - // they'll dlopen later). - if hir_module.uses_webassembly { - ctx.needs_wasm_runtime = true; - } - - // Detect crypto.* builtin usage (randomBytes/randomUUID/sha256/md5 used - // without `import crypto`). The runtime symbols live behind the - // perry-stdlib `crypto` Cargo feature, so we need to flip that on for - // auto-optimize. Text-grep the serialized Debug form for the established - // dedicated HIR variants. 
The global WebCrypto namespace path below uses - // a structured walk because it is an ordinary `PropertyGet`. - { - let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); - let uses_global_crypto_namespace = module_uses_global_crypto_namespace(&hir_module); - if hir_debug.contains("CryptoRandomBytes") - || hir_debug.contains("CryptoRandomUUID") - || hir_debug.contains("CryptoSha256") - || hir_debug.contains("CryptoMd5") - // Web Crypto API (issue #561). The four WebCrypto* HIR - // variants lower to extern calls into perry-stdlib's - // webcrypto module, gated behind the `crypto` feature. - // Without flipping the gate, auto-optimize would build - // perry-stdlib without `crypto` and link would fail with - // "_js_webcrypto_digest" undefined. - || hir_debug.contains("WebCryptoDigest") - || hir_debug.contains("WebCryptoImportKey") - || hir_debug.contains("WebCryptoSign") - || hir_debug.contains("WebCryptoVerify") - || hir_debug.contains("WebCryptoEncrypt") - || hir_debug.contains("WebCryptoDecrypt") - || hir_debug.contains("WebCryptoGenerateKey") - || hir_debug.contains("WebCryptoWrapKey") - || hir_debug.contains("WebCryptoUnwrapKey") - // `globalThis.crypto` / bare `crypto` now materializes the - // WebCrypto singleton. Its `randomUUID` property dispatches - // through perry-stdlib's crypto bridge when called via a - // runtime property read rather than the direct HIR variant. - || uses_global_crypto_namespace - { - ctx.needs_stdlib = true; - ctx.uses_crypto_builtins = true; - } - } - - // Detect whether this module needs the regex engine. The engine - // (`regex`/`fancy-regex`, ~1.2 MB) is gated behind `perry-runtime/ - // regex-engine` and the RegExp object's identity/display layer stays - // always-compiled, so a program that can never produce a RegExp at - // runtime links none of the matching machinery. 
A regex value can only - // exist if a regex literal / `RegExp` was evaluated, OR a regex-coercing - // string method (`.match`/`.matchAll`/`.search`, which build a RegExp from - // even a string arg per spec) ran, OR a glob API was used (the runtime - // compiles globs to regexes internally). We grep the serialized Debug form - // for the unambiguous HIR variant tokens and the generic-dispatch method - // names. Over-matching only over-includes the engine (a size, not a - // correctness, cost); the goal is zero false negatives. `eval` is - // non-functional in Perry so it can't create a regex at runtime. - { - let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); - if hir_debug.contains("RegExp") // RegExp / RegExpDynamic / RegExpTest / RegExpExec / RegExpEscape / RegExpReplaceFn / RegExpExec{Index,Groups} - || hir_debug.contains("StringMatch") // dedicated .match / .matchAll variants - || hir_debug.contains("PathMatchesGlob") - || hir_debug.contains("property: \"search\"") - || hir_debug.contains("property: \"match\"") - || hir_debug.contains("property: \"matchAll\"") - || hir_debug.contains("property: \"glob\"") - || hir_debug.contains("property: \"globSync\"") - { - ctx.uses_regex = true; - } - } - - // Detect TC39 `Temporal.*` usage. The engine (`temporal_rs` + transitive - // tz/calendar deps, ~580 KB) is gated behind `perry-runtime/temporal`; - // the Temporal cell's identity layer stays always-compiled, so a program - // that never touches `Temporal` links none of the date-math machinery. - // `Temporal` is a global namespace (like `Intl`/`Math`): accessing it (even - // when aliased, e.g. `const now = Temporal.Now`) materializes a - // `PropertyGet { property: "Temporal" }`, so we match that exact token - // rather than a bare `"Temporal"` substring — the latter also fires on - // user identifiers like `myTemporal` / `temporalLog`, spuriously enabling - // the engine and undercutting the size win. 
JS `Date` is a separate impl. - { - let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); - if hir_debug.contains("property: \"Temporal\"") { - ctx.uses_temporal = true; - } - } - - // #5140 — detect native `EventEmitter` construction. The `EventEmitter` - // builtin-new path (`new EventEmitter()` / `EventEmitterAsyncResource`, - // routed by the local binding NAME — so it fires for `eventemitter3`'s - // default export too, not only `node:events`) emits `js_event_emitter_*` - // calls. Those helpers live in perry-stdlib's `events` module behind - // `bundled-events`; a program that uses native EventEmitter without - // importing `node:events` otherwise fails to link with undefined - // `_js_event_emitter_*` symbols. Match the lowered `Expr::New` token. - { - let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); - if hir_debug.contains("class_name: \"EventEmitter\"") - || hir_debug.contains("class_name: \"EventEmitterAsyncResource\"") - { - ctx.uses_event_emitter = true; - // Treat native EventEmitter use exactly like a `node:events` import - // so the full events wiring fires: the perry-ext-events well-known - // archive (which defines `js_event_emitter_*`) is linked, the - // `bundled-events` feature is enabled, and the construct dispatcher - // is registered (`external-events-construct`). Idempotent — a set. - ctx.native_module_imports.insert("events".to_string()); - } - } - - // Detect WHATWG URL API usage. The `url`+`idna` host-canonicalization - // engine (~195 KB) is gated behind `perry-runtime/url-engine`; Perry's URL - // parsing is otherwise hand-rolled, so a program with no URL API links none - // of it. Web `URL`/`URLPattern`/`URLSearchParams` lower to dedicated `Url*` - // HIR variants (always `Url` + an uppercase letter, e.g. `UrlNew`, - // `UrlSet…`, `UrlSearchParams…`); `node:url` lowers to a - // `NativeMethodCall { module: "url", … }`. 
We match those exact tokens - // instead of a bare `"Url"`/`"URL"` substring, which would also fire on - // common camelCase identifiers like `baseUrl` / `imageUrl` and spuriously - // link the engine. Over-matching within the URL family (e.g. enabling for a - // URLSearchParams-only program that doesn't strictly need the host parser) - // is a benign size cost; the rule is zero false negatives. - { - let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); - if hir_debug.contains("UrlNew") - || hir_debug.contains("UrlParse") - || hir_debug.contains("UrlCanParse") - || hir_debug.contains("UrlPattern") - || hir_debug.contains("UrlGet") - || hir_debug.contains("UrlSet") - || hir_debug.contains("UrlInstance") - || hir_debug.contains("UrlSearchParams") - || hir_debug.contains("module: \"url\"") - { - ctx.uses_url = true; - } - } - - // Detect `String.prototype.normalize` (gates `unicode-normalization`, - // ~113 KB) and `Intl.Segmenter` (gates `unicode-segmentation`, ~73 KB). - // Both lower to method/namespace nodes carrying the name as a `property`, - // so we match the exact `property: "…"` token. (A bare `"Segmenter"` - // substring would also fire on a user identifier named `Segmenter`.) - { - let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); - if hir_debug.contains("property: \"normalize\"") { - ctx.uses_string_normalize = true; - } - if hir_debug.contains("property: \"Segmenter\"") { - ctx.uses_intl_segmenter = true; - } - } - - // Detect heap-snapshot / `process.report` usage, the only user-facing APIs - // behind the `diagnostics` feature (~95 KB of cold-path JSON serializers + - // the `serde_json` pulled only by them). `v8.getHeapSnapshot` / - // `v8.writeHeapSnapshot` lower to `NativeMethodCall { method: "…" }`; - // `process.report.*` surfaces as `property: "report"`. 
The env-driven dev - // diagnostics (GC-diag / typed-feedback JSON) ride the same feature and - // degrade gracefully when off, so they need no detection. - { - let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); - if hir_debug.contains("method: \"getHeapSnapshot\"") - || hir_debug.contains("method: \"writeHeapSnapshot\"") - || hir_debug.contains("property: \"report\"") - { - ctx.uses_diagnostics = true; - } - // `node:dgram` (UDP) → gates `perry-runtime/mod-dgram` (~43 KB; dgram - // lowers to `NativeMethodCall { module: "dgram" }`, runtime-only so not - // in `native_module_imports`). - if hir_debug.contains("module: \"dgram\"") { - ctx.uses_dgram = true; - } - } - - // Detect readline usage via process.stdin raw/lifecycle methods. These - // don't go through an `import 'readline'` statement, so the import-based - // needs_stdlib detection above misses them. - { - let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); - if hir_debug.contains("ProcessStdinSetRawMode") - || hir_debug.contains("ProcessStdinOn") - || hir_debug.contains("ProcessStdinRemoveListener") - || hir_debug.contains("ProcessStdinLifecycle") - { - ctx.needs_stdlib = true; - ctx.native_module_imports.insert("readline".to_string()); - } - } - - // Detect ioredis usage (detected by class name, not import path) - let mut found_ioredis = false; - for (_, module_name, _) in &hir_module.exported_native_instances { - if module_name == "ioredis" { - found_ioredis = true; - break; - } - } - if !found_ioredis { - for (_, module_name, _) in &hir_module.exported_func_return_native_instances { - if module_name == "ioredis" { - found_ioredis = true; - break; - } - } - } - if found_ioredis { - ctx.needs_stdlib = true; - ctx.native_module_imports.insert("ioredis".to_string()); - } + // Set optional-feature gates (regex/temporal/url/crypto/events/etc.) so + // auto-optimize links only the runtime subsystems this module can reach. 
+ feature_detect::detect_optional_feature_usage(ctx, &hir_module); let collected_after_insert = ctx.native_modules.len() + ctx.js_modules.len() + 1; progress.record(ProgressSnapshot { diff --git a/crates/perry/src/commands/compile/collect_modules/feature_detect.rs b/crates/perry/src/commands/compile/collect_modules/feature_detect.rs new file mode 100644 index 000000000..e77981ee6 --- /dev/null +++ b/crates/perry/src/commands/compile/collect_modules/feature_detect.rs @@ -0,0 +1,241 @@ +//! Optional-feature usage detection (#5140 / size-optimize). +//! +//! Extracted from `collect_module_finish` to keep `collect_modules.rs` +//! under the 2000-line cap. Each block text-greps a module's lowered HIR +//! (or inspects structured fields) to flip a `ctx.uses_*` / `needs_*` gate +//! so auto-optimize links only the runtime subsystems the program can +//! actually reach. Over-matching only over-includes a subsystem (a size, +//! not a correctness, cost); the rule throughout is zero false negatives. + +use super::crypto_ns::module_uses_global_crypto_namespace; +use crate::commands::compile::CompilationContext; + +/// Inspect a lowered module and set the optional-feature gates it needs. +pub(super) fn detect_optional_feature_usage( + ctx: &mut CompilationContext, + hir_module: &perry_hir::Module, +) { + // Detect fetch() usage — js_fetch_with_options lives in perry-stdlib + if hir_module.uses_fetch { + ctx.needs_stdlib = true; + ctx.uses_fetch = true; + } + + // Issue #76 — auto-link the wasmi host runtime when any module + // references `WebAssembly.*`. Without this the user has to remember + // `--enable-wasm-runtime`; with it the flag is only needed when they + // want to override the auto-detection (e.g. force-link for plugins + // they'll dlopen later). + if hir_module.uses_webassembly { + ctx.needs_wasm_runtime = true; + } + + // Detect crypto.* builtin usage (randomBytes/randomUUID/sha256/md5 used + // without `import crypto`). 
The runtime symbols live behind the + // perry-stdlib `crypto` Cargo feature, so we need to flip that on for + // auto-optimize. Text-grep the serialized Debug form for the established + // dedicated HIR variants. The global WebCrypto namespace path below uses + // a structured walk because it is an ordinary `PropertyGet`. + { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + let uses_global_crypto_namespace = module_uses_global_crypto_namespace(hir_module); + if hir_debug.contains("CryptoRandomBytes") + || hir_debug.contains("CryptoRandomUUID") + || hir_debug.contains("CryptoSha256") + || hir_debug.contains("CryptoMd5") + // Web Crypto API (issue #561). The four WebCrypto* HIR + // variants lower to extern calls into perry-stdlib's + // webcrypto module, gated behind the `crypto` feature. + // Without flipping the gate, auto-optimize would build + // perry-stdlib without `crypto` and link would fail with + // "_js_webcrypto_digest" undefined. + || hir_debug.contains("WebCryptoDigest") + || hir_debug.contains("WebCryptoImportKey") + || hir_debug.contains("WebCryptoSign") + || hir_debug.contains("WebCryptoVerify") + || hir_debug.contains("WebCryptoEncrypt") + || hir_debug.contains("WebCryptoDecrypt") + || hir_debug.contains("WebCryptoGenerateKey") + || hir_debug.contains("WebCryptoWrapKey") + || hir_debug.contains("WebCryptoUnwrapKey") + // `globalThis.crypto` / bare `crypto` now materializes the + // WebCrypto singleton. Its `randomUUID` property dispatches + // through perry-stdlib's crypto bridge when called via a + // runtime property read rather than the direct HIR variant. + || uses_global_crypto_namespace + { + ctx.needs_stdlib = true; + ctx.uses_crypto_builtins = true; + } + } + + // Detect whether this module needs the regex engine. 
The engine + // (`regex`/`fancy-regex`, ~1.2 MB) is gated behind `perry-runtime/ + // regex-engine` and the RegExp object's identity/display layer stays + // always-compiled, so a program that can never produce a RegExp at + // runtime links none of the matching machinery. A regex value can only + // exist if a regex literal / `RegExp` was evaluated, OR a regex-coercing + // string method (`.match`/`.matchAll`/`.search`, which build a RegExp from + // even a string arg per spec) ran, OR a glob API was used (the runtime + // compiles globs to regexes internally). We grep the serialized Debug form + // for the unambiguous HIR variant tokens and the generic-dispatch method + // names. Over-matching only over-includes the engine (a size, not a + // correctness, cost); the goal is zero false negatives. `eval` is + // non-functional in Perry so it can't create a regex at runtime. + { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + if hir_debug.contains("RegExp") // RegExp / RegExpDynamic / RegExpTest / RegExpExec / RegExpEscape / RegExpReplaceFn / RegExpExec{Index,Groups} + || hir_debug.contains("StringMatch") // dedicated .match / .matchAll variants + || hir_debug.contains("PathMatchesGlob") + || hir_debug.contains("property: \"search\"") + || hir_debug.contains("property: \"match\"") + || hir_debug.contains("property: \"matchAll\"") + || hir_debug.contains("property: \"glob\"") + || hir_debug.contains("property: \"globSync\"") + { + ctx.uses_regex = true; + } + } + + // Detect TC39 `Temporal.*` usage. The engine (`temporal_rs` + transitive + // tz/calendar deps, ~580 KB) is gated behind `perry-runtime/temporal`; + // the Temporal cell's identity layer stays always-compiled, so a program + // that never touches `Temporal` links none of the date-math machinery. + // `Temporal` is a global namespace (like `Intl`/`Math`): accessing it (even + // when aliased, e.g. 
`const now = Temporal.Now`) materializes a + // `PropertyGet { property: "Temporal" }`, so we match that exact token + // rather than a bare `"Temporal"` substring — the latter also fires on + // user identifiers like `myTemporal` / `temporalLog`, spuriously enabling + // the engine and undercutting the size win. JS `Date` is a separate impl. + { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + if hir_debug.contains("property: \"Temporal\"") { + ctx.uses_temporal = true; + } + } + + // #5140 — detect native `EventEmitter` construction. The `EventEmitter` + // builtin-new path (`new EventEmitter()` / `EventEmitterAsyncResource`, + // routed by the local binding NAME — so it fires for `eventemitter3`'s + // default export too, not only `node:events`) emits `js_event_emitter_*` + // calls. Those helpers live in perry-stdlib's `events` module behind + // `bundled-events`; a program that uses native EventEmitter without + // importing `node:events` otherwise fails to link with undefined + // `_js_event_emitter_*` symbols. Match the lowered `Expr::New` token. + { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + if hir_debug.contains("class_name: \"EventEmitter\"") + || hir_debug.contains("class_name: \"EventEmitterAsyncResource\"") + { + ctx.uses_event_emitter = true; + // Treat native EventEmitter use exactly like a `node:events` import + // so the full events wiring fires: the perry-ext-events well-known + // archive (which defines `js_event_emitter_*`) is linked, the + // `bundled-events` feature is enabled, and the construct dispatcher + // is registered (`external-events-construct`). Idempotent — a set. + ctx.native_module_imports.insert("events".to_string()); + } + } + + // Detect WHATWG URL API usage. 
The `url`+`idna` host-canonicalization + // engine (~195 KB) is gated behind `perry-runtime/url-engine`; Perry's URL + // parsing is otherwise hand-rolled, so a program with no URL API links none + // of it. Web `URL`/`URLPattern`/`URLSearchParams` lower to dedicated `Url*` + // HIR variants (always `Url` + an uppercase letter, e.g. `UrlNew`, + // `UrlSet…`, `UrlSearchParams…`); `node:url` lowers to a + // `NativeMethodCall { module: "url", … }`. We match those exact tokens + // instead of a bare `"Url"`/`"URL"` substring, which would also fire on + // common camelCase identifiers like `baseUrl` / `imageUrl` and spuriously + // link the engine. Over-matching within the URL family (e.g. enabling for a + // URLSearchParams-only program that doesn't strictly need the host parser) + // is a benign size cost; the rule is zero false negatives. + { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + if hir_debug.contains("UrlNew") + || hir_debug.contains("UrlParse") + || hir_debug.contains("UrlCanParse") + || hir_debug.contains("UrlPattern") + || hir_debug.contains("UrlGet") + || hir_debug.contains("UrlSet") + || hir_debug.contains("UrlInstance") + || hir_debug.contains("UrlSearchParams") + || hir_debug.contains("module: \"url\"") + { + ctx.uses_url = true; + } + } + + // Detect `String.prototype.normalize` (gates `unicode-normalization`, + // ~113 KB) and `Intl.Segmenter` (gates `unicode-segmentation`, ~73 KB). + // Both lower to method/namespace nodes carrying the name as a `property`, + // so we match the exact `property: "…"` token. (A bare `"Segmenter"` + // substring would also fire on a user identifier named `Segmenter`.) 
+ { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + if hir_debug.contains("property: \"normalize\"") { + ctx.uses_string_normalize = true; + } + if hir_debug.contains("property: \"Segmenter\"") { + ctx.uses_intl_segmenter = true; + } + } + + // Detect heap-snapshot / `process.report` usage, the only user-facing APIs + // behind the `diagnostics` feature (~95 KB of cold-path JSON serializers + + // the `serde_json` pulled only by them). `v8.getHeapSnapshot` / + // `v8.writeHeapSnapshot` lower to `NativeMethodCall { method: "…" }`; + // `process.report.*` surfaces as `property: "report"`. The env-driven dev + // diagnostics (GC-diag / typed-feedback JSON) ride the same feature and + // degrade gracefully when off, so they need no detection. + { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + if hir_debug.contains("method: \"getHeapSnapshot\"") + || hir_debug.contains("method: \"writeHeapSnapshot\"") + || hir_debug.contains("property: \"report\"") + { + ctx.uses_diagnostics = true; + } + // `node:dgram` (UDP) → gates `perry-runtime/mod-dgram` (~43 KB; dgram + // lowers to `NativeMethodCall { module: "dgram" }`, runtime-only so not + // in `native_module_imports`). + if hir_debug.contains("module: \"dgram\"") { + ctx.uses_dgram = true; + } + } + + // Detect readline usage via process.stdin raw/lifecycle methods. These + // don't go through an `import 'readline'` statement, so the import-based + // needs_stdlib detection above misses them. 
+ { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + if hir_debug.contains("ProcessStdinSetRawMode") + || hir_debug.contains("ProcessStdinOn") + || hir_debug.contains("ProcessStdinRemoveListener") + || hir_debug.contains("ProcessStdinLifecycle") + { + ctx.needs_stdlib = true; + ctx.native_module_imports.insert("readline".to_string()); + } + } + + // Detect ioredis usage (detected by class name, not import path) + let mut found_ioredis = false; + for (_, module_name, _) in &hir_module.exported_native_instances { + if module_name == "ioredis" { + found_ioredis = true; + break; + } + } + if !found_ioredis { + for (_, module_name, _) in &hir_module.exported_func_return_native_instances { + if module_name == "ioredis" { + found_ioredis = true; + break; + } + } + } + if found_ioredis { + ctx.needs_stdlib = true; + ctx.native_module_imports.insert("ioredis".to_string()); + } +}