Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,20 @@ codegen-units = 16
[profile.release.package.perry-stdlib]
opt-level = "s" # Optimize for size in stdlib

# perry-ext-events exports #[no_mangle] EventEmitter FFI symbols
# (js_event_emitter_*) that generated native code calls directly when a program
# constructs a native EventEmitter by name (e.g. eventemitter3's default
# export). It is a non-tokio wrapper, so the auto-optimize linker uses its
# prebuilt standalone staticlib rather than rebuilding it alongside a crate that
# references those symbols. Thin-LTO then internalizes and drops the whole C API
# from that staticlib, so the program fails to link with undefined
# _js_event_emitter_* symbols (#5140). Mirror the UI crates' settings (which
# export #[no_mangle] extern "C" symbols for the same reason): more codegen
# units + no strip keeps the exported C API in the staticlib.
[profile.release.package.perry-ext-events]
strip = false
codegen-units = 16

[workspace.package]
version = "0.5.1171"
edition = "2021"
Expand Down
205 changes: 4 additions & 201 deletions crates/perry/src/commands/compile/collect_modules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ use super::{
mod create_require_transform;
mod crypto_ns;
mod dynamic_glob;
mod feature_detect;
mod native_addon;
mod parse_error;
#[cfg(test)]
mod tests;

use create_require_transform::transform_create_require_literal_requires;
use crypto_ns::module_uses_global_crypto_namespace;
use dynamic_glob::expand_dynamic_import_glob;
use native_addon::refuse_compile_package_native_addon;
use parse_error::annotate_parse_error;
Expand Down Expand Up @@ -1785,206 +1785,9 @@ fn collect_module_finish(
transform_generators(&mut hir_module);
}

// Detect fetch() usage — js_fetch_with_options lives in perry-stdlib
if hir_module.uses_fetch {
ctx.needs_stdlib = true;
ctx.uses_fetch = true;
}

// Issue #76 — auto-link the wasmi host runtime when any module
// references `WebAssembly.*`. Without this the user has to remember
// `--enable-wasm-runtime`; with it the flag is only needed when they
// want to override the auto-detection (e.g. force-link for plugins
// they'll dlopen later).
if hir_module.uses_webassembly {
ctx.needs_wasm_runtime = true;
}

// Detect crypto.* builtin usage (randomBytes/randomUUID/sha256/md5 used
// without `import crypto`). The runtime symbols live behind the
// perry-stdlib `crypto` Cargo feature, so we need to flip that on for
// auto-optimize. Text-grep the serialized Debug form for the established
// dedicated HIR variants. The global WebCrypto namespace path below uses
// a structured walk because it is an ordinary `PropertyGet`.
{
let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions);
let uses_global_crypto_namespace = module_uses_global_crypto_namespace(&hir_module);
if hir_debug.contains("CryptoRandomBytes")
|| hir_debug.contains("CryptoRandomUUID")
|| hir_debug.contains("CryptoSha256")
|| hir_debug.contains("CryptoMd5")
// Web Crypto API (issue #561). The WebCrypto* HIR variants
// checked below lower to extern calls into perry-stdlib's
// webcrypto module, gated behind the `crypto` feature.
// Without flipping the gate, auto-optimize would build
// perry-stdlib without `crypto` and the link would fail with
// "_js_webcrypto_digest" undefined.
|| hir_debug.contains("WebCryptoDigest")
|| hir_debug.contains("WebCryptoImportKey")
|| hir_debug.contains("WebCryptoSign")
|| hir_debug.contains("WebCryptoVerify")
|| hir_debug.contains("WebCryptoEncrypt")
|| hir_debug.contains("WebCryptoDecrypt")
|| hir_debug.contains("WebCryptoGenerateKey")
|| hir_debug.contains("WebCryptoWrapKey")
|| hir_debug.contains("WebCryptoUnwrapKey")
// `globalThis.crypto` / bare `crypto` now materializes the
// WebCrypto singleton. Its `randomUUID` property dispatches
// through perry-stdlib's crypto bridge when called via a
// runtime property read rather than the direct HIR variant.
|| uses_global_crypto_namespace
{
ctx.needs_stdlib = true;
ctx.uses_crypto_builtins = true;
}
}

// Detect whether this module needs the regex engine. The engine
// (`regex`/`fancy-regex`, ~1.2 MB) is gated behind `perry-runtime/
// regex-engine` and the RegExp object's identity/display layer stays
// always-compiled, so a program that can never produce a RegExp at
// runtime links none of the matching machinery. A regex value can only
// exist if a regex literal / `RegExp` was evaluated, OR a regex-coercing
// string method (`.match`/`.matchAll`/`.search`, which build a RegExp from
// even a string arg per spec) ran, OR a glob API was used (the runtime
// compiles globs to regexes internally). We grep the serialized Debug form
// for the unambiguous HIR variant tokens and the generic-dispatch method
// names. Over-matching only over-includes the engine (a size, not a
// correctness, cost); the goal is zero false negatives. `eval` is
// non-functional in Perry so it can't create a regex at runtime.
{
let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions);
if hir_debug.contains("RegExp") // RegExp / RegExpDynamic / RegExpTest / RegExpExec / RegExpEscape / RegExpReplaceFn / RegExpExec{Index,Groups}
|| hir_debug.contains("StringMatch") // dedicated .match / .matchAll variants
|| hir_debug.contains("PathMatchesGlob")
|| hir_debug.contains("property: \"search\"")
|| hir_debug.contains("property: \"match\"")
|| hir_debug.contains("property: \"matchAll\"")
|| hir_debug.contains("property: \"glob\"")
|| hir_debug.contains("property: \"globSync\"")
{
ctx.uses_regex = true;
}
}

// Detect TC39 `Temporal.*` usage. The engine (`temporal_rs` + transitive
// tz/calendar deps, ~580 KB) is gated behind `perry-runtime/temporal`;
// the Temporal cell's identity layer stays always-compiled, so a program
// that never touches `Temporal` links none of the date-math machinery.
// `Temporal` is a global namespace (like `Intl`/`Math`): accessing it (even
// when aliased, e.g. `const now = Temporal.Now`) materializes a
// `PropertyGet { property: "Temporal" }`, so we match that exact token
// rather than a bare `"Temporal"` substring — the latter also fires on
// user identifiers like `myTemporal` / `temporalLog`, spuriously enabling
// the engine and undercutting the size win. JS `Date` is a separate impl.
{
let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions);
if hir_debug.contains("property: \"Temporal\"") {
ctx.uses_temporal = true;
}
}

// Detect WHATWG URL API usage. The `url`+`idna` host-canonicalization
// engine (~195 KB) is gated behind `perry-runtime/url-engine`; Perry's URL
// parsing is otherwise hand-rolled, so a program with no URL API links none
// of it. Web `URL`/`URLPattern`/`URLSearchParams` lower to dedicated `Url*`
// HIR variants (always `Url` + an uppercase letter, e.g. `UrlNew`,
// `UrlSet…`, `UrlSearchParams…`); `node:url` lowers to a
// `NativeMethodCall { module: "url", … }`. We match those exact tokens
// instead of a bare `"Url"`/`"URL"` substring, which would also fire on
// common camelCase identifiers like `baseUrl` / `imageUrl` and spuriously
// link the engine. Over-matching within the URL family (e.g. enabling for a
// URLSearchParams-only program that doesn't strictly need the host parser)
// is a benign size cost; the rule is zero false negatives.
{
let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions);
if hir_debug.contains("UrlNew")
|| hir_debug.contains("UrlParse")
|| hir_debug.contains("UrlCanParse")
|| hir_debug.contains("UrlPattern")
|| hir_debug.contains("UrlGet")
|| hir_debug.contains("UrlSet")
|| hir_debug.contains("UrlInstance")
|| hir_debug.contains("UrlSearchParams")
|| hir_debug.contains("module: \"url\"")
{
ctx.uses_url = true;
}
}

// Detect `String.prototype.normalize` (gates `unicode-normalization`,
// ~113 KB) and `Intl.Segmenter` (gates `unicode-segmentation`, ~73 KB).
// Both lower to method/namespace nodes carrying the name as a `property`,
// so we match the exact `property: "<name>"` token. (A bare `"Segmenter"`
// substring would also fire on a user identifier named `Segmenter`.)
{
let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions);
if hir_debug.contains("property: \"normalize\"") {
ctx.uses_string_normalize = true;
}
if hir_debug.contains("property: \"Segmenter\"") {
ctx.uses_intl_segmenter = true;
}
}

// Detect heap-snapshot / `process.report` usage, the only user-facing APIs
// behind the `diagnostics` feature (~95 KB of cold-path JSON serializers +
// the `serde_json` pulled only by them). `v8.getHeapSnapshot` /
// `v8.writeHeapSnapshot` lower to `NativeMethodCall { method: "…" }`;
// `process.report.*` surfaces as `property: "report"`. The env-driven dev
// diagnostics (GC-diag / typed-feedback JSON) ride the same feature and
// degrade gracefully when off, so they need no detection.
{
let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions);
if hir_debug.contains("method: \"getHeapSnapshot\"")
|| hir_debug.contains("method: \"writeHeapSnapshot\"")
|| hir_debug.contains("property: \"report\"")
{
ctx.uses_diagnostics = true;
}
// `node:dgram` (UDP) → gates `perry-runtime/mod-dgram` (~43 KB; dgram
// lowers to `NativeMethodCall { module: "dgram" }`, runtime-only so not
// in `native_module_imports`).
if hir_debug.contains("module: \"dgram\"") {
ctx.uses_dgram = true;
}
}

// Detect readline usage via process.stdin raw/lifecycle methods. These
// don't go through an `import 'readline'` statement, so the import-based
// needs_stdlib detection above misses them.
{
let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions);
if hir_debug.contains("ProcessStdinSetRawMode")
|| hir_debug.contains("ProcessStdinOn")
|| hir_debug.contains("ProcessStdinRemoveListener")
|| hir_debug.contains("ProcessStdinLifecycle")
{
ctx.needs_stdlib = true;
ctx.native_module_imports.insert("readline".to_string());
}
}

// Detect ioredis usage (matched by class name, not import path)
let mut found_ioredis = false;
for (_, module_name, _) in &hir_module.exported_native_instances {
if module_name == "ioredis" {
found_ioredis = true;
break;
}
}
if !found_ioredis {
for (_, module_name, _) in &hir_module.exported_func_return_native_instances {
if module_name == "ioredis" {
found_ioredis = true;
break;
}
}
}
if found_ioredis {
ctx.needs_stdlib = true;
ctx.native_module_imports.insert("ioredis".to_string());
}
// Set optional-feature gates (regex/temporal/url/crypto/events/etc.) so
// auto-optimize links only the runtime subsystems this module can reach.
feature_detect::detect_optional_feature_usage(ctx, &hir_module);

let collected_after_insert = ctx.native_modules.len() + ctx.js_modules.len() + 1;
progress.record(ProgressSnapshot {
Expand Down
Loading