From fc2bb4c10417980058dcac914c8c8f3a2d8229e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sun, 14 Jun 2026 19:58:02 +0200 Subject: [PATCH] feat(compile): per-app feature gating for regex/Temporal/URL/normalize/segmenter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gate five heavy runtime subsystems behind opt-in cargo features so a compiled binary links only what the program actually uses. Mirrors the existing `wasm-host` mechanism: the compiler detects usage in HIR and forwards `perry-runtime/<feature>` to the auto-optimize runtime build. regex-engine regex + fancy-regex ~1.2 MB temporal temporal_rs + tz/calendar deps ~580 KB (JS Date is a separate impl) url-engine url + idna + transitive percent_encoding ~195 KB string-normalize unicode-normalization ~113 KB intl-segmenter unicode-segmentation ~73 KB A `console.log` hello-world drops ~2.5 MB (all five gates off); each engine auto-enables on use with identical behavior. No speed trade-off — this is pure dead-code elimination for code paths a given program never reaches. Detection (collect_modules) sets ctx.uses_* flags by grepping the HIR Debug form for the relevant tokens (regex literal / RegExp / .match/.matchAll/.search / glob; Temporal; URL/URLSearchParams/URLPattern; .normalize; Intl.Segmenter), forwarded + baked into the auto-optimize cache key in optimized_libs. Each engine keeps its identity/display layer always compiled (so value formatting / JSON / instanceof on non-engine values still work); the engine code and the engine branches inside always-linked dispatchers are #[cfg]-gated with the existing non-engine path as the fallback. The workspace perry-runtime dependency is set to default-features=false so dependency edges (perry-stdlib, ext crates, the perry binary) don't force the heavy default features back on during the auto-optimize --no-default-features build (cargo unifies features additively).
Plain `cargo build` / `cargo test --workspace` and the shipped prebuilt still build perry-runtime as a *selected* package, so its own `default` applies and they keep every engine. --- Cargo.toml | 12 ++- crates/perry-runtime/Cargo.toml | 58 ++++++++++-- .../perry-runtime/src/builtins/arithmetic.rs | 1 + .../perry-runtime/src/builtins/formatting.rs | 32 +++++-- crates/perry-runtime/src/builtins/globals.rs | 1 + crates/perry-runtime/src/date.rs | 1 + crates/perry-runtime/src/fs/dir_glob_watch.rs | 60 ++++++++++++- crates/perry-runtime/src/intl.rs | 23 +++++ crates/perry-runtime/src/json/stringify.rs | 2 + crates/perry-runtime/src/object/assert.rs | 15 ++++ .../src/object/class_registry.rs | 1 + .../perry-runtime/src/object/field_get_set.rs | 1 + .../perry-runtime/src/object/global_this.rs | 60 +++++++++++++ .../src/object/iterator_prototypes.rs | 1 + crates/perry-runtime/src/object/mod.rs | 1 + .../src/object/native_call_method.rs | 63 +++++++++---- crates/perry-runtime/src/object/object_ops.rs | 1 + .../src/object/regex_proto_thunks.rs | 5 ++ crates/perry-runtime/src/path.rs | 21 ++++- crates/perry-runtime/src/regex.rs | 73 +++++++++++++-- crates/perry-runtime/src/regex/match_all.rs | 4 +- crates/perry-runtime/src/regex/replace_fn.rs | 11 ++- crates/perry-runtime/src/string/compare.rs | 21 +++-- crates/perry-runtime/src/string/mod.rs | 1 + crates/perry-runtime/src/string/split.rs | 4 + crates/perry-runtime/src/symbol.rs | 1 + crates/perry-runtime/src/temporal/mod.rs | 46 ++++++++++ crates/perry-runtime/src/url/mod.rs | 13 ++- crates/perry-runtime/src/url/node_compat.rs | 28 +++++- crates/perry-runtime/src/url/url_class.rs | 33 ++++--- crates/perry-runtime/src/value/dyn_index.rs | 1 + crates/perry-runtime/src/value/to_string.rs | 2 + .../src/commands/compile/collect_modules.rs | 88 +++++++++++++++++++ .../src/commands/compile/optimized_libs.rs | 35 +++++++- crates/perry/src/commands/compile/types.rs | 35 ++++++++ scripts/check_file_size.sh | 11 +++ 36 files changed, 
694 insertions(+), 72 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a775640a3e..754a3e22ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -283,7 +283,17 @@ perry-dispatch = { path = "crates/perry-dispatch" } # Cargo.toml, and external users never need perry-runtime in their # Cargo graph (Perry's compiler driver links `libperry_runtime.a` # into the final binary directly). -perry-runtime = { path = "crates/perry-runtime", version = "0.5.1011" } +# `default-features = false` so dependency EDGES (perry-stdlib, ext crates, the +# perry binary) don't force perry-runtime's heavy default features (`full`, +# `regex-engine`, `temporal`, `url-engine`, `string-normalize`, `intl-segmenter`) +# back on. The auto-optimize build selects perry-runtime with +# `--no-default-features` and re-adds only what a given program needs (see +# optimized_libs.rs), so an edge requesting defaults would defeat that (cargo +# unifies features additively). A plain `cargo build`/`cargo test --workspace` +# still builds perry-runtime as a *selected* package, so its own `default` +# applies — tests and the shipped prebuilt keep every engine. This mirrors why +# `wasm-host` must stay out of `default`, generalized to all heavy features. +perry-runtime = { path = "crates/perry-runtime", version = "0.5.1011", default-features = false } perry-ffi = { path = "crates/perry-ffi", version = "0.5.1011" } perry-ext-dotenv = { path = "crates/perry-ext-dotenv" } perry-ext-nanoid = { path = "crates/perry-ext-nanoid" } diff --git a/crates/perry-runtime/Cargo.toml b/crates/perry-runtime/Cargo.toml index 4079bd7eca..3d3f52b7ce 100644 --- a/crates/perry-runtime/Cargo.toml +++ b/crates/perry-runtime/Cargo.toml @@ -9,7 +9,49 @@ description = "Runtime library: GC, JSValue, builtins" crate-type = ["rlib", "staticlib"] [features] -default = ["full"] +# `default` keeps the shipped prebuilt libperry_runtime.a and plain +# `cargo build`/test full-featured. 
The auto-optimize path builds with +# `--no-default-features` and re-adds only the features a given program +# actually needs (see optimized_libs.rs), so the heavy subsystems below +# (regex engine, Temporal, URL/IDNA, normalize, segmenter) are *opt-in per +# app* and absent from binaries that never use them. +default = ["full", "regex-engine", "temporal", "url-engine", "string-normalize", "intl-segmenter"] +# The user's regular-expression engine (`regex` + `fancy-regex`, ~1.2 MB of +# DFA/NFA machinery). A program that never evaluates a regex literal, `RegExp`, +# a regex-coercing string method, or a glob API can't produce a RegExp at +# runtime, so the compiler leaves this off and the engine is never linked. The +# RegExp object's identity/display layer (header, `is_regex_pointer`, `toString`) +# stays always compiled, so console-formatting / value-to-string paths keep +# working with no engine present. +regex-engine = ["dep:regex", "dep:fancy-regex"] +# The TC39 `Temporal.*` API (`temporal_rs` + its transitive tz/calendar deps: +# jiff-tzdb, icu_calendar, timezone_provider, calendrical_calculations — +# ~580 KB). Independent of JS `Date` (which has its own `date.rs` impl), so a +# program that uses `Date` but never `Temporal.*` links none of this. The +# compiler enables it on `Temporal` usage; the Temporal cell's identity layer +# (is_temporal_value / GC class-id range) stays compiled so value +# formatting/JSON/instanceof on non-Temporal values keep working. +temporal = ["dep:temporal_rs"] +# WHATWG URL host canonicalization: the `url` crate's full host parser (IPv4 +# shorthand / numeric hosts) + `idna` (Unicode domain ↔ punycode), ~195 KB +# incl. the transitive `percent_encoding`. Perry's URL parsing is otherwise +# hand-rolled (url/parse.rs), so the crates are only reached from 4 host/IDNA +# sites behind URL APIs (`new URL`, hostname setter, url.domainTo*, legacy +# url.resolve). 
A program that never uses a URL API links none of it; each gated +# site has a benign hand-rolled fallback. (`unicode-normalization`/`-segmentation` +# are NOT gated here — they're shared with Intl / String.normalize.) +url-engine = ["dep:url", "dep:idna"] +# `String.prototype.normalize` (NFC/NFD/NFKC/NFKD) via `unicode-normalization` +# (~113 KB of Unicode decomposition/composition tables). Only `string/compare.rs` +# uses it; a program that never calls `.normalize()` links none of it. (This is +# the *old* `unicode-normalization` crate, distinct from `idna`'s `icu_normalizer`, +# so it's independent of `url-engine`.) +string-normalize = ["dep:unicode-normalization"] +# `Intl.Segmenter` (UAX #29 grapheme/word/sentence segmentation) via +# `unicode-segmentation` (~73 KB) — the grapheme path backs string-width@7+/ +# wrap-ansi@9+ (and thus ink). Only `intl.rs` uses it; a program that never +# constructs an `Intl.Segmenter` links none of it. +intl-segmenter = ["dep:unicode-segmentation"] # `full` only opt-ins the small Node-API helpers (os.hostname / os.homedir). # `postgres`, `redis`, `whoami` were previously listed here but were either # unimported (postgres, whoami) or only used by a now-deleted `redis_client.rs` @@ -50,11 +92,11 @@ thiserror.workspace = true anyhow.workspace = true libc = "0.2" rand = "0.8" -regex = "1" +regex = { version = "1", optional = true } # Taffy — flexbox / grid layout engine for the perry/tui module # (#358 Phase 3). Same crate Bevy and Dioxus use; pure Rust, no FFI. taffy = { version = "0.7", default-features = false, features = ["std", "flexbox", "taffy_tree"] } -fancy-regex = "0.18" +fancy-regex = { version = "0.18", optional = true } itoa = "1" ryu = "1" base64 = "0.22" @@ -67,16 +109,16 @@ lazy_static = "1.4" # our code is binding glue per type. `compiled_data` vendors the IANA tz DB # hermetically (needed by ZonedDateTime / Now), `sys-local` adds the # current-system-zone lookup used by Temporal.Now.*ISO() with no argument. 
-temporal_rs = { version = "0.2.3", default-features = false, features = ["std", "compiled_data", "sys-local"] } +temporal_rs = { version = "0.2.3", default-features = false, features = ["std", "compiled_data", "sys-local"], optional = true } serde_json = "1" -unicode-normalization = "0.1" +unicode-normalization = { version = "0.1", optional = true } # #4877: extended grapheme-cluster / word / sentence segmentation backing # Intl.Segmenter (the grapheme path is what string-width@7+/wrap-ansi@9+ use, # so it gates ink). Pure-Rust UAX #29 implementation, already in our lock graph. -unicode-segmentation = "1" -idna = "1" -url = "2" +unicode-segmentation = { version = "1", optional = true } +idna = { version = "1", optional = true } +url = { version = "2", optional = true } # #4911: real node:dns resolve*/reverse. hickory-proto provides DNS wire-format # encode/decode + record types; we drive it synchronously over a blocking # std::net::UdpSocket (TCP fallback on truncation), so — unlike hickory-resolver diff --git a/crates/perry-runtime/src/builtins/arithmetic.rs b/crates/perry-runtime/src/builtins/arithmetic.rs index 67e7b0a603..78fd07a777 100644 --- a/crates/perry-runtime/src/builtins/arithmetic.rs +++ b/crates/perry-runtime/src/builtins/arithmetic.rs @@ -243,6 +243,7 @@ unsafe fn rel_to_primitive(value: f64) -> f64 { // `TypeError` for every `Temporal.*` value (the spec bans relational ordering // of Temporal values: `plainDate < plainDate` throws). Without this the cell // fell through to the `DefaultString` arm and compared ISO strings silently. 
+ #[cfg(feature = "temporal")] if crate::temporal::is_temporal_value(value) { return crate::temporal::dispatch::call_method(value, "valueOf", &[]); } diff --git a/crates/perry-runtime/src/builtins/formatting.rs b/crates/perry-runtime/src/builtins/formatting.rs index f0767736b6..838600cf1a 100644 --- a/crates/perry-runtime/src/builtins/formatting.rs +++ b/crates/perry-runtime/src/builtins/formatting.rs @@ -725,6 +725,28 @@ unsafe fn date_inspect_string(value: f64) -> String { .to_string() } +/// `util.inspect` arm for a Temporal cell: `Temporal.X ` (or +/// `[object Object]` if the cell can't be read). Returns `None` when `addr` is +/// not a Temporal cell, so the caller's `else if let Some(..)` chain falls +/// through. Cfg-paired: with the Temporal engine gated off no cell can exist, so +/// the off twin is a constant `None` (and doesn't reference the gated module). +#[cfg(feature = "temporal")] +fn temporal_inspect_arm(addr: usize, value: f64) -> Option { + if crate::temporal::is_temporal_cell_addr(addr) { + Some( + crate::temporal::temporal_inspect_string(value) + .unwrap_or_else(|| "[object Object]".to_string()), + ) + } else { + None + } +} + +#[cfg(not(feature = "temporal"))] +fn temporal_inspect_arm(_addr: usize, _value: f64) -> Option { + None +} + /// Print multiple values from an array (console.log with spread support) /// Takes a pointer to an ArrayHeader containing f64 values /// Helper function to format a JSValue as a string (for spread arrays) @@ -819,12 +841,11 @@ pub(crate) fn format_jsvalue(value: f64, depth: usize) -> String { // `Invalid Date`). Handle before the GC-header object dispatch // below, which would deref the 8-byte cell as an ObjectHeader. date_inspect_string(value) - } else if crate::temporal::is_temporal_cell_addr(ptr as usize) { + } else if let Some(s) = temporal_inspect_arm(ptr as usize, value) { // Temporal (#4686): `util.inspect` prints `Temporal.Duration // `. 
Handle before the GC-header object dispatch (the cell // is smaller than an ObjectHeader). - crate::temporal::temporal_inspect_string(value) - .unwrap_or_else(|| "[object Object]".to_string()) + s } else if crate::value::addr_class::is_handle_band(ptr as usize) { // Refs #421: Web Fetch (and other) handles are NaN-boxed // POINTER_TAG values whose payload is a small registry id, NOT @@ -1560,10 +1581,9 @@ fn format_jsvalue_for_json(value: f64, depth: usize) -> String { // unquoted (or `Invalid Date`), not the 8-byte cell deref'd // as an object. date_inspect_string(value) - } else if crate::temporal::is_temporal_cell_addr(ptr as usize) { + } else if let Some(s) = temporal_inspect_arm(ptr as usize, value) { // Temporal value inside an inspected object → `Temporal.X `. - crate::temporal::temporal_inspect_string(value) - .unwrap_or_else(|| "[object Object]".to_string()) + s } else if crate::value::addr_class::is_handle_band(ptr as usize) { "[object Object]".to_string() } else if crate::symbol::is_registered_symbol(ptr as usize) diff --git a/crates/perry-runtime/src/builtins/globals.rs b/crates/perry-runtime/src/builtins/globals.rs index 7a1e1b2daf..56a0ad355e 100644 --- a/crates/perry-runtime/src/builtins/globals.rs +++ b/crates/perry-runtime/src/builtins/globals.rs @@ -751,6 +751,7 @@ fn js_structured_clone_inner(value: f64) -> f64 { // arena slot with GC_TYPE_OBJECT but tracked in // REGEX_POINTERS). Clone by reading source/flags and // building a fresh one via js_regexp_new. 
+ #[cfg(feature = "regex-engine")] if crate::regex::is_regex_pointer(ptr as *const u8) { let re_ptr = ptr as *const crate::regex::RegExpHeader; let src = crate::regex::js_regexp_get_source(re_ptr); diff --git a/crates/perry-runtime/src/date.rs b/crates/perry-runtime/src/date.rs index dd2ba7ed9d..dd0af1da15 100644 --- a/crates/perry-runtime/src/date.rs +++ b/crates/perry-runtime/src/date.rs @@ -1059,6 +1059,7 @@ pub extern "C" fn js_date_value_of(timestamp: f64) -> f64 { // hard `TypeError` (the spec bans implicit numeric coercion / ordering), so // route a Temporal receiver to its brand dispatch, which throws — rather // than returning the opaque cell as a pseudo-Date timestamp. + #[cfg(feature = "temporal")] if crate::temporal::is_temporal_value(timestamp) { return crate::temporal::dispatch::call_method(timestamp, "valueOf", &[]); } diff --git a/crates/perry-runtime/src/fs/dir_glob_watch.rs b/crates/perry-runtime/src/fs/dir_glob_watch.rs index c8d5b4bfed..1c7c15631b 100644 --- a/crates/perry-runtime/src/fs/dir_glob_watch.rs +++ b/crates/perry-runtime/src/fs/dir_glob_watch.rs @@ -5,7 +5,13 @@ use std::collections::{BTreeMap, HashMap, VecDeque}; use std::fs; #[cfg(unix)] use std::os::unix::fs::PermissionsExt; -use std::path::{Path, PathBuf}; +use std::path::Path; +// `PathBuf` is only named by the regex-engine-gated glob helpers +// (`pathbuf_to_slashes`); the cp path at the bottom uses the fully-qualified +// `std::path::PathBuf`, so the bare import is gated to avoid an unused-import +// warning when the engine is off. +#[cfg(feature = "regex-engine")] +use std::path::PathBuf; use std::sync::Once; use crate::closure::{ @@ -15,6 +21,12 @@ use crate::closure::{ use super::*; +/// Compiled exclude-pattern type for `fs.glob`. Backed by `fancy_regex::Regex`. +/// Only referenced by the regex-engine-gated glob machinery (`FsGlobOptions`), +/// so it's defined only when that engine is linked. 
+#[cfg(feature = "regex-engine")] +type GlobExcludeRegex = fancy_regex::Regex; + /// `fs.opendirSync(path)` — codegen emits a direct call to the unmangled /// `js_fs_opendir_sync` symbol (runtime_decls/strings.rs). Without `#[no_mangle]` /// the symbol is Rust-mangled and the linker can't resolve it, so any program @@ -77,6 +89,9 @@ fn js_fs_opendir_value_inner(path_value: f64, include_path: bool) -> Result, + exclude_patterns: Vec, exclude_fn: Option<*const ClosureHeader>, } +#[cfg(feature = "regex-engine")] struct GlobCandidate { actual_path: String, kind: DirentKind, @@ -106,16 +123,19 @@ fn normalize_slashes(path: &str) -> String { path.replace('\\', "/") } +#[cfg(feature = "regex-engine")] fn pathbuf_to_slashes(path: PathBuf) -> String { normalize_slashes(&path.to_string_lossy()) } +#[cfg(feature = "regex-engine")] fn current_dir_slashes() -> String { std::env::current_dir() .map(pathbuf_to_slashes) .unwrap_or_else(|_| ".".to_string()) } +#[cfg(feature = "regex-engine")] fn trim_trailing_slashes(path: &str) -> &str { let trimmed = path.trim_end_matches('/'); if trimmed.is_empty() { @@ -125,6 +145,7 @@ fn trim_trailing_slashes(path: &str) -> &str { } } +#[cfg(feature = "regex-engine")] fn join_slash(base: &str, child: &str) -> String { if child.is_empty() || child == "." 
{ return normalize_slashes(base); @@ -142,6 +163,7 @@ fn join_slash(base: &str, child: &str) -> String { } } +#[cfg(feature = "regex-engine")] fn absolutize_slash(path: &str) -> String { let normalized = normalize_slashes(path); if Path::new(&normalized).is_absolute() { @@ -151,6 +173,7 @@ fn absolutize_slash(path: &str) -> String { } } +#[cfg(feature = "regex-engine")] fn relative_to_base(path: &str, base: &str) -> String { let path = normalize_slashes(path); let base = normalize_slashes(base); @@ -166,6 +189,7 @@ fn relative_to_base(path: &str, base: &str) -> String { path.strip_prefix(&prefix).unwrap_or(&path).to_string() } +#[cfg(feature = "regex-engine")] fn parent_display_for_relative(cwd_display: &str, rel_parent: &str) -> String { if rel_parent == "." || rel_parent.is_empty() { if cwd_display.is_empty() { @@ -192,6 +216,7 @@ fn decode_string_value(value: f64) -> Option { ) } +#[cfg(feature = "regex-engine")] fn decode_string_or_file_url(value: f64) -> Option { if let Some(s) = decode_string_value(value) { return Some(s); @@ -279,10 +304,11 @@ fn glob_patterns_from_value_result(pattern_value: f64) -> Result, f6 Err(glob_pattern_string_error("patterns", pattern_value)) } +#[cfg(feature = "regex-engine")] fn compile_exclude_patterns_result( exclude_value: f64, cwd_actual: &str, -) -> Result, f64> { +) -> Result, f64> { let Some(arr) = array_ptr_from_value(exclude_value) else { let message = format!( "The \"options.exclude\" property must be of type function or string[]. 
Received {}", @@ -320,6 +346,7 @@ fn compile_exclude_patterns_result( Ok(patterns) } +#[cfg(feature = "regex-engine")] fn glob_options_from_value_result(options_value: f64) -> Result { if let Some(err) = validate::object_options_type_error_value("options", options_value) { return Err(err); @@ -373,6 +400,7 @@ fn glob_options_from_value_result(options_value: f64) -> Result Vec { let chars: Vec = input.chars().collect(); let mut parts = Vec::new(); @@ -414,6 +443,7 @@ fn split_top_level(input: &str, separator: char) -> Vec { parts } +#[cfg(feature = "regex-engine")] fn take_balanced(chars: &[char], pos: &mut usize, open: char, close: char) -> Option { let mut depth = 1i32; let start = *pos; @@ -442,6 +472,7 @@ fn take_balanced(chars: &[char], pos: &mut usize, open: char, close: char) -> Op None } +#[cfg(feature = "regex-engine")] fn parse_char_class(chars: &[char], pos: &mut usize) -> String { let start = pos.saturating_sub(1); let mut class = String::from("["); @@ -471,12 +502,14 @@ fn parse_char_class(chars: &[char], pos: &mut usize) -> String { regex::escape(&literal) } +#[cfg(feature = "regex-engine")] fn glob_fragment_to_regex(pattern: &str) -> Option { let chars: Vec = pattern.chars().collect(); let mut pos = 0usize; parse_glob_chars(&chars, &mut pos) } +#[cfg(feature = "regex-engine")] fn parse_glob_chars(chars: &[char], pos: &mut usize) -> Option { let mut out = String::new(); while *pos < chars.len() { @@ -531,12 +564,14 @@ fn parse_glob_chars(chars: &[char], pos: &mut usize) -> Option { Some(out) } +#[cfg(feature = "regex-engine")] pub(crate) fn glob_regex_from_pattern(pattern: &str) -> Option { let normalized = normalize_slashes(pattern); let body = glob_fragment_to_regex(&normalized)?; fancy_regex::Regex::new(&format!("^{body}$")).ok() } +#[cfg(feature = "regex-engine")] fn first_glob_meta(pattern: &str) -> usize { let chars: Vec<(usize, char)> = pattern.char_indices().collect(); for (idx, (byte_idx, ch)) in chars.iter().enumerate() { @@ -550,6 +585,7 @@ 
fn first_glob_meta(pattern: &str) -> usize { pattern.len() } +#[cfg(feature = "regex-engine")] pub(crate) fn glob_search_root(pattern: &str) -> String { let normalized = normalize_slashes(pattern); let first_meta = first_glob_meta(&normalized); @@ -561,6 +597,7 @@ pub(crate) fn glob_search_root(pattern: &str) -> String { } } +#[cfg(feature = "regex-engine")] fn walk_paths_for_glob(dir: &Path, follow_symlinks: bool, out: &mut Vec) { let Ok(entries) = fs::read_dir(dir) else { return; @@ -583,6 +620,7 @@ fn walk_paths_for_glob(dir: &Path, follow_symlinks: bool, out: &mut Vec bool { options .exclude_patterns @@ -631,6 +670,7 @@ fn excluded_by_patterns(path: &str, options: &FsGlobOptions) -> bool { .any(|re| re.is_match(path).unwrap_or(false)) } +#[cfg(feature = "regex-engine")] fn excluded_by_function(entry: &FsGlobMatch, options: &FsGlobOptions) -> bool { let Some(callback) = options.exclude_fn else { return false; @@ -651,6 +691,7 @@ fn glob_entry_value(entry: &FsGlobMatch, with_file_types: bool) -> f64 { } } +#[cfg(feature = "regex-engine")] fn run_fs_glob_result(pattern_value: f64, options_value: f64) -> Result { let patterns = glob_patterns_from_value_result(pattern_value)?; let options = glob_options_from_value_result(options_value)?; @@ -704,6 +745,19 @@ fn run_fs_glob_result(pattern_value: f64, options_value: f64) -> Result Result { + glob_patterns_from_value_result(pattern_value)?; + Ok(FsGlobRun { + matches: Vec::new(), + with_file_types: false, + }) +} + fn run_fs_glob(pattern_value: f64, options_value: f64) -> FsGlobRun { match run_fs_glob_result(pattern_value, options_value) { Ok(run) => run, diff --git a/crates/perry-runtime/src/intl.rs b/crates/perry-runtime/src/intl.rs index 9da829dea9..d20baed2f7 100644 --- a/crates/perry-runtime/src/intl.rs +++ b/crates/perry-runtime/src/intl.rs @@ -14,6 +14,7 @@ use crate::object::{ use crate::string::{js_string_from_bytes, str_bytes_from_jsvalue}; use crate::value::{js_jsvalue_to_string, js_nanbox_pointer, JSValue}; 
use crate::StringHeader; +#[cfg(feature = "intl-segmenter")] use unicode_segmentation::UnicodeSegmentation; const KIND_NUMBER: &str = "NumberFormat"; @@ -566,6 +567,7 @@ fn normalize_granularity(value: Option) -> String { /// A segment is "word-like" when it contains at least one alphanumeric /// character — i.e. it is not pure whitespace/punctuation. This mirrors the /// `isWordLike` flag the spec attaches to word-granularity segments. +#[cfg(feature = "intl-segmenter")] fn segment_is_word_like(segment: &str) -> bool { segment.chars().any(|c| c.is_alphanumeric()) } @@ -601,6 +603,7 @@ fn build_segments(granularity: &str, value: f64) -> f64 { let input_value = string_value(&input); let mut arr = js_array_alloc(0); let mut index = 0u32; + #[cfg(feature = "intl-segmenter")] match granularity { "word" => { for segment in input.split_word_bounds() { @@ -631,6 +634,26 @@ fn build_segments(granularity: &str, value: f64) -> f64 { } } } + // Segmenter engine gated off: no UAX #29 tables. Fall back to per-code-point + // segmentation (one segment per `char`) for every granularity — enough to + // keep iteration / spread working without the segmentation crate. + #[cfg(not(feature = "intl-segmenter"))] + { + // Preserve the `isWordLike` field for word granularity so the record + // shape matches the engine-enabled path (this block is dead in practice + // — the compiler enables `intl-segmenter` on any `Intl.Segmenter` use). 
+ let is_word = granularity == "word"; + for segment in input.chars().map(|c| c.to_string()).collect::>() { + let word_like = if is_word { + Some(segment.chars().any(|c| c.is_alphanumeric())) + } else { + None + }; + let record = make_segment_record(&segment, index, input_value, word_like); + arr = js_array_push_f64(arr, record); + index += utf16_len(&segment); + } + } js_nanbox_pointer(arr as i64) } diff --git a/crates/perry-runtime/src/json/stringify.rs b/crates/perry-runtime/src/json/stringify.rs index f96d3b1dbd..d21e9c69be 100644 --- a/crates/perry-runtime/src/json/stringify.rs +++ b/crates/perry-runtime/src/json/stringify.rs @@ -697,6 +697,7 @@ pub(crate) unsafe fn stringify_value(value: f64, type_hint: u32, buf: &mut Strin // Temporal (#4686): `JSON.stringify(temporal)` calls `toJSON`, which // returns the canonical ISO string — emitted quoted. Detect before the // generic object path (the cell is not an enumerable ObjectHeader). + #[cfg(feature = "temporal")] if crate::temporal::is_temporal_cell_addr(ptr as usize) { if let Some(s) = crate::temporal::temporal_iso_string(value) { write_escaped_string(buf, &s); @@ -926,6 +927,7 @@ pub(crate) unsafe fn stringify_value_depth( } // Temporal (#4686): `toJSON` → quoted ISO string. See the matching // branch in `stringify_value`. 
+ #[cfg(feature = "temporal")] if crate::temporal::is_temporal_cell_addr(ptr as usize) { if let Some(s) = crate::temporal::temporal_iso_string(value) { write_escaped_string(buf, &s); diff --git a/crates/perry-runtime/src/object/assert.rs b/crates/perry-runtime/src/object/assert.rs index 578b1ed26c..2c973b8228 100644 --- a/crates/perry-runtime/src/object/assert.rs +++ b/crates/perry-runtime/src/object/assert.rs @@ -84,6 +84,7 @@ fn regexp_ptr(pattern: f64) -> Option<*const crate::regex::RegExpHeader> { Some(ptr as *const crate::regex::RegExpHeader) } +#[cfg(feature = "regex-engine")] fn regex_test_value(pattern: f64, input: f64) -> Option { let re = regexp_ptr(pattern)?; let input_string = value_to_string(input); @@ -92,12 +93,26 @@ fn regex_test_value(pattern: f64, input: f64) -> Option { Some(crate::regex::js_regexp_test(re, input_ptr) != 0) } +/// Regex engine gated off: no RegExp value can exist, so `expected` is never a +/// RegExp — report "not a RegExp matcher" so callers fall to their non-regex +/// comparison path. 
+#[cfg(not(feature = "regex-engine"))] +fn regex_test_value(_pattern: f64, _input: f64) -> Option { + None +} + +#[cfg(feature = "regex-engine")] fn regex_test_string(re: *const crate::regex::RegExpHeader, input: f64) -> bool { let input_ptr = crate::value::js_get_string_pointer_unified(input) as *const crate::StringHeader; !input_ptr.is_null() && crate::regex::js_regexp_test(re, input_ptr) != 0 } +#[cfg(not(feature = "regex-engine"))] +fn regex_test_string(_re: *const crate::regex::RegExpHeader, _input: f64) -> bool { + false +} + fn validate_regexp_argument(regexp: f64) -> *const crate::regex::RegExpHeader { if let Some(re) = regexp_ptr(regexp) { return re; diff --git a/crates/perry-runtime/src/object/class_registry.rs b/crates/perry-runtime/src/object/class_registry.rs index 770e66dfdf..f9bb8d51b2 100644 --- a/crates/perry-runtime/src/object/class_registry.rs +++ b/crates/perry-runtime/src/object/class_registry.rs @@ -2156,6 +2156,7 @@ pub unsafe extern "C" fn js_new_function_construct( return crate::value::js_nanbox_pointer(error as i64); } // #2889: `new (rebound RegExp)(pattern, flags)`. + #[cfg(feature = "regex-engine")] "RegExp" => { let pattern = if args.is_empty() { std::ptr::null_mut() diff --git a/crates/perry-runtime/src/object/field_get_set.rs b/crates/perry-runtime/src/object/field_get_set.rs index 23594f5572..37f5a5045a 100644 --- a/crates/perry-runtime/src/object/field_get_set.rs +++ b/crates/perry-runtime/src/object/field_get_set.rs @@ -3191,6 +3191,7 @@ pub extern "C" fn js_object_get_field_by_name( // bare value is rare; the `value.method()` call form is handled in // `js_native_call_method`). `obj` may be NaN-boxed (top16 0x7FFD) or a // raw-I64 pointer (top16 0). 
+ #[cfg(feature = "temporal")] { let bits = obj as u64; let top16 = bits >> 48; diff --git a/crates/perry-runtime/src/object/global_this.rs b/crates/perry-runtime/src/object/global_this.rs index 1b502d4a5c..4871803189 100644 --- a/crates/perry-runtime/src/object/global_this.rs +++ b/crates/perry-runtime/src/object/global_this.rs @@ -3129,6 +3129,7 @@ fn install_math_namespace(ns_obj: *mut ObjectHeader) { // (`from`, `compare`) are installed on the constructor closure with call-arity // 0 so every argument lands in the rest array the thunk reads. +#[cfg(feature = "temporal")] extern "C" fn temporal_duration_ctor_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3136,6 +3137,7 @@ extern "C" fn temporal_duration_ctor_thunk( crate::temporal::duration::construct(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_duration_from_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3143,6 +3145,7 @@ extern "C" fn temporal_duration_from_thunk( crate::temporal::duration::from_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_duration_compare_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3150,6 +3153,7 @@ extern "C" fn temporal_duration_compare_thunk( crate::temporal::duration::compare_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_instant_ctor_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3157,6 +3161,7 @@ extern "C" fn temporal_instant_ctor_thunk( crate::temporal::instant::construct(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_instant_from_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3164,6 +3169,7 @@ extern "C" fn temporal_instant_from_thunk( crate::temporal::instant::from_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn 
temporal_instant_from_epoch_ms_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3171,6 +3177,7 @@ extern "C" fn temporal_instant_from_epoch_ms_thunk( crate::temporal::instant::from_epoch_milliseconds_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_instant_from_epoch_ns_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3178,6 +3185,7 @@ extern "C" fn temporal_instant_from_epoch_ns_thunk( crate::temporal::instant::from_epoch_nanoseconds_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_instant_compare_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3185,6 +3193,7 @@ extern "C" fn temporal_instant_compare_thunk( crate::temporal::instant::compare_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_date_ctor_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3192,6 +3201,7 @@ extern "C" fn temporal_plain_date_ctor_thunk( crate::temporal::plain_date::construct(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_date_from_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3199,6 +3209,7 @@ extern "C" fn temporal_plain_date_from_thunk( crate::temporal::plain_date::from_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_date_compare_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3206,6 +3217,7 @@ extern "C" fn temporal_plain_date_compare_thunk( crate::temporal::plain_date::compare_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_time_ctor_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3213,6 +3225,7 @@ extern "C" fn temporal_plain_time_ctor_thunk( 
crate::temporal::plain_time::construct(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_time_from_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3220,6 +3233,7 @@ extern "C" fn temporal_plain_time_from_thunk( crate::temporal::plain_time::from_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_time_compare_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3227,6 +3241,7 @@ extern "C" fn temporal_plain_time_compare_thunk( crate::temporal::plain_time::compare_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_date_time_ctor_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3234,6 +3249,7 @@ extern "C" fn temporal_plain_date_time_ctor_thunk( crate::temporal::plain_date_time::construct(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_date_time_from_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3241,6 +3257,7 @@ extern "C" fn temporal_plain_date_time_from_thunk( crate::temporal::plain_date_time::from_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_date_time_compare_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3248,6 +3265,7 @@ extern "C" fn temporal_plain_date_time_compare_thunk( crate::temporal::plain_date_time::compare_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_year_month_ctor_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3255,6 +3273,7 @@ extern "C" fn temporal_plain_year_month_ctor_thunk( crate::temporal::plain_year_month::construct(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_year_month_from_thunk( _closure: *const crate::closure::ClosureHeader, rest: 
f64, @@ -3262,6 +3281,7 @@ extern "C" fn temporal_plain_year_month_from_thunk( crate::temporal::plain_year_month::from_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_year_month_compare_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3269,6 +3289,7 @@ extern "C" fn temporal_plain_year_month_compare_thunk( crate::temporal::plain_year_month::compare_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_month_day_ctor_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3276,6 +3297,7 @@ extern "C" fn temporal_plain_month_day_ctor_thunk( crate::temporal::plain_month_day::construct(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_plain_month_day_from_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3283,6 +3305,7 @@ extern "C" fn temporal_plain_month_day_from_thunk( crate::temporal::plain_month_day::from_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_zoned_date_time_ctor_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3290,6 +3313,7 @@ extern "C" fn temporal_zoned_date_time_ctor_thunk( crate::temporal::zoned_date_time::construct(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_zoned_date_time_from_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3297,6 +3321,7 @@ extern "C" fn temporal_zoned_date_time_from_thunk( crate::temporal::zoned_date_time::from_static(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_zoned_date_time_compare_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3306,6 +3331,7 @@ extern "C" fn temporal_zoned_date_time_compare_thunk( // Temporal.Now is a namespace (not a constructor) — method thunks on a plain // object, installed like Math. 
Each reads the host clock fresh. +#[cfg(feature = "temporal")] extern "C" fn temporal_now_instant_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3313,6 +3339,7 @@ extern "C" fn temporal_now_instant_thunk( crate::temporal::now::instant(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_now_timezone_id_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3320,6 +3347,7 @@ extern "C" fn temporal_now_timezone_id_thunk( crate::temporal::now::time_zone_id(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_now_plain_date_time_iso_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3327,6 +3355,7 @@ extern "C" fn temporal_now_plain_date_time_iso_thunk( crate::temporal::now::plain_date_time_iso(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_now_plain_date_iso_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3334,6 +3363,7 @@ extern "C" fn temporal_now_plain_date_iso_thunk( crate::temporal::now::plain_date_iso(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_now_plain_time_iso_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3341,6 +3371,7 @@ extern "C" fn temporal_now_plain_time_iso_thunk( crate::temporal::now::plain_time_iso(&global_this_rest_array_values(rest)) } +#[cfg(feature = "temporal")] extern "C" fn temporal_now_zoned_date_time_iso_thunk( _closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3349,6 +3380,7 @@ extern "C" fn temporal_now_zoned_date_time_iso_thunk( } /// Build the `Temporal.Now` namespace object (a plain object of method thunks). 
+#[cfg(feature = "temporal")] fn build_temporal_now_namespace() -> f64 { let now_obj = js_object_alloc(0, 0); if now_obj.is_null() { @@ -3395,6 +3427,7 @@ fn build_temporal_now_namespace() -> f64 { /// instance (`__tname` / `__tkind`); the receiver comes from `IMPLICIT_THIS`. /// Throws `TypeError` on a non-Temporal or wrong-brand receiver (the getter /// `branding.js` tests: `blank.call(undefined)`, `years.call({})`, …). +#[cfg(feature = "temporal")] extern "C" fn temporal_proto_getter_thunk(closure: *const crate::closure::ClosureHeader) -> f64 { let recv = super::js_implicit_this_get(); let cl = closure as usize; @@ -3418,6 +3451,7 @@ extern "C" fn temporal_proto_getter_thunk(closure: *const crate::closure::Closur /// used when a prototype method is invoked through indirection /// (`Temporal.Duration.prototype.add.call(d, x)`); the normal `d.add(x)` path /// is the brand arm in `js_native_call_method`. +#[cfg(feature = "temporal")] extern "C" fn temporal_proto_method_thunk( closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3442,6 +3476,7 @@ extern "C" fn temporal_proto_method_thunk( /// Install a brand-checked accessor getter (`{ get, set: undefined, /// enumerable: false, configurable: true }`) on a Temporal prototype. +#[cfg(feature = "temporal")] fn install_temporal_proto_getter(proto: *mut ObjectHeader, kind: u8, name: &str) { let c = crate::closure::js_closure_alloc(temporal_proto_getter_thunk as *const u8, 0); if c.is_null() { @@ -3470,6 +3505,7 @@ fn install_temporal_proto_getter(proto: *mut ObjectHeader, kind: u8, name: &str) /// Install a brand-checked method (`{ writable: true, enumerable: false, /// configurable: true }`, non-constructable, with spec `.name`/`.length`) on a /// Temporal prototype. 
+#[cfg(feature = "temporal")] fn install_temporal_proto_method(proto: *mut ObjectHeader, kind: u8, name: &str, spec_length: u32) { let c = crate::closure::js_closure_alloc(temporal_proto_method_thunk as *const u8, 0); if c.is_null() { @@ -3510,6 +3546,7 @@ fn install_temporal_proto_method(proto: *mut ObjectHeader, kind: u8, name: &str, /// Build and wire a `Temporal..prototype` object: a real object carrying /// the type's accessor getters and methods (for reflection + indirect `.call`), /// linked to its constructor via `ctor.prototype` / `proto.constructor`. +#[cfg(feature = "temporal")] fn install_temporal_prototype( ctor: *mut crate::closure::ClosureHeader, kind: u8, @@ -3549,6 +3586,7 @@ fn install_temporal_prototype( ); } +#[cfg(feature = "temporal")] fn install_temporal_constructor( ns_obj: *mut ObjectHeader, name: &str, @@ -3576,6 +3614,7 @@ fn install_temporal_constructor( /// Read a built-in closure's installed `name` dynamic prop as a Rust `String` /// (used by the shared Temporal prototype thunks to recover which getter / /// method they back). Empty string if absent. +#[cfg(feature = "temporal")] fn temporal_closure_name(closure: *const crate::closure::ClosureHeader) -> String { let v = crate::closure::closure_get_dynamic_prop(closure as usize, "name"); if !JSValue::from_bits(v.to_bits()).is_string() { @@ -3596,6 +3635,7 @@ fn temporal_closure_name(closure: *const crate::closure::ClosureHeader) -> Strin /// for a Temporal prototype getter / method invoked on a non-branded `this` /// (the spec brand check). Used by the reflective `.call`/`.apply` paths; /// normal `zdt.foo()` dispatches via the brand arm and never reaches here. +#[cfg(feature = "temporal")] fn temporal_brand_type_error(type_name: &str, member: &str) -> ! { crate::object::throw_object_type_error( format!("{type_name}.prototype.{member} called on incompatible receiver").as_bytes(), @@ -3605,6 +3645,7 @@ fn temporal_brand_type_error(type_name: &str, member: &str) -> ! 
{ /// Shared body for a `Temporal.ZonedDateTime.prototype` accessor getter invoked /// reflectively. Resolves `this` from `IMPLICIT_THIS`, brand-checks it is a /// `ZonedDateTime`, and returns the getter's value. +#[cfg(feature = "temporal")] extern "C" fn temporal_zdt_proto_getter_thunk( closure: *const crate::closure::ClosureHeader, ) -> f64 { @@ -3622,6 +3663,7 @@ extern "C" fn temporal_zdt_proto_getter_thunk( /// Shared body for a `Temporal.ZonedDateTime.prototype` method invoked /// reflectively (`.prototype.equals.call(zdt, …)`). Brand-checks `this` then /// dispatches to the per-type method router. +#[cfg(feature = "temporal")] extern "C" fn temporal_zdt_proto_method_thunk( closure: *const crate::closure::ClosureHeader, rest: f64, @@ -3638,6 +3680,7 @@ extern "C" fn temporal_zdt_proto_method_thunk( /// descriptor (`enumerable:false, configurable:true`, `set:undefined`) and the /// proper getter `name` (`"get "`) / `length` (0). Mirrors the RegExp /// prototype getter install. +#[cfg(feature = "temporal")] fn install_temporal_getter(proto: *mut ObjectHeader, prop: &str, func_ptr: *const u8) { unsafe { crate::closure::js_register_closure_arity(func_ptr, 0); @@ -3683,6 +3726,7 @@ fn install_temporal_getter(proto: *mut ObjectHeader, prop: &str, func_ptr: *cons /// These satisfy the reflective test262 cases (branding / prop-desc / length / /// name / not-a-constructor / builtin); ordinary `zdt.foo()` calls still /// dispatch via the Temporal brand arm and never touch this object. +#[cfg(feature = "temporal")] fn build_zoned_date_time_prototype() -> *mut ObjectHeader { let proto = js_object_alloc(0, 0); if proto.is_null() { @@ -3762,6 +3806,7 @@ fn build_zoned_date_time_prototype() -> *mut ObjectHeader { /// same-named user closure never matches). Used by `instanceof` to make /// `zdt instanceof Temporal.ZonedDateTime` resolve to `true` even though /// Temporal values dispatch via brand arms, not a real prototype chain. 
+#[cfg(feature = "temporal")] pub(crate) fn temporal_ctor_kind(type_ref: f64) -> Option { use crate::temporal::TemporalKind; let jv = JSValue::from_bits(type_ref.to_bits()); @@ -3817,7 +3862,16 @@ pub(crate) fn temporal_ctor_kind(type_ref: f64) -> Option Option { + None +} + /// `Temporal.PlainDate.prototype` accessor getters and method shapes (#4691). +#[cfg(feature = "temporal")] const PLAIN_DATE_GETTERS: &[&str] = &[ "calendarId", "era", @@ -3836,6 +3890,7 @@ const PLAIN_DATE_GETTERS: &[&str] = &[ "monthsInYear", "inLeapYear", ]; +#[cfg(feature = "temporal")] const PLAIN_DATE_METHODS: &[(&str, u32)] = &[ ("toPlainYearMonth", 0), ("toPlainMonthDay", 0), @@ -3855,6 +3910,7 @@ const PLAIN_DATE_METHODS: &[(&str, u32)] = &[ ]; /// `Temporal.PlainDateTime.prototype` accessor getters and method shapes (#4693). +#[cfg(feature = "temporal")] const PLAIN_DATE_TIME_GETTERS: &[&str] = &[ "calendarId", "era", @@ -3879,6 +3935,7 @@ const PLAIN_DATE_TIME_GETTERS: &[&str] = &[ "microsecond", "nanosecond", ]; +#[cfg(feature = "temporal")] const PLAIN_DATE_TIME_METHODS: &[(&str, u32)] = &[ ("with", 1), ("withPlainTime", 0), @@ -3898,6 +3955,7 @@ const PLAIN_DATE_TIME_METHODS: &[(&str, u32)] = &[ ("toPlainTime", 0), ]; +#[cfg(feature = "temporal")] fn install_temporal_namespace(ns_obj: *mut ObjectHeader) { if ns_obj.is_null() { return; @@ -4415,6 +4473,7 @@ fn install_temporal_namespace(ns_obj: *mut ObjectHeader) { /// Install the standard `from` (spec length 1) and `compare` (spec length 2) /// statics — both variadic with call-arity 0 — on a Temporal constructor. 
+#[cfg(feature = "temporal")] fn install_temporal_from_compare( ctor: *mut crate::closure::ClosureHeader, from_thunk: *const u8, @@ -4943,6 +5002,7 @@ pub(crate) fn populate_global_this_builtins(singleton: *mut ObjectHeader) { set_intrinsic_to_string_tag(ns_obj, "Atomics"); } "Intl" => crate::intl::install_intl_namespace(ns_obj), + #[cfg(feature = "temporal")] "Temporal" => { install_temporal_namespace(ns_obj); set_intrinsic_to_string_tag(ns_obj, "Temporal"); diff --git a/crates/perry-runtime/src/object/iterator_prototypes.rs b/crates/perry-runtime/src/object/iterator_prototypes.rs index 21d6144df8..862a612c79 100644 --- a/crates/perry-runtime/src/object/iterator_prototypes.rs +++ b/crates/perry-runtime/src/object/iterator_prototypes.rs @@ -75,6 +75,7 @@ unsafe fn dispatch_on_implicit_this(method: &str) -> f64 { crate::string::STRING_ITERATOR_CLASS_ID => { crate::string::dispatch_string_iterator_method(obj, method) } + #[cfg(feature = "regex-engine")] crate::regex::REGEXP_STRING_ITERATOR_CLASS_ID => { crate::regex::dispatch_regexp_string_iterator_method(obj, method) } diff --git a/crates/perry-runtime/src/object/mod.rs b/crates/perry-runtime/src/object/mod.rs index 6035b01c19..2ef0e04135 100644 --- a/crates/perry-runtime/src/object/mod.rs +++ b/crates/perry-runtime/src/object/mod.rs @@ -68,6 +68,7 @@ mod prototype_helpers; mod reflect_support; mod regex_proto_thunks; mod string_proto_thunks; +#[cfg(feature = "temporal")] mod temporal_proto; mod typed_array_define; mod typed_array_proto_thunks; diff --git a/crates/perry-runtime/src/object/native_call_method.rs b/crates/perry-runtime/src/object/native_call_method.rs index 8d729ab53a..0b119d1e5b 100644 --- a/crates/perry-runtime/src/object/native_call_method.rs +++ b/crates/perry-runtime/src/object/native_call_method.rs @@ -1698,6 +1698,7 @@ pub unsafe extern "C" fn js_native_call_method( // `Temporal.*` value is a NaN-boxed pointer to a custom cell with no // codegen fast-path, so every method call funnels through 
here. The router // throws `TypeError` for an unknown method name on a real Temporal receiver. + #[cfg(feature = "temporal")] if crate::temporal::is_temporal_value(object) { let args = refreshed_args(); return crate::temporal::dispatch::call_method(object, method_name, &args); @@ -1968,6 +1969,7 @@ pub unsafe extern "C" fn js_native_call_method( // function result the codegen `Expr::RegExpTest` fast path can't see; without // this it throws `test is not a function`, breaking Hono `app.use('*', …)` // (#1731). The helper returns None for non-regex so generic dispatch resumes. + #[cfg(feature = "regex-engine")] if matches!(method_name, "test" | "exec" | "toString") && jsval.is_pointer() { let undef = f64::from_bits(crate::value::TAG_UNDEFINED); let arg0 = refreshed_args().first().copied().unwrap_or(undef); @@ -1980,6 +1982,7 @@ pub unsafe extern "C" fn js_native_call_method( // `RegExp.prototype.compile(pattern, flags)` (Annex B) re-initializes the // receiver in place. Needs both args, so it is dispatched here rather than // through the single-arg `dispatch_regex_receiver_method`. + #[cfg(feature = "regex-engine")] if method_name == "compile" && jsval.is_pointer() { let p = jsval.as_pointer::(); if crate::regex::is_regex_pointer(p) { @@ -2375,34 +2378,49 @@ pub unsafe extern "C" fn js_native_call_method( // function`) because no runtime arm handled `match`. "match" | "matchAll" => { // Missing arg ⇒ `undefined` (→ empty `/(?:)/` regex). 
- let pattern_val = + let _pattern_val = arg_at(0).unwrap_or_else(|| f64::from_bits(JSValue::undefined().bits())); - if method_name == "matchAll" { - let result_ptr = - crate::regex::js_string_match_all_value(s_ptr, pattern_val); + #[cfg(feature = "regex-engine")] + { + let pattern_val = _pattern_val; + if method_name == "matchAll" { + let result_ptr = + crate::regex::js_string_match_all_value(s_ptr, pattern_val); + if result_ptr.is_null() { + return f64::from_bits(JSValue::null().bits()); + } + return f64::from_bits(JSValue::pointer(result_ptr as *mut u8).bits()); + } + // Coerce a non-RegExp arg via `RegExpCreate(ToString(arg))` + // (a string pattern / `undefined` / `{ toString }` object), + // matching the codegen path. + let result_ptr = crate::regex::js_string_match_value(s_ptr, pattern_val); if result_ptr.is_null() { return f64::from_bits(JSValue::null().bits()); } return f64::from_bits(JSValue::pointer(result_ptr as *mut u8).bits()); } - // Coerce a non-RegExp arg via `RegExpCreate(ToString(arg))` - // (a string pattern / `undefined` / `{ toString }` object), - // matching the codegen path. - let result_ptr = crate::regex::js_string_match_value(s_ptr, pattern_val); - if result_ptr.is_null() { - return f64::from_bits(JSValue::null().bits()); - } - return f64::from_bits(JSValue::pointer(result_ptr as *mut u8).bits()); + // Engine gated off: a string `.match`/`.matchAll` can only + // be reached by a program that uses regex (which forces the + // engine on), so this is dead — `null` (no match) is benign. + #[cfg(not(feature = "regex-engine"))] + return f64::from_bits(JSValue::null().bits()); } "search" => { - let regex_val = + let _regex_val = arg_at(0).unwrap_or_else(|| f64::from_bits(JSValue::undefined().bits())); - let i32_v = crate::regex::js_string_search_value(s_ptr, regex_val); - // Return a RAW `f64` (not NaN-boxed INT32_TAG): a boxed-int - // result fails `aString.search(x) === 5` strict-equality - // against a plain number literal. 
Mirrors the `indexOf` - // arm's `as f64` convention. - return i32_v as f64; + #[cfg(feature = "regex-engine")] + { + let i32_v = crate::regex::js_string_search_value(s_ptr, _regex_val); + // Return a RAW `f64` (not NaN-boxed INT32_TAG): a boxed-int + // result fails `aString.search(x) === 5` strict-equality + // against a plain number literal. Mirrors the `indexOf` + // arm's `as f64` convention. + return i32_v as f64; + } + // Engine gated off: dead (see `match` arm) — `-1` (not found). + #[cfg(not(feature = "regex-engine"))] + return -1.0_f64; } // Refs #421 — common string methods on any-typed receivers. // Hono's compiled JS (and most npm packages with stripped TS @@ -2668,11 +2686,15 @@ pub unsafe extern "C" fn js_native_call_method( .unwrap_or(std::ptr::null()) }; if let (Some(pat_val), Some(repl_val)) = (arg_at(0), arg_at(1)) { + // `pat_jsv` is only consulted by the regex-engine-gated + // branch below (RegExp pattern + callback replacer). + #[cfg_attr(not(feature = "regex-engine"), allow(unused_variables))] let pat_jsv = JSValue::from_bits(pat_val.to_bits()); let repl_jsv = JSValue::from_bits(repl_val.to_bits()); if repl_jsv.is_pointer() { let repl_raw = (repl_val.to_bits() & 0x0000_FFFF_FFFF_FFFF) as usize; if crate::closure::is_closure_ptr(repl_raw) { + #[cfg(feature = "regex-engine")] if pat_jsv.is_pointer() { let regex_ptr = pat_jsv.as_pointer::(); @@ -2713,6 +2735,7 @@ pub unsafe extern "C" fn js_native_call_method( } } // Detect RegExp pattern: NaN-boxed pointer to a RegExpHeader. 
+ #[cfg(feature = "regex-engine")] if let Some(v) = arg_at(0) { let jsv = JSValue::from_bits(v.to_bits()); if jsv.is_pointer() { @@ -3528,6 +3551,7 @@ pub unsafe extern "C" fn js_native_call_method( method_name, ); } + #[cfg(feature = "regex-engine")] if (*obj).class_id == crate::regex::REGEXP_STRING_ITERATOR_CLASS_ID { return crate::regex::dispatch_regexp_string_iterator_method( obj as *mut ObjectHeader, @@ -4071,6 +4095,7 @@ pub unsafe extern "C" fn js_native_call_method( method_name, ); } + #[cfg(feature = "regex-engine")] if (*obj).class_id == crate::regex::REGEXP_STRING_ITERATOR_CLASS_ID { return crate::regex::dispatch_regexp_string_iterator_method( obj as *mut ObjectHeader, diff --git a/crates/perry-runtime/src/object/object_ops.rs b/crates/perry-runtime/src/object/object_ops.rs index 55d4d586ce..43d79fddb9 100644 --- a/crates/perry-runtime/src/object/object_ops.rs +++ b/crates/perry-runtime/src/object/object_ops.rs @@ -2459,6 +2459,7 @@ pub extern "C" fn js_object_get_prototype_of(obj_value: f64) -> f64 { // and crash. The reflective prototype is reachable directly as // `Temporal..prototype`, so for a cell receiver return `null` rather // than faulting on the cell. + #[cfg(feature = "temporal")] if crate::temporal::is_temporal_value(obj_value) { return f64::from_bits(TAG_NULL); } diff --git a/crates/perry-runtime/src/object/regex_proto_thunks.rs b/crates/perry-runtime/src/object/regex_proto_thunks.rs index 0dc9eb6b2a..6fdd264d27 100644 --- a/crates/perry-runtime/src/object/regex_proto_thunks.rs +++ b/crates/perry-runtime/src/object/regex_proto_thunks.rs @@ -230,6 +230,7 @@ fn install_getter(proto_obj: *mut ObjectHeader, name: &str, func_ptr: *const u8) /// (a registered RegExp; `RegExp.prototype` itself throws), `ToString`s the /// argument, and runs the match. Reflective: `RegExp.prototype.exec.call(re, s)` /// and `re.exec(s)` extracted off the prototype both route here. 
+#[cfg(feature = "regex-engine")] pub(super) extern "C" fn regex_proto_exec_thunk( _c: *const crate::closure::ClosureHeader, arg: f64, @@ -246,6 +247,7 @@ pub(super) extern "C" fn regex_proto_exec_thunk( /// `RegExp.prototype.test(string)` — brand-checks `this`, `ToString`s the arg, /// returns a boolean. +#[cfg(feature = "regex-engine")] pub(super) extern "C" fn regex_proto_test_thunk( _c: *const crate::closure::ClosureHeader, arg: f64, @@ -293,6 +295,7 @@ pub(super) extern "C" fn regex_proto_to_string_thunk( /// Resolve `IMPLICIT_THIS` to a live RegExp instance (with `[[RegExpMatcher]]`), /// throwing `TypeError` otherwise. Unlike the flag/`source` getters, this does /// NOT treat `RegExp.prototype` specially — `exec`/`test` require a real matcher. +#[cfg(feature = "regex-engine")] fn regex_instance_or_throw(method: &str) -> *const crate::regex::RegExpHeader { let receiver = crate::value::JSValue::from_bits(IMPLICIT_THIS.with(|c| c.get())); if receiver.is_pointer() { @@ -313,7 +316,9 @@ fn regex_instance_or_throw(method: &str) -> *const crate::regex::RegExpHeader { /// `compile` stays a no-op (Annex B, rarely exercised). 
pub(super) fn install_regex_proto_methods(proto_obj: *mut ObjectHeader) { use super::global_this::install_proto_method as ipm; + #[cfg(feature = "regex-engine")] ipm(proto_obj, "exec", regex_proto_exec_thunk as *const u8, 1); + #[cfg(feature = "regex-engine")] ipm(proto_obj, "test", regex_proto_test_thunk as *const u8, 1); ipm( proto_obj, diff --git a/crates/perry-runtime/src/path.rs b/crates/perry-runtime/src/path.rs index 680578ebcf..e1cafb4a50 100644 --- a/crates/perry-runtime/src/path.rs +++ b/crates/perry-runtime/src/path.rs @@ -870,6 +870,7 @@ pub extern "C" fn js_path_to_namespaced_path_value(value: f64) -> f64 { string_value_to_namespaced_path(value, false) } +#[cfg(feature = "regex-engine")] fn brace_alternation<'a>(pattern: &'a str, open: usize) -> Option<(usize, Vec<&'a str>)> { let bytes = pattern.as_bytes(); let mut depth = 0usize; @@ -900,6 +901,7 @@ fn brace_alternation<'a>(pattern: &'a str, open: usize) -> Option<(usize, Vec<&' None } +#[cfg(feature = "regex-engine")] fn extglob_alternation<'a>(pattern: &'a str, open: usize) -> Option<(usize, char, Vec<&'a str>)> { let bytes = pattern.as_bytes(); if open + 1 >= bytes.len() || bytes[open + 1] != b'(' { @@ -939,6 +941,7 @@ fn extglob_alternation<'a>(pattern: &'a str, open: usize) -> Option<(usize, char None } +#[cfg(feature = "regex-engine")] fn push_regex_literal(c: char, out: &mut String) { match c { '.' | '+' | '(' | ')' | '|' | '^' | '$' | '}' | '\\' => { @@ -949,6 +952,7 @@ fn push_regex_literal(c: char, out: &mut String) { } } +#[cfg(feature = "regex-engine")] fn push_glob_regex(pattern: &str, out: &mut String) { let bytes = pattern.as_bytes(); let mut i = 0; @@ -1041,6 +1045,7 @@ fn push_glob_regex(pattern: &str, out: &mut String) { /// minimatch with `windowsPathsNoEscape`, so backslashes in the pattern are /// path separators, not escapes. `**` is a globstar only as a whole path /// segment; embedded `**` has ordinary `*` segment-wildcard behavior. 
+#[cfg(feature = "regex-engine")] fn glob_to_regex(pattern: &str) -> String { let mut out = String::from("^"); let normalized = pattern.replace('\\', "/"); @@ -1056,6 +1061,7 @@ pub extern "C" fn js_path_matches_glob( path_ptr: *const StringHeader, pattern_ptr: *const StringHeader, ) -> i32 { + #[cfg(feature = "regex-engine")] unsafe { let path_str = string_from_header(path_ptr).unwrap_or_default(); let pattern = string_from_header(pattern_ptr).unwrap_or_default(); @@ -1071,6 +1077,13 @@ pub extern "C" fn js_path_matches_glob( Err(_) => 0, } } + // Glob matching is built on the regex engine; with it gated off, report + // "no match" (a program that calls `path.matchesGlob` forces the engine on). + #[cfg(not(feature = "regex-engine"))] + { + let _ = (path_ptr, pattern_ptr); + 0 + } } // =================================================================== @@ -1428,6 +1441,7 @@ pub extern "C" fn js_path_win32_matches_glob( path_ptr: *const StringHeader, pattern_ptr: *const StringHeader, ) -> i32 { + #[cfg(feature = "regex-engine")] unsafe { let path_str = string_from_header(path_ptr) .unwrap_or_default() @@ -1445,6 +1459,11 @@ pub extern "C" fn js_path_win32_matches_glob( Err(_) => 0, } } + #[cfg(not(feature = "regex-engine"))] + { + let _ = (path_ptr, pattern_ptr); + 0 + } } #[no_mangle] @@ -1651,7 +1670,7 @@ mod posix_parse_tests { } } -#[cfg(test)] +#[cfg(all(test, feature = "regex-engine"))] mod glob_tests { use super::glob_to_regex; diff --git a/crates/perry-runtime/src/regex.rs b/crates/perry-runtime/src/regex.rs index e03791be06..e5e909dfd9 100644 --- a/crates/perry-runtime/src/regex.rs +++ b/crates/perry-runtime/src/regex.rs @@ -3,41 +3,70 @@ //! Provides JavaScript-compatible regular expression operations using the Rust regex crate. //! RegExp objects are heap-allocated and store the compiled pattern and flags. 
+#[cfg(feature = "regex-engine")] use regex::Regex; use std::cell::RefCell; use std::collections::{HashMap, HashSet}; use std::ptr; +#[cfg(feature = "regex-engine")] use std::sync::Arc; +#[cfg(feature = "regex-engine")] use crate::array::ArrayHeader; use crate::string::StringHeader; +#[cfg(feature = "regex-engine")] use crate::value::js_nanbox_string; use crate::object::ObjectHeader; +/// The compiled standard-engine regex type. When the regex engine is gated +/// off, `RegExpHeader::regex_ptr` is typed `*mut ()` (a never-dereferenced +/// dangling field) so the identity/display layer keeps the same struct +/// layout without pulling in the `regex` crate. +#[cfg(feature = "regex-engine")] +type CompiledRegex = regex::Regex; +#[cfg(not(feature = "regex-engine"))] +type CompiledRegex = (); + +#[cfg(feature = "regex-engine")] mod compile; mod escape; +#[cfg(feature = "regex-engine")] mod exec_array; +#[cfg(feature = "regex-engine")] mod grammar; +#[cfg(feature = "regex-engine")] mod match_all; +#[cfg(feature = "regex-engine")] mod replace_expand; mod replace_fn; +#[cfg(feature = "regex-engine")] pub use compile::js_regexp_compile_value; pub use escape::js_regexp_escape; +#[cfg(feature = "regex-engine")] use exec_array::{ byte_index_to_char_index, char_index_to_byte, set_exec_array_groups, set_exec_array_indices, set_exec_array_indices_fancy, set_exec_array_metadata, }; +#[cfg(feature = "regex-engine")] use grammar::{has_invalid_repeated_quantifier, js_regex_to_rust}; +#[cfg(feature = "regex-engine")] pub use match_all::{ dispatch_regexp_string_iterator_method, js_string_match_all, js_string_match_all_value, - REGEXP_STRING_ITERATOR_CLASS_ID, }; + +/// Class id for `RegExp String Iterator` exotic objects. Referenced by the +/// always-linked iterator-prototype dispatch, so it stays ungated even when +/// the regex engine (which produces these iterators) is compiled out. 
+pub const REGEXP_STRING_ITERATOR_CLASS_ID: u32 = 0xFFFF_000A; +#[cfg(feature = "regex-engine")] use replace_expand::{expand_js_replacement, replace_regex_fn_fancy}; +#[cfg(feature = "regex-engine")] pub use replace_expand::{ js_string_replace_all_regex_fn, js_string_replace_all_regex_named, js_string_replace_regex_fn, js_string_replace_regex_named, }; +#[cfg(feature = "regex-engine")] use replace_fn::call_replace_callback; pub use replace_fn::{ js_string_replace_all_string, js_string_replace_all_string_fn, js_string_replace_string, @@ -83,6 +112,7 @@ pub(crate) fn is_regex_pointer(ptr: *const u8) -> bool { REGEX_POINTERS.with(|s| s.borrow().contains(&(ptr as usize))) } +#[cfg(feature = "regex-engine")] thread_local! { /// Cache of compiled regex objects, keyed by (pattern, flags). static REGEX_CACHE: RefCell>> = RefCell::new(HashMap::new()); @@ -90,6 +120,7 @@ thread_local! { static FANCY_CACHE: RefCell>> = RefCell::new(HashMap::new()); } +#[cfg(feature = "regex-engine")] fn get_or_compile_regex(pattern: &str, flags: &str) -> Arc { REGEX_CACHE.with(|cache| { let mut cache = cache.borrow_mut(); @@ -149,8 +180,11 @@ fn get_or_compile_regex(pattern: &str, flags: &str) -> Arc { /// Header for heap-allocated RegExp objects #[repr(C)] pub struct RegExpHeader { - /// Pointer to the compiled Regex object (boxed) - regex_ptr: *mut Regex, + /// Pointer to the compiled Regex object (boxed). Typed via the + /// `CompiledRegex` alias so the struct layout is identical whether or not + /// the regex engine is linked (it's `*mut ()` when gated off and never + /// dereferenced — all dereferencing sites are themselves engine-gated). + regex_ptr: *mut CompiledRegex, /// Original pattern string (for debugging/serialization) pattern_ptr: *const StringHeader, /// Flags string (e.g., "gi" for global+ignoreCase) @@ -177,6 +211,7 @@ pub struct RegExpHeader { /// stored value may be any JSValue (e.g. 
`re.lastIndex = { valueOf() {…} }`), so /// coerce via `ToNumber` (which invokes `valueOf`/`toString`), then `ToInteger`, /// clamped to ≥ 0. +#[cfg(feature = "regex-engine")] pub(crate) fn regex_last_index_offset(re: *const RegExpHeader) -> usize { let stored = f64::from_bits(unsafe { (*re).last_index }); let n = crate::builtins::js_number_coerce(stored); @@ -187,6 +222,7 @@ pub(crate) fn regex_last_index_offset(re: *const RegExpHeader) -> usize { } } +#[cfg(feature = "regex-engine")] #[inline] fn store_last_index_number(re: *mut RegExpHeader, n: usize) { unsafe { @@ -238,6 +274,7 @@ pub(super) fn js_string_from_str(s: &str) -> *mut StringHeader { crate::string::js_string_from_bytes(s.as_ptr(), s.len() as u32) } +#[cfg(feature = "regex-engine")] fn throw_replace_all_non_global_regex() -> ! { let message = b"String.prototype.replaceAll called with a non-global RegExp argument"; let msg = crate::string::js_string_from_bytes(message.as_ptr(), message.len() as u32); @@ -245,6 +282,7 @@ fn throw_replace_all_non_global_regex() -> ! { crate::exception::js_throw(crate::value::js_nanbox_pointer(err as i64)) } +#[cfg(feature = "regex-engine")] fn throw_match_all_non_global_regex() -> ! { let message = b"String.prototype.matchAll called with a non-global RegExp argument"; let msg = crate::string::js_string_from_bytes(message.as_ptr(), message.len() as u32); @@ -252,6 +290,7 @@ fn throw_match_all_non_global_regex() -> ! { crate::exception::js_throw(crate::value::js_nanbox_pointer(err as i64)) } +#[cfg(feature = "regex-engine")] #[inline] fn ensure_replace_all_regex_global(re: *const RegExpHeader) { unsafe { @@ -262,6 +301,7 @@ fn ensure_replace_all_regex_global(re: *const RegExpHeader) { } /// Throw a `SyntaxError` with the given message and never return. +#[cfg(feature = "regex-engine")] fn throw_regexp_syntax_error(message: &str) -> ! 
{ let msg = js_string_from_str(message); let err = crate::error::js_syntaxerror_new(msg); @@ -277,6 +317,7 @@ fn throw_regexp_syntax_error(message: &str) -> ! { /// Note: the `v` flag (unicodeSets) is accepted as a valid flag for parity but /// its set-notation matching semantics are not implemented (the regex crate /// has no equivalent); it behaves like an ordinary unicode pattern. +#[cfg(feature = "regex-engine")] fn validate_and_canonicalize_flags(flags: &str) -> String { // Spec order of the flag bits: d g i m s u v y. const FLAG_ORDER: &[char] = &['d', 'g', 'i', 'm', 's', 'u', 'v', 'y']; @@ -314,6 +355,7 @@ fn validate_and_canonicalize_flags(flags: &str) -> String { /// Uses the thread-local REGEX_CACHE so repeated regex literals (e.g. in a /// loop) reuse the same compiled Regex instead of leaking a fresh one each /// time. The raw pointer stored in RegExpHeader is kept alive by the cache. +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_regexp_new( pattern: *const StringHeader, @@ -437,6 +479,7 @@ pub extern "C" fn js_regexp_new( /// /// `ToString` runs through the coercing method path so a throwing /// `toString`/`valueOf` propagates. 
+#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_regexp_construct(pattern: f64, flags: f64) -> *mut RegExpHeader { let pv = crate::value::JSValue::from_bits(pattern.to_bits()); @@ -484,6 +527,7 @@ pub extern "C" fn js_regexp_construct(pattern: f64, flags: f64) -> *mut RegExpHe /// Test if a string matches the regex pattern /// regex.test(string) -> boolean +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_regexp_test(re: *const RegExpHeader, s: *const StringHeader) -> i32 { if !is_valid_regex_ptr(re) || !is_valid_ptr(s) { @@ -521,6 +565,7 @@ pub extern "C" fn js_regexp_test(re: *const RegExpHeader, s: *const StringHeader /// Look up a fancy-regex fallback for the given header, if one was /// registered at compile-time because the `regex` crate rejected the /// pattern (backreferences, lookbehind, etc.). +#[cfg(feature = "regex-engine")] fn lookup_fancy_regex(re: *const RegExpHeader) -> Option> { unsafe { let pat = string_as_str((*re).pattern_ptr); @@ -539,6 +584,7 @@ fn lookup_fancy_regex(re: *const RegExpHeader) -> Option /// is `ToString(arg)` (running user `toString`/`valueOf`, which may throw), /// with `undefined` mapped to the empty pattern (the `/(?:)/` regex that /// matches at index 0). Flags default to none. +#[cfg(feature = "regex-engine")] fn coerce_search_arg_to_regex(arg: f64) -> *const RegExpHeader { let jv = crate::value::JSValue::from_bits(arg.to_bits()); if jv.is_pointer() { @@ -565,6 +611,7 @@ fn coerce_search_arg_to_regex(arg: f64) -> *const RegExpHeader { /// coercion: a non-RegExp arg is turned into `RegExpCreate(ToString(arg))` /// (so `"x".search("pat")`, `.search(undefined)`, and `.search({toString})` /// all work). `s` is the already-`ToString`-coerced `this`. 
+#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_string_search_value(s: *const StringHeader, arg: f64) -> i32 { // Root the receiver across the (possibly allocating / GC-triggering) @@ -579,6 +626,7 @@ pub extern "C" fn js_string_search_value(s: *const StringHeader, arg: f64) -> i3 /// `String.prototype.match(regexp)` (ECMA-262 §22.1.3.11) with full argument /// coercion (see [`js_string_search_value`]). Returns the match array pointer, /// or null on no match. +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_string_match_value(s: *const StringHeader, arg: f64) -> *mut ArrayHeader { let scope = crate::gc::RuntimeHandleScope::new(); @@ -590,6 +638,7 @@ pub extern "C" fn js_string_match_value(s: *const StringHeader, arg: f64) -> *mu /// Find matches in a string /// string.match(regex) -> string[] | null (returns array pointer, null if no match) +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_string_match( s: *const StringHeader, @@ -856,6 +905,7 @@ pub extern "C" fn js_string_match( /// `js_string_replace_regex_fn` pairing already in this file. Used so a pattern /// the `regex` crate can't compile (lookbehind/backreferences) still gets full /// `$1`/`$<name>`/`$&`/`` $` ``/`$'`/`$$` substitution. +#[cfg(feature = "regex-engine")] fn expand_js_replacement_fancy( repl: &str, caps: &fancy_regex::Captures, @@ -963,6 +1013,7 @@ fn expand_js_replacement_fancy( /// (fresh per-result object + by-name setters so each match grows its own /// shape). The returned object must be stored into a GC-visible slot by the /// caller immediately; it is rooted via `scope` until then. +#[cfg(feature = "regex-engine")] pub(crate) unsafe fn build_fancy_groups( fre: &fancy_regex::Regex, caps: &fancy_regex::Captures, @@ -996,6 +1047,7 @@ pub(crate) unsafe fn build_fancy_groups( /// match loop with `fancy_regex` and expands the replacement string via /// [`expand_js_replacement_fancy`].
Used when the pattern needs /// lookbehind/backreferences the `regex` crate can't compile. +#[cfg(feature = "regex-engine")] unsafe fn replace_regex_str_fancy( str_data: &str, fre: &fancy_regex::Regex, @@ -1029,6 +1081,7 @@ unsafe fn replace_regex_str_fancy( } /// string.replace(regex, replacement) -> string +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_string_replace_regex( s: *const StringHeader, @@ -1085,6 +1138,7 @@ pub extern "C" fn js_string_replace_regex( } /// string.replaceAll(regex, replacement) -> string +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_string_replace_all_regex( s: *const StringHeader, @@ -1106,6 +1160,7 @@ pub extern "C" fn js_string_replace_all_regex( /// Split a string by a regex delimiter /// string.split(regex) -> string[] (array of NaN-boxed string pointers) +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_string_split_regex( s: *const StringHeader, @@ -1116,6 +1171,7 @@ pub extern "C" fn js_string_split_regex( /// string.split(regex, limit) — limit<0 means no limit, limit==0 means empty /// (issue #567). +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_string_split_regex_n( s: *const StringHeader, @@ -1201,6 +1257,7 @@ pub extern "C" fn js_string_split_regex_n( /// Search for a regex match in a string /// string.search(regex) -> number (index of first match, -1 if none) +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_string_search_regex(s: *const StringHeader, re: *const RegExpHeader) -> i32 { if !is_valid_ptr(s) || !is_valid_regex_ptr(re) { @@ -1236,6 +1293,7 @@ pub extern "C" fn js_string_search_regex(s: *const StringHeader, re: *const RegE /// For global regexes, starts matching at lastIndex and updates it. /// Returns *mut ArrayHeader (null for no match). Stores .index and .groups /// in thread-locals, retrieved via js_regexp_exec_get_index / js_regexp_exec_get_groups. 
+#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_regexp_exec( re: *mut RegExpHeader, @@ -1500,6 +1558,7 @@ pub extern "C" fn js_regexp_exec( /// AND `method` is `test`/`exec`; `None` otherwise so the generic method /// dispatch in `js_native_call_method` continues. The argument is coerced to a /// string (`re.test(123)` tests against `"123"`). (#1731) +#[cfg(feature = "regex-engine")] pub(crate) fn dispatch_regex_receiver_method( ptr: *const u8, method: &str, @@ -1536,6 +1595,7 @@ pub(crate) fn dispatch_regex_receiver_method( } /// Get the .index from the last exec() call +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_regexp_exec_get_index() -> f64 { LAST_EXEC_INDEX.with(|idx| *idx.borrow()) @@ -1543,6 +1603,7 @@ pub extern "C" fn js_regexp_exec_get_index() -> f64 { /// Get the .groups object from the last exec() call /// Returns I64 pointer (0 for no groups) +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_regexp_exec_get_groups() -> i64 { LAST_EXEC_GROUPS.with(|g| { @@ -1574,14 +1635,14 @@ pub fn scan_last_exec_groups_root_mut(visitor: &mut crate::gc::RuntimeRootVisito }); } -#[cfg(test)] +#[cfg(all(test, feature = "regex-engine"))] pub(crate) fn test_set_last_exec_groups(ptr: *mut ObjectHeader) { LAST_EXEC_GROUPS.with(|g| { *g.borrow_mut() = ptr; }); } -#[cfg(test)] +#[cfg(all(test, feature = "regex-engine"))] pub(crate) fn test_last_exec_groups() -> usize { LAST_EXEC_GROUPS.with(|g| *g.borrow() as usize) } @@ -1713,7 +1774,7 @@ pub extern "C" fn js_regexp_set_last_index(re: *mut RegExpHeader, value: f64) { } } -#[cfg(test)] +#[cfg(all(test, feature = "regex-engine"))] mod tests { use super::*; use crate::string::js_string_from_bytes; diff --git a/crates/perry-runtime/src/regex/match_all.rs b/crates/perry-runtime/src/regex/match_all.rs index eb07d470c1..f147329822 100644 --- a/crates/perry-runtime/src/regex/match_all.rs +++ b/crates/perry-runtime/src/regex/match_all.rs @@ -13,7 +13,9 @@ use 
crate::value::{ }; /// Class id for `String.prototype.matchAll`'s RegExp String Iterator object. -pub const REGEXP_STRING_ITERATOR_CLASS_ID: u32 = 0xFFFF_000A; +/// Re-exported from the parent (kept ungated there so always-linked iterator +/// dispatch can reference it even when this engine module is gated out). +use super::REGEXP_STRING_ITERATOR_CLASS_ID; fn build_match_all_groups( regex: &Regex, diff --git a/crates/perry-runtime/src/regex/replace_fn.rs b/crates/perry-runtime/src/regex/replace_fn.rs index a7c2366a98..cbda791d79 100644 --- a/crates/perry-runtime/src/regex/replace_fn.rs +++ b/crates/perry-runtime/src/regex/replace_fn.rs @@ -1,4 +1,8 @@ use super::*; +// `js_nanbox_string` is re-exported from the parent only when the regex engine +// is on (it's part of the gated engine `use` cluster). The string-replacement +// helpers below are always compiled, so import it directly here. +use crate::value::js_nanbox_string; pub(super) unsafe fn call_replace_callback(callback: f64, args: &[f64]) -> String { let prev = crate::object::js_implicit_this_set(f64::from_bits(crate::value::TAG_UNDEFINED)); @@ -301,6 +305,7 @@ pub extern "C" fn js_string_replace_all_string_dyn( /// Resolve a runtime-dynamic `searchValue` (an object-property read, call /// result, destructured loop binding, …) to a registered RegExp pointer, or /// `None` when the value isn't a RegExp. 
+#[cfg(feature = "regex-engine")] fn needle_regex_ptr(needle: f64) -> Option<*const crate::regex::RegExpHeader> { let bits = needle.to_bits(); let top16 = bits >> 48; @@ -332,6 +337,7 @@ pub extern "C" fn js_string_replace_search_dyn( needle: f64, replacement: f64, ) -> *mut StringHeader { + #[cfg(feature = "regex-engine")] if let Some(re) = needle_regex_ptr(needle) { return js_string_replace_regex_dyn(s, re, replacement); } @@ -345,12 +351,14 @@ pub extern "C" fn js_string_replace_all_search_dyn( needle: f64, replacement: f64, ) -> *mut StringHeader { + #[cfg(feature = "regex-engine")] if let Some(re) = needle_regex_ptr(needle) { return js_string_replace_all_regex_dyn(s, re, replacement); } js_string_replace_all_string_dyn(s, crate::builtins::js_string_coerce(needle), replacement) } +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_string_replace_regex_dyn( s: *const StringHeader, @@ -368,6 +376,7 @@ pub extern "C" fn js_string_replace_regex_dyn( ) } +#[cfg(feature = "regex-engine")] #[no_mangle] pub extern "C" fn js_string_replace_all_regex_dyn( s: *const StringHeader, @@ -384,7 +393,7 @@ pub extern "C" fn js_string_replace_all_regex_dyn( ) } -#[cfg(test)] +#[cfg(all(test, feature = "regex-engine"))] mod tests { use super::*; diff --git a/crates/perry-runtime/src/string/compare.rs b/crates/perry-runtime/src/string/compare.rs index 28bfc22126..eef62b0385 100644 --- a/crates/perry-runtime/src/string/compare.rs +++ b/crates/perry-runtime/src/string/compare.rs @@ -413,12 +413,23 @@ pub extern "C" fn js_string_normalize( } }; - use unicode_normalization::UnicodeNormalization; + #[cfg(feature = "string-normalize")] + let normalized: String = { + use unicode_normalization::UnicodeNormalization; + match form_owned.as_str() { + "NFC" => str_data.nfc().collect(), + "NFD" => str_data.nfd().collect(), + "NFKC" => str_data.nfkc().collect(), + "NFKD" => str_data.nfkd().collect(), + _ => throw_invalid_normalize_form(), + } + }; + // Normalize engine gated off: 
still validate the form (so a bad form throws + // the spec RangeError), but pass the string through unchanged for the four + // valid forms (no Unicode decomposition tables linked). + #[cfg(not(feature = "string-normalize"))] let normalized: String = match form_owned.as_str() { - "NFC" => str_data.nfc().collect(), - "NFD" => str_data.nfd().collect(), - "NFKC" => str_data.nfkc().collect(), - "NFKD" => str_data.nfkd().collect(), + "NFC" | "NFD" | "NFKC" | "NFKD" => str_data.to_string(), _ => throw_invalid_normalize_form(), }; let bytes = normalized.as_bytes(); diff --git a/crates/perry-runtime/src/string/mod.rs b/crates/perry-runtime/src/string/mod.rs index f0fd650965..c4d79dfa72 100644 --- a/crates/perry-runtime/src/string/mod.rs +++ b/crates/perry-runtime/src/string/mod.rs @@ -34,6 +34,7 @@ mod pad; mod raw; mod slice_ops; mod split; +#[cfg(feature = "regex-engine")] pub(crate) use split::spec_regex_split; #[cfg(test)] diff --git a/crates/perry-runtime/src/string/split.rs b/crates/perry-runtime/src/string/split.rs index aeab7f5072..c93ae46aeb 100644 --- a/crates/perry-runtime/src/string/split.rs +++ b/crates/perry-runtime/src/string/split.rs @@ -4,6 +4,7 @@ use super::*; use crate::array::ArrayHeader; /// Advance to the next UTF-8 character boundary strictly after `i`. +#[cfg(feature = "regex-engine")] fn next_char_boundary(s: &str, i: usize) -> usize { let mut j = i + 1; while j < s.len() && !s.is_char_boundary(j) { @@ -19,6 +20,7 @@ fn next_char_boundary(s: &str, i: usize) -> usize { /// (unmatched groups → `undefined`/`None`) after each segment. Honors `limit` /// (`< 0` ⇒ unbounded) by stopping once `limit` elements have been produced. /// Each element is `Some(substring)` or `None` for a spliced unmatched group. 
+#[cfg(feature = "regex-engine")] pub(crate) fn spec_regex_split(regex: ®ex::Regex, s: &str, limit: i32) -> Vec> { let mut out: Vec> = Vec::new(); let unbounded = limit < 0; @@ -104,6 +106,7 @@ pub extern "C" fn js_string_split_n( // recorded by `js_regexp_new` and delegate to `js_string_split_regex` // on a match. Otherwise the regex header would be read as a // StringHeader and segfault on the first byte of its `regex_ptr`. + #[cfg(feature = "regex-engine")] if crate::regex::is_regex_pointer(delimiter as *const u8) { return crate::regex::js_string_split_regex_n( s, @@ -238,6 +241,7 @@ pub extern "C" fn js_string_split_value( let lim_jv = JSValue::from_bits(limit.to_bits()); // Step 2: a separator with a `[Symbol.split]` method (a RegExp) takes over. + #[cfg(feature = "regex-engine")] if sep_jv.is_pointer() { let ptr = crate::value::js_nanbox_get_pointer(separator) as *const u8; if crate::regex::is_regex_pointer(ptr) { diff --git a/crates/perry-runtime/src/symbol.rs b/crates/perry-runtime/src/symbol.rs index 3e54148d43..40d971bd61 100644 --- a/crates/perry-runtime/src/symbol.rs +++ b/crates/perry-runtime/src/symbol.rs @@ -2461,6 +2461,7 @@ pub unsafe extern "C" fn js_to_primitive(value: f64, hint: i32) -> f64 { // `"string"`/`"default"` — which is exactly what `"x" + plainDateTime` and // template interpolation need. (Direct `String(x)` already brand-checks; the // `+`/template coercion routed here did not.) 
+ #[cfg(feature = "temporal")] if crate::temporal::is_temporal_value(value) { if hint == 1 { crate::object::throw_object_type_error(b"Cannot convert a Temporal value to a number"); diff --git a/crates/perry-runtime/src/temporal/mod.rs b/crates/perry-runtime/src/temporal/mod.rs index 6d781c3df1..13b4d0d352 100644 --- a/crates/perry-runtime/src/temporal/mod.rs +++ b/crates/perry-runtime/src/temporal/mod.rs @@ -26,16 +26,27 @@ use crate::value::JSValue; +#[cfg(feature = "temporal")] pub mod dispatch; +#[cfg(feature = "temporal")] pub mod duration; +#[cfg(feature = "temporal")] pub mod instant; +#[cfg(feature = "temporal")] pub mod now; +#[cfg(feature = "temporal")] pub mod options; +#[cfg(feature = "temporal")] pub mod plain_date; +#[cfg(feature = "temporal")] pub mod plain_date_time; +#[cfg(feature = "temporal")] pub mod plain_month_day; +#[cfg(feature = "temporal")] pub mod plain_time; +#[cfg(feature = "temporal")] pub mod plain_year_month; +#[cfg(feature = "temporal")] pub mod zoned_date_time; const NANBOX_PTR_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; @@ -43,6 +54,7 @@ const NANBOX_PTR_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; /// The concrete `temporal_rs` value carried by a [`TemporalCell`]. The active /// variant is the type's brand sub-kind; [`TemporalValue::kind`] exposes it /// cheaply for dispatch without re-matching. +#[cfg(feature = "temporal")] pub enum TemporalValue { Duration(temporal_rs::Duration), Instant(temporal_rs::Instant), @@ -54,6 +66,12 @@ pub enum TemporalValue { ZonedDateTime(temporal_rs::ZonedDateTime), } +/// Temporal gated off: no `temporal_rs` linked, so a Temporal value can never be +/// constructed. The uninhabited twin keeps `TemporalCell` / the identity layer +/// type-checking without the engine dependency. +#[cfg(not(feature = "temporal"))] +pub enum TemporalValue {} + /// Stable sub-kind discriminator for a Temporal cell. 
Used by the brand checks /// in `js_native_call_method` / `js_object_get_field_by_name` to route to the /// right per-type dispatch without matching the whole enum. @@ -97,6 +115,7 @@ pub fn temporal_value_matches_class_id(value: f64, class_id: u32) -> bool { } } +#[cfg(feature = "temporal")] impl TemporalValue { #[inline] pub fn kind(&self) -> TemporalKind { @@ -129,6 +148,22 @@ impl TemporalValue { } } +/// Temporal gated off: `TemporalValue` is uninhabited, so these methods can only +/// be reached via an impossible value — the `match *self {}` arms are exhaustive +/// over zero variants. +#[cfg(not(feature = "temporal"))] +impl TemporalValue { + #[inline] + pub fn kind(&self) -> TemporalKind { + match *self {} + } + + #[inline] + pub fn type_name(&self) -> &'static str { + match *self {} + } +} + /// 1-slot heap cell holding a `temporal_rs` value behind a shared GC tag. See /// the module docs for the non-movable / pointer-free rationale. /// @@ -147,6 +182,7 @@ pub struct TemporalCell { /// Allocate a fresh Temporal cell wrapping `value` and return it as a NaN-boxed /// pointer (an f64 carrying `POINTER_TAG`). +#[cfg(feature = "temporal")] pub fn alloc_temporal_cell(value: TemporalValue) -> f64 { let boxed = Box::new(value); unsafe { @@ -222,6 +258,7 @@ pub fn temporal_kind(value: f64) -> Option { /// # Safety /// `cell` must point at a live, fully-initialized `TemporalCell` that the GC is /// about to reclaim; it is not read again afterwards. +#[cfg(feature = "temporal")] pub unsafe fn finalize_temporal_cell_for_gc(cell: *mut TemporalCell) { if cell.is_null() { return; @@ -229,9 +266,16 @@ pub unsafe fn finalize_temporal_cell_for_gc(cell: *mut TemporalCell) { std::ptr::drop_in_place(cell); } +/// Temporal gated off: no Temporal cell is ever allocated, so the GC never +/// reaches this finalize hook. Kept as a no-op so `gc/types.rs`'s registration +/// resolves without the engine. 
+#[cfg(not(feature = "temporal"))] +pub unsafe fn finalize_temporal_cell_for_gc(_cell: *mut TemporalCell) {} + /// Render a Temporal value as its canonical ISO-8601 / IXDTF string — the form /// `toString` and `toJSON` use. Returns `None` only if `value` is not a /// Temporal cell. +#[cfg(feature = "temporal")] pub fn temporal_iso_string(value: f64) -> Option { temporal_value_ref(value).map(temporal_value_iso_string) } @@ -239,6 +283,7 @@ pub fn temporal_iso_string(value: f64) -> Option { /// `console.log` / `util.inspect` form: `Temporal.Duration ` — the brand /// tag followed by the canonical string in angle brackets, matching V8's custom /// Temporal inspect output. Returns `None` if `value` is not a Temporal cell. +#[cfg(feature = "temporal")] pub fn temporal_inspect_string(value: f64) -> Option { temporal_value_ref(value) .map(|v| format!("{} <{}>", v.type_name(), temporal_value_iso_string(v))) @@ -247,6 +292,7 @@ pub fn temporal_inspect_string(value: f64) -> Option { /// ISO/IXDTF string for an already-borrowed [`TemporalValue`]. `temporal_rs` /// implements `Display` for each type as its canonical string form, so we defer /// to that (no formatting options = spec-default precision). +#[cfg(feature = "temporal")] pub fn temporal_value_iso_string(v: &TemporalValue) -> String { match v { TemporalValue::Duration(d) => d.to_string(), diff --git a/crates/perry-runtime/src/url/mod.rs b/crates/perry-runtime/src/url/mod.rs index 3e5f050259..74d95f7d77 100644 --- a/crates/perry-runtime/src/url/mod.rs +++ b/crates/perry-runtime/src/url/mod.rs @@ -181,9 +181,16 @@ pub(crate) fn string_header_to_string(value: *mut crate::StringHeader) -> String /// means for them (the hostname setter leaves the host unchanged; the /// `domainTo*` helpers return `""`), matching Node. 
pub(crate) fn whatwg_canonicalize_host(host: &str) -> Option { - url::Url::parse(&format!("http://{host}/")) - .ok() - .and_then(|u| u.host_str().map(str::to_string)) + #[cfg(feature = "url-engine")] + { + url::Url::parse(&format!("http://{host}/")) + .ok() + .and_then(|u| u.host_str().map(str::to_string)) + } + // URL engine gated off: no WHATWG host parser, so pass the host through + // unchanged (the hand-rolled URL paths handle the common cases). + #[cfg(not(feature = "url-engine"))] + Some(host.to_string()) } /// True when `host` is a canonical dotted-quad IPv4 literal. Used by diff --git a/crates/perry-runtime/src/url/node_compat.rs b/crates/perry-runtime/src/url/node_compat.rs index 21188232a0..1fc45e2f28 100644 --- a/crates/perry-runtime/src/url/node_compat.rs +++ b/crates/perry-runtime/src/url/node_compat.rs @@ -457,7 +457,11 @@ pub extern "C" fn js_url_domain_to_unicode(input_f64: f64) -> f64 { // Numeric / IPv4-shorthand → canonical IPv4 address (Node yields the IP). Some(canon) if is_ipv4_host(&canon) => canon, // Registrable hostname → Unicode IDN form. + #[cfg(feature = "url-engine")] Some(_) => idna::domain_to_unicode(&input).0, + // URL engine gated off: no IDNA, so return the input host unchanged. + #[cfg(not(feature = "url-engine"))] + Some(_) => input.clone(), }; create_string_f64(&out) } @@ -859,6 +863,24 @@ fn protocol_null_or_slashes(input: &str, protocol_is_null: bool, host: &str) -> protocol_is_null || input.starts_with("//") || input.contains("://") || !host.is_empty() } +/// WHATWG `URL.join` of `to` onto base `from`, or `None` when `from` isn't a +/// parseable absolute URL. Cfg-paired: the off twin returns `None` (no `url` +/// crate), so the caller falls back to the hand-rolled `resolve_url`. 
+#[cfg(feature = "url-engine")] +fn legacy_url_join(from: &str, to: &str) -> Option { + let base = url::Url::parse(from).ok()?; + Some( + base.join(to) + .map(|u| u.to_string()) + .unwrap_or_else(|_| resolve_url(to, from)), + ) +} + +#[cfg(not(feature = "url-engine"))] +fn legacy_url_join(_from: &str, _to: &str) -> Option { + None +} + #[no_mangle] pub extern "C" fn js_url_legacy_resolve(from: f64, to: f64) -> f64 { if !is_js_string_value(from) { @@ -871,10 +893,8 @@ pub extern "C" fn js_url_legacy_resolve(from: f64, to: f64) -> f64 { let to_s = get_string_content(to); let resolved = if to_s.starts_with('/') && !is_valid_absolute_url(&from_s) { to_s - } else if let Ok(base) = url::Url::parse(&from_s) { - base.join(&to_s) - .map(|u| u.to_string()) - .unwrap_or_else(|_| resolve_url(&to_s, &from_s)) + } else if let Some(j) = legacy_url_join(&from_s, &to_s) { + j } else { resolve_url(&to_s, &from_s) }; diff --git a/crates/perry-runtime/src/url/url_class.rs b/crates/perry-runtime/src/url/url_class.rs index 7e57dc17d7..952bd94061 100644 --- a/crates/perry-runtime/src/url/url_class.rs +++ b/crates/perry-runtime/src/url/url_class.rs @@ -153,21 +153,28 @@ fn normalize_hostname_value(raw: &str) -> Option { { return None; } - match idna::domain_to_ascii(raw) { - Ok(ascii) if !ascii.is_empty() => { - // #3056: apply the WHATWG numeric/IPv4-shorthand host parser as a - // post-step. `idna::domain_to_ascii` only runs IDNA, so a numeric - // host like `123` survives as `"123"` instead of canonicalizing to - // the IPv4 address `"0.0.0.123"`. The `url` crate's WHATWG host - // parser does this correctly; for ordinary hostnames it returns - // the same string (no change). When it rejects the host (e.g. - // out-of-range numeric `999999999999`) Node leaves the hostname - // unchanged — `None` propagates that, since `js_url_set_hostname` - // is a no-op on `None`. 
- super::whatwg_canonicalize_host(&ascii) + #[cfg(feature = "url-engine")] + { + match idna::domain_to_ascii(raw) { + Ok(ascii) if !ascii.is_empty() => { + // #3056: apply the WHATWG numeric/IPv4-shorthand host parser as a + // post-step. `idna::domain_to_ascii` only runs IDNA, so a numeric + // host like `123` survives as `"123"` instead of canonicalizing to + // the IPv4 address `"0.0.0.123"`. The `url` crate's WHATWG host + // parser does this correctly; for ordinary hostnames it returns + // the same string (no change). When it rejects the host (e.g. + // out-of-range numeric `999999999999`) Node leaves the hostname + // unchanged — `None` propagates that, since `js_url_set_hostname` + // is a no-op on `None`. + super::whatwg_canonicalize_host(&ascii) + } + _ => None, } - _ => None, } + // URL engine gated off: no IDNA. Fall back to the hand-rolled host + // canonicalizer (which, also gated off, passes the host through unchanged). + #[cfg(not(feature = "url-engine"))] + super::whatwg_canonicalize_host(raw) } fn percent_encode_path(raw: &str) -> String { diff --git a/crates/perry-runtime/src/value/dyn_index.rs b/crates/perry-runtime/src/value/dyn_index.rs index 41495f01a7..ca3a78a686 100644 --- a/crates/perry-runtime/src/value/dyn_index.rs +++ b/crates/perry-runtime/src/value/dyn_index.rs @@ -254,6 +254,7 @@ pub extern "C" fn js_dyn_index_set(obj: f64, index: f64, value: f64) -> f64 { } // A `Temporal.*` value is an opaque immutable cell — a dynamic property // write (`temporalValue[key] = v`) is a no-op, never an ObjectHeader write. 
+ #[cfg(feature = "temporal")] if crate::temporal::is_temporal_value(obj) { return value; } diff --git a/crates/perry-runtime/src/value/to_string.rs b/crates/perry-runtime/src/value/to_string.rs index 318fe54660..521860d45f 100644 --- a/crates/perry-runtime/src/value/to_string.rs +++ b/crates/perry-runtime/src/value/to_string.rs @@ -662,6 +662,7 @@ pub extern "C" fn js_jsvalue_to_string(value: f64) -> *mut crate::string::String // `temporal.toString()` produce the value's canonical ISO-8601 / // IXDTF string, not "[object Object]". Detected here for the same // reason as Date — the cell is smaller than an ObjectHeader. + #[cfg(feature = "temporal")] if crate::temporal::is_temporal_cell_addr(ptr as usize) { if let Some(s) = crate::temporal::temporal_iso_string(value) { return crate::string::js_string_from_bytes(s.as_ptr(), s.len() as u32); @@ -1051,6 +1052,7 @@ pub extern "C" fn js_jsvalue_to_string_radix( // the codegen routes any single-arg `.toString(x)` here. Dispatch back to // the Temporal method router so the options bag flows through, instead of // ToNumber-coercing it as a radix (which throws a spurious RangeError). + #[cfg(feature = "temporal")] if crate::temporal::is_temporal_value(value) { let result = crate::temporal::dispatch::call_method(value, "toString", &[radix_value]); let rv = JSValue::from_bits(result.to_bits()); diff --git a/crates/perry/src/commands/compile/collect_modules.rs b/crates/perry/src/commands/compile/collect_modules.rs index ef3c440f08..b3dbc254de 100644 --- a/crates/perry/src/commands/compile/collect_modules.rs +++ b/crates/perry/src/commands/compile/collect_modules.rs @@ -1839,6 +1839,94 @@ fn collect_module_finish( } } + // Detect whether this module needs the regex engine. 
The engine + // (`regex`/`fancy-regex`, ~1.2 MB) is gated behind `perry-runtime/ + // regex-engine` and the RegExp object's identity/display layer stays + // always-compiled, so a program that can never produce a RegExp at + // runtime links none of the matching machinery. A regex value can only + // exist if a regex literal / `RegExp` was evaluated, OR a regex-coercing + // string method (`.match`/`.matchAll`/`.search`, which build a RegExp from + // even a string arg per spec) ran, OR a glob API was used (the runtime + // compiles globs to regexes internally). We grep the serialized Debug form + // for the unambiguous HIR variant tokens and the generic-dispatch method + // names. Over-matching only over-includes the engine (a size, not a + // correctness, cost); the goal is zero false negatives. `eval` is + // non-functional in Perry so it can't create a regex at runtime. + { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + if hir_debug.contains("RegExp") // RegExp / RegExpDynamic / RegExpTest / RegExpExec / RegExpEscape / RegExpReplaceFn / RegExpExec{Index,Groups} + || hir_debug.contains("StringMatch") // dedicated .match / .matchAll variants + || hir_debug.contains("PathMatchesGlob") + || hir_debug.contains("property: \"search\"") + || hir_debug.contains("property: \"match\"") + || hir_debug.contains("property: \"matchAll\"") + || hir_debug.contains("property: \"glob\"") + || hir_debug.contains("property: \"globSync\"") + { + ctx.uses_regex = true; + } + } + + // Detect TC39 `Temporal.*` usage. The engine (`temporal_rs` + transitive + // tz/calendar deps, ~580 KB) is gated behind `perry-runtime/temporal`; + // the Temporal cell's identity layer stays always-compiled, so a program + // that never touches `Temporal` links none of the date-math machinery. + // `Temporal` is a global namespace (like `Intl`/`Math`): accessing it (even + // when aliased, e.g. 
`const now = Temporal.Now`) materializes a + // `PropertyGet { property: "Temporal" }`, so we match that exact token + // rather than a bare `"Temporal"` substring — the latter also fires on + // user identifiers like `myTemporal` / `temporalLog`, spuriously enabling + // the engine and undercutting the size win. JS `Date` is a separate impl. + { + let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); + if hir_debug.contains("property: \"Temporal\"") { + ctx.uses_temporal = true; + } + } + + // Detect WHATWG URL API usage. The `url`+`idna` host-canonicalization + // engine (~195 KB) is gated behind `perry-runtime/url-engine`; Perry's URL + // parsing is otherwise hand-rolled, so a program with no URL API links none + // of it. Web `URL`/`URLPattern`/`URLSearchParams` lower to dedicated `Url*` + // HIR variants (always `Url` + an uppercase letter, e.g. `UrlNew`, + // `UrlSet…`, `UrlSearchParams…`); `node:url` lowers to a + // `NativeMethodCall { module: "url", … }`. We match those exact tokens + // instead of a bare `"Url"`/`"URL"` substring, which would also fire on + // common camelCase identifiers like `baseUrl` / `imageUrl` and spuriously + // link the engine. Over-matching within the URL family (e.g. enabling for a + // URLSearchParams-only program that doesn't strictly need the host parser) + // is a benign size cost; the rule is zero false negatives. 
+ { let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); if hir_debug.contains("UrlNew") || hir_debug.contains("UrlParse") || hir_debug.contains("UrlCanParse") || hir_debug.contains("UrlPattern") || hir_debug.contains("UrlGet") || hir_debug.contains("UrlSet") || hir_debug.contains("UrlInstance") || hir_debug.contains("UrlSearchParams") || hir_debug.contains("module: \"url\"") { ctx.uses_url = true; } } + // Detect `String.prototype.normalize` (gates `unicode-normalization`, + // ~113 KB) and `Intl.Segmenter` (gates `unicode-segmentation`, ~73 KB). + // Both lower to method/namespace nodes carrying the name as a `property`, + // so we match the exact `property: "<name>"` token. (A bare `"Segmenter"` + // substring would also fire on a user identifier named `Segmenter`.) + { let hir_debug: String = format!("{:?}{:?}", &hir_module.init, &hir_module.functions); if hir_debug.contains("property: \"normalize\"") { ctx.uses_string_normalize = true; } if hir_debug.contains("property: \"Segmenter\"") { ctx.uses_intl_segmenter = true; } } + // Detect readline usage via process.stdin raw/lifecycle methods. These // don't go through an `import 'readline'` statement, so the import-based // needs_stdlib detection above misses them. diff --git a/crates/perry/src/commands/compile/optimized_libs.rs b/crates/perry/src/commands/compile/optimized_libs.rs index 37b707bdfc..256e1cf47b 100644 --- a/crates/perry/src/commands/compile/optimized_libs.rs +++ b/crates/perry/src/commands/compile/optimized_libs.rs @@ -670,11 +670,16 @@ pub(super) fn build_optimized_libs( // Cheap djb2 — no need for the SipHash overhead.
let target_str = target.unwrap_or("host"); let key_input = format!( - "{}|{}|{}|wasm={}|v={}", + "{}|{}|{}|wasm={}|regex={}|temporal={}|url={}|norm={}|seg={}|v={}", feature_arg, panic_abort_safe, target_str, ctx.needs_wasm_runtime, + ctx.uses_regex, + ctx.uses_temporal, + ctx.uses_url, + ctx.uses_string_normalize, + ctx.uses_intl_segmenter, env!("CARGO_PKG_VERSION"), ); let mut hash: u64 = 5381; @@ -763,6 +768,34 @@ pub(super) fn build_optimized_libs( if ctx.needs_wasm_runtime { cross_features.push("perry-runtime/wasm-host".to_string()); } + // Enable the regex engine (`regex` + `fancy-regex`, ~1.2 MB) only when the + // program can actually produce or use a RegExp — detected in + // collect_modules. A program that never evaluates a regex literal/`RegExp`, + // a regex-coercing string method, or a glob API links none of it. The + // RegExp identity/display layer is always compiled, so non-regex programs + // still format/compare values correctly with the engine absent. + if ctx.uses_regex { + cross_features.push("perry-runtime/regex-engine".to_string()); + } + // Enable the TC39 Temporal engine (`temporal_rs` + tz/calendar deps, + // ~580 KB) only when the program references `Temporal.*`. JS `Date` is a + // separate implementation and does not require this. + if ctx.uses_temporal { + cross_features.push("perry-runtime/temporal".to_string()); + } + // Enable the WHATWG URL host/IDNA engine (`url`+`idna`+transitive + // `percent_encoding`, ~195 KB) only when the program uses a URL API. + if ctx.uses_url { + cross_features.push("perry-runtime/url-engine".to_string()); + } + // `String.prototype.normalize` tables (~113 KB) and `Intl.Segmenter` + // UAX #29 tables (~73 KB) — each enabled only on its specific usage. 
+ if ctx.uses_string_normalize { + cross_features.push("perry-runtime/string-normalize".to_string()); + } + if ctx.uses_intl_segmenter { + cross_features.push("perry-runtime/intl-segmenter".to_string()); + } if !cross_features.is_empty() { cargo_cmd.arg("--features").arg(cross_features.join(",")); } diff --git a/crates/perry/src/commands/compile/types.rs b/crates/perry/src/commands/compile/types.rs index fbae35066d..19338354d9 100644 --- a/crates/perry/src/commands/compile/types.rs +++ b/crates/perry/src/commands/compile/types.rs @@ -540,6 +540,36 @@ pub struct CompilationContext { /// `CryptoSha256`/`CryptoMd5` which dispatch to runtime symbols that /// live behind the perry-stdlib `crypto` feature. pub uses_crypto_builtins: bool, + /// Whether any TS module needs the regular-expression engine — a regex + /// literal / `RegExp`, a regex-coercing string method (`.match` / + /// `.matchAll` / `.search`), or a glob API (`path.matchesGlob` / + /// `fs.glob*`, which compile a glob to a regex internally). When false, + /// the auto-optimize build leaves `perry-runtime/regex-engine` off and the + /// ~1.2 MB `regex`/`fancy-regex` machinery never links. The RegExp object's + /// identity/display layer stays compiled, so non-regex programs still + /// format/compare values correctly. + pub uses_regex: bool, + /// Whether any TS module uses the TC39 `Temporal.*` API. Gates + /// `perry-runtime/temporal` (the `temporal_rs` engine + its transitive + /// tz/calendar deps, ~580 KB). Independent of JS `Date`, which has its own + /// implementation — so a program using `Date` but never `Temporal.*` links + /// none of this. + pub uses_temporal: bool, + /// Whether any TS module uses a WHATWG URL API (`new URL`, the hostname + /// setter, `url.domainToASCII/Unicode`, legacy `url.resolve`, + /// `URLSearchParams`, `URLPattern`). Gates `perry-runtime/url-engine` (the + /// `url` + `idna` crates + transitive `percent_encoding`, ~195 KB). 
Perry's + /// URL parsing is otherwise hand-rolled, so a program with no URL API links + /// none of the host-canonicalization/IDNA machinery. + pub uses_url: bool, + /// Whether any TS module calls `String.prototype.normalize`. Gates + /// `perry-runtime/string-normalize` (`unicode-normalization`, ~113 KB of + /// NFC/NFD/NFKC/NFKD tables). + pub uses_string_normalize: bool, + /// Whether any TS module constructs an `Intl.Segmenter`. Gates + /// `perry-runtime/intl-segmenter` (`unicode-segmentation`, ~73 KB of UAX #29 + /// grapheme/word/sentence tables). Other `Intl.*` APIs don't need it. + pub uses_intl_segmenter: bool, /// Whether `perry/thread` is imported. When true, the runtime must /// keep `panic = "unwind"` so that worker-thread panics translate to /// promise rejections via `catch_unwind` in `perry-runtime/src/thread.rs` @@ -783,6 +813,11 @@ impl CompilationContext { native_module_imports: BTreeSet::new(), uses_fetch: false, uses_crypto_builtins: false, + uses_regex: false, + uses_temporal: false, + uses_url: false, + uses_string_normalize: false, + uses_intl_segmenter: false, needs_thread: false, cross_module_class_field_types: HashMap::new(), min_windows_version: "10".to_string(), diff --git a/scripts/check_file_size.sh b/scripts/check_file_size.sh index f420141c96..07a080bcbf 100755 --- a/scripts/check_file_size.sh +++ b/scripts/check_file_size.sh @@ -54,6 +54,17 @@ THRESHOLD="${PERRY_FILE_SIZE_THRESHOLD:-2000}" # Allowlist (one file per line; blank lines + `#` comments OK). ALLOWLIST=$(cat <<'EOF' crates/perry-runtime/src/gc/tests.rs +# RegExp runtime trunk. Crossed 2000 LOC (2041) when the user's regex engine +# was gated behind the `regex-engine` cargo feature — the per-fn `#[cfg]` +# attributes, the no-engine fallbacks, and the `CompiledRegex` header type alias +# added ~60 lines. 
The engine itself is already split across the +# regex/{compile,exec_array,grammar,match_all,replace_expand,replace_fn,escape} +# submodules; the trunk that remains is the always-compiled identity/display +# layer (RegExpHeader + accessors + `is_regex_pointer`, referenced by +# always-linked formatting/dispatch) plus the shared exec/cache state, which +# can't move without scattering the thread-local last-match state. Further +# trunk extraction is a reasonable follow-up. +crates/perry-runtime/src/regex.rs crates/perry-codegen-arkts/src/tests.rs crates/perry-api-manifest/src/entries.rs crates/perry/src/commands/compile.rs