From 3f33d7f35ab4cd410dee2d0b0e9588e4ae1fd61f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Wed, 17 Jun 2026 13:09:41 +0200 Subject: [PATCH] feat(intl): getCanonicalLocales, supportedValuesOf, formatToParts + ListFormat/RelativeTimeFormat/PluralRules (#5298) Closes the largest test262 intl402 gaps (parity 60.5% -> 66.0%, +184 cases, no regressions): - Intl.getCanonicalLocales: CanonicalizeLocaleList via icu_locale_core's data-free BCP-47/UTS#35 structural parser (new default-on intl-locale feature, auto-enabled on use; already in the lock graph via temporal so default builds carry no extra weight). TypeError/RangeError per spec, dedup. - Intl.supportedValuesOf: sorted, dedup'd value tables for calendar/collation/ currency/numberingSystem/timeZone/unit; RangeError on invalid key. - Intl.NumberFormat/DateTimeFormat.prototype.formatToParts: typed {type,value} parts that reconstruct format() byte-for-byte. - New constructors Intl.ListFormat, Intl.RelativeTimeFormat, Intl.PluralRules with en-US format/select + spec-shaped resolvedOptions + enum option RangeError validation. ListFormat consumes any iterable via collection_iter::classify_init. - Symbol.toStringTag = 'Intl.<Name>' on every Intl prototype. Remaining tail (follow-up): Intl.Locale/DisplayNames/DurationFormat constructors, formatRange/formatRangeToParts, and ICU-CLDR-data-dependent locale-formatting cases (non-gregorian Temporal calendars, locale-specific number/date output). 
--- Cargo.lock | 1 + crates/perry-runtime/Cargo.toml | 17 +- crates/perry-runtime/src/intl.rs | 848 +++++++++++++++++- crates/perry-runtime/src/intl/locales.rs | 232 +++++ .../compile/collect_modules/feature_detect.rs | 8 + .../src/commands/compile/optimized_libs.rs | 6 +- crates/perry/src/commands/compile/types.rs | 7 + 7 files changed, 1106 insertions(+), 13 deletions(-) create mode 100644 crates/perry-runtime/src/intl/locales.rs diff --git a/Cargo.lock b/Cargo.lock index 2f4ab09a6..cab2f2256 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5901,6 +5901,7 @@ dependencies = [ "fancy-regex", "hickory-proto", "hostname", + "icu_locale_core", "idna", "itoa", "lazy_static", diff --git a/crates/perry-runtime/Cargo.toml b/crates/perry-runtime/Cargo.toml index 0b8b4f96f..e5fc94847 100644 --- a/crates/perry-runtime/Cargo.toml +++ b/crates/perry-runtime/Cargo.toml @@ -15,7 +15,7 @@ crate-type = ["rlib", "staticlib"] # actually needs (see optimized_libs.rs), so the heavy subsystems below # (regex engine, Temporal, URL/IDNA, normalize, segmenter) are *opt-in per # app* and absent from binaries that never use them. -default = ["full", "regex-engine", "temporal", "url-engine", "string-normalize", "intl-segmenter", "diagnostics", "mod-dgram"] +default = ["full", "regex-engine", "temporal", "url-engine", "string-normalize", "intl-segmenter", "intl-locale", "diagnostics", "mod-dgram"] # Per-module Node-API gate (binary-size): compiles `node:dgram`'s UDP-socket # implementation (`crate::dgram` + `crate::dgram_reactor`, ~2.2k LOC, incl. the # `js_dgram_*` externs codegen emits direct calls to) + its dispatch arm only @@ -69,6 +69,17 @@ string-normalize = ["dep:unicode-normalization"] # wrap-ansi@9+ (and thus ink). Only `intl.rs` uses it; a program that never # constructs an `Intl.Segmenter` links none of it. 
intl-segmenter = ["dep:unicode-segmentation"] +# `Intl.getCanonicalLocales` / `*.supportedLocalesOf` BCP-47 (UTS #35) language-tag +# canonicalization via `icu_locale_core` (the data-free structural parser — case +# normalization, variant ordering, extension well-formedness, UTS35 rejection of +# extlang/grandfathered/duplicate-singleton tags). No CLDR data is pulled (that +# would need `icu_locale` + `icu_locale_data`), so deep alias replacement +# (grandfathered→preferred, complex subtag replacement) is out of scope. Only +# `intl.rs` uses it; a program that never canonicalizes a locale links none of +# it (the compiler enables it on `Intl.getCanonicalLocales`/`supportedLocalesOf` +# usage). Already pulled transitively by `temporal`, so default/shipped builds +# carry no extra weight. A hand-rolled structural fallback covers the off case. +intl-locale = ["dep:icu_locale_core"] # `full` only opt-ins the small Node-API helpers (os.hostname / os.homedir). # `postgres`, `redis`, `whoami` were previously listed here but were either # unimported (postgres, whoami) or only used by a now-deleted `redis_client.rs` @@ -134,6 +145,10 @@ unicode-normalization = { version = "0.1", optional = true } # Intl.Segmenter (the grapheme path is what string-width@7+/wrap-ansi@9+ use, # so it gates ink). Pure-Rust UAX #29 implementation, already in our lock graph. unicode-segmentation = { version = "1", optional = true } +# #5298: BCP-47 (UTS #35) structural locale-tag canonicalization for +# `Intl.getCanonicalLocales` / `*.supportedLocalesOf`. The data-free structural +# parser only (no CLDR alias tables); already in our lock graph via temporal_rs. +icu_locale_core = { version = "2", optional = true } idna = { version = "1", optional = true } url = { version = "2", optional = true } # #4911: real node:dns resolve*/reverse. 
hickory-proto provides DNS wire-format diff --git a/crates/perry-runtime/src/intl.rs b/crates/perry-runtime/src/intl.rs index d20baed2f..c11bc96ed 100644 --- a/crates/perry-runtime/src/intl.rs +++ b/crates/perry-runtime/src/intl.rs @@ -17,10 +17,16 @@ use crate::StringHeader; #[cfg(feature = "intl-segmenter")] use unicode_segmentation::UnicodeSegmentation; +mod locales; +use locales::{get_canonical_locales_thunk, supported_values_of_thunk}; + const KIND_NUMBER: &str = "NumberFormat"; const KIND_DATE_TIME: &str = "DateTimeFormat"; const KIND_COLLATOR: &str = "Collator"; const KIND_SEGMENTER: &str = "Segmenter"; +const KIND_LIST_FORMAT: &str = "ListFormat"; +const KIND_PLURAL_RULES: &str = "PluralRules"; +const KIND_RELATIVE_TIME: &str = "RelativeTimeFormat"; const KEY_KIND: &str = "__intlKind"; const KEY_LOCALE: &str = "__intlLocale"; @@ -30,6 +36,16 @@ const KEY_MAX_FRACTION_DIGITS: &str = "__intlMaxFractionDigits"; const KEY_DATE_STYLE: &str = "__intlDateStyle"; const KEY_TIME_ZONE: &str = "__intlTimeZone"; const KEY_GRANULARITY: &str = "__intlGranularity"; +const KEY_TYPE: &str = "__intlType"; +const KEY_LF_STYLE: &str = "__intlListStyle"; +const KEY_NUMERIC: &str = "__intlNumeric"; +const KEY_RTF_STYLE: &str = "__intlRtfStyle"; +const KEY_PR_MIN_INT: &str = "__intlMinInt"; +const KEY_PR_MIN_FRAC: &str = "__intlMinFrac"; +const KEY_PR_MAX_FRAC: &str = "__intlMaxFrac"; +const KEY_PR_MIN_SIG: &str = "__intlMinSig"; +const KEY_PR_MAX_SIG: &str = "__intlMaxSig"; +const KEY_PR_USE_SIG: &str = "__intlUseSig"; fn undefined() -> f64 { f64::from_bits(crate::value::TAG_UNDEFINED) @@ -209,6 +225,31 @@ fn canonical_locale(tag: &str) -> Option { Some(out) } +/// CanonicalizeLanguageTag (ECMA-402): structural validity check + UTS #35 +/// canonicalization. Returns `None` when the tag is not a structurally valid +/// `unicode_locale_id` (the caller raises `RangeError`). 
+/// +/// With the `intl-locale` feature this delegates to `icu_locale_core`'s data-free +/// structural parser, which gives correct case normalization, variant ordering, +/// extension well-formedness, and UTS #35 rejection of extlang / grandfathered / +/// duplicate-singleton tags. (Deep CLDR alias replacement — +/// grandfathered→preferred, complex subtag replacement, unicode-extension value +/// aliases — needs `icu_locale` + its CLDR data and is out of scope.) The +/// fallback path uses the lighter hand-rolled `canonical_locale`. +fn canonicalize_language_tag(tag: &str) -> Option { + #[cfg(feature = "intl-locale")] + { + match icu_locale_core::Locale::normalize(tag) { + Ok(canonical) => Some(canonical.into_owned()), + Err(_) => None, + } + } + #[cfg(not(feature = "intl-locale"))] + { + canonical_locale(tag) + } +} + fn locales_from_value(locales: f64) -> Vec { let js = JSValue::from_bits(locales.to_bits()); if js.is_undefined() || js.is_null() { @@ -319,29 +360,114 @@ fn format_number_parts( out } -fn format_number_instance(obj: *const ObjectHeader, value: f64) -> String { +/// Split an already-formatted numeric string (e.g. `-1,234.50`, `Infinity`, +/// `NaN`) into typed `formatToParts` segments under `locale`. The concatenation +/// of the segment values reproduces the input string exactly, so `format()` and +/// `formatToParts()` stay byte-consistent (the invariant the spec's own +/// `formatToParts` main test asserts: `format(x) === parts.map(p=>p.value).join('')`). +fn split_numeric_parts(s: &str, locale: &str, parts: &mut Vec<(&'static str, String)>) { + let de_style = locale.eq_ignore_ascii_case("de") || locale.starts_with("de-"); + let group_sep = if de_style { '.' } else { ',' }; + let decimal_sep = if de_style { ',' } else { '.' 
}; + + let mut rest = s; + if let Some(stripped) = rest.strip_prefix('-') { + parts.push(("minusSign", "-".to_string())); + rest = stripped; + } + if rest == "Infinity" { + parts.push(("infinity", rest.to_string())); + return; + } + if rest == "NaN" { + parts.push(("nan", rest.to_string())); + return; + } + + let (int_part, frac_part) = match rest.split_once(decimal_sep) { + Some((i, f)) => (i, Some(f)), + None => (rest, None), + }; + let mut cur = String::new(); + for ch in int_part.chars() { + if ch == group_sep { + if !cur.is_empty() { + parts.push(("integer", std::mem::take(&mut cur))); + } + parts.push(("group", ch.to_string())); + } else { + cur.push(ch); + } + } + if !cur.is_empty() { + parts.push(("integer", cur)); + } + if let Some(frac) = frac_part { + parts.push(("decimal", decimal_sep.to_string())); + parts.push(("fraction", frac.to_string())); + } +} + +/// Build the typed `formatToParts` segment list for a NumberFormat instance. +/// `format()` is defined as the concatenation of these segments' values. 
+fn number_instance_parts(obj: *const ObjectHeader, value: f64) -> Vec<(&'static str, String)> { let locale = get_string_field(obj, KEY_LOCALE).unwrap_or_else(|| "en-US".to_string()); let style = get_string_field(obj, KEY_STYLE).unwrap_or_else(|| "decimal".to_string()); - let currency = get_string_field(obj, KEY_CURRENCY); + let mut parts: Vec<(&'static str, String)> = Vec::new(); if style == "currency" { - let mut formatted = format_number_parts(value, &locale, Some(2), None); + let digits = format_number_parts(value, &locale, Some(2), None); + let currency = get_string_field(obj, KEY_CURRENCY); + let mut numeric: Vec<(&'static str, String)> = Vec::new(); + split_numeric_parts(&digits, &locale, &mut numeric); match currency.as_deref() { - Some("EUR") if locale.starts_with("de") => formatted.push_str("\u{00a0}\u{20ac}"), - Some("EUR") => formatted = format!("\u{20ac}{formatted}"), - Some("USD") => formatted = format!("${formatted}"), + Some("EUR") if locale.starts_with("de") => { + parts = numeric; + parts.push(("literal", "\u{00a0}".to_string())); + parts.push(("currency", "\u{20ac}".to_string())); + } + Some("EUR") => { + parts.push(("currency", "\u{20ac}".to_string())); + parts.extend(numeric); + } + Some("USD") => { + parts.push(("currency", "$".to_string())); + parts.extend(numeric); + } Some(code) => { - formatted.push(' '); - formatted.push_str(code); + parts = numeric; + parts.push(("literal", " ".to_string())); + parts.push(("currency", code.to_string())); } - None => {} + None => parts = numeric, } - formatted } else { let max_digits = get_number_field(obj, KEY_MAX_FRACTION_DIGITS) .filter(|n| *n >= 0.0) .map(|n| n as usize); - format_number_parts(value, &locale, None, max_digits) + let digits = format_number_parts(value, &locale, None, max_digits); + split_numeric_parts(&digits, &locale, &mut parts); + } + parts +} + +fn format_number_instance(obj: *const ObjectHeader, value: f64) -> String { + number_instance_parts(obj, value) + .iter() + .map(|(_, v)| 
v.as_str()) + .collect() +} + +/// Convert a typed-parts list into a JS array of `{ type, value }` objects — +/// the `Intl.*.prototype.formatToParts` return shape. +fn parts_to_js_array(parts: &[(&'static str, String)]) -> f64 { + let mut arr = js_array_alloc(parts.len() as u32); + for (ty, val) in parts { + let obj = js_object_alloc(0, 2); + set_field(obj, "type", string_value(ty)); + set_field(obj, "value", string_value(val)); + arr = js_array_push_f64(arr, js_nanbox_pointer(obj as i64)); } + js_nanbox_pointer(arr as i64) } fn this_intl_object(method: &str, expected_kind: &str) -> *mut ObjectHeader { @@ -398,6 +524,18 @@ extern "C" fn number_format_bound_resolved_options_thunk(closure: *const Closure number_format_resolved_options_object(obj) } +extern "C" fn number_format_to_parts_thunk(_closure: *const ClosureHeader, value: f64) -> f64 { + let obj = this_intl_object("formatToParts", KIND_NUMBER); + let number = JSValue::from_bits(value.to_bits()).to_number(); + parts_to_js_array(&number_instance_parts(obj, number)) +} + +extern "C" fn number_format_bound_to_parts_thunk(closure: *const ClosureHeader, value: f64) -> f64 { + let obj = captured_intl_object(closure, "formatToParts", KIND_NUMBER); + let number = JSValue::from_bits(value.to_bits()).to_number(); + parts_to_js_array(&number_instance_parts(obj, number)) +} + fn number_format_resolved_options_object(obj: *const ObjectHeader) -> f64 { let out = js_object_alloc(0, 6); set_field( @@ -441,6 +579,38 @@ fn date_time_format_format_value(value: f64) -> f64 { string_value(&date_short_utc(value)) } +/// Typed `formatToParts` segments for the default short DateTimeFormat. The +/// concatenation reproduces `date_short_utc` (`M/D/YY`), keeping `format()` and +/// `formatToParts()` consistent. 
+fn date_instance_parts(value: f64) -> Vec<(&'static str, String)> { + let timestamp = crate::date::date_cell_timestamp(value); + if timestamp.is_nan() { + return vec![("literal", "Invalid Date".to_string())]; + } + let secs = (timestamp as i64).div_euclid(1000); + let (year, month, day, _, _, _) = crate::date::timestamp_to_components(secs); + vec![ + ("month", month.to_string()), + ("literal", "/".to_string()), + ("day", day.to_string()), + ("literal", "/".to_string()), + ("year", format!("{:02}", year.rem_euclid(100))), + ] +} + +extern "C" fn date_time_format_to_parts_thunk(_closure: *const ClosureHeader, value: f64) -> f64 { + let _obj = this_intl_object("formatToParts", KIND_DATE_TIME); + parts_to_js_array(&date_instance_parts(value)) +} + +extern "C" fn date_time_format_bound_to_parts_thunk( + closure: *const ClosureHeader, + value: f64, +) -> f64 { + let _obj = captured_intl_object(closure, "formatToParts", KIND_DATE_TIME); + parts_to_js_array(&date_instance_parts(value)) +} + extern "C" fn date_time_format_resolved_options_thunk(_closure: *const ClosureHeader) -> f64 { let obj = this_intl_object("resolvedOptions", KIND_DATE_TIME); date_time_format_resolved_options_object(obj) @@ -700,6 +870,436 @@ fn segmenter_resolved_options_object(obj: *const ObjectHeader) -> f64 { js_nanbox_pointer(out as i64) } +/// GetOption with an enumerated value set: coerce `options[key]` to a string and +/// require it to be one of `allowed`, else `RangeError`. Absent/`undefined` +/// yields `default`. 
+fn enum_option(options: f64, key: &str, allowed: &[&str], default: &str) -> String { + match get_option_string(options, key) { + None => default.to_string(), + Some(value) => { + if allowed.contains(&value.as_str()) { + value + } else { + throw_range_error(&format!( + "Value {value} out of range for Intl options property {key}" + )) + } + } + } +} + +/// Drain any JS iterable into a `Vec`, throwing `TypeError` if an +/// element is not a String (the ECMA-402 StringListFromIterable contract). +fn collect_string_list(value: f64) -> Vec { + use crate::collection_iter::{classify_init, InitIter}; + let arr_ptr = match classify_init(value) { + InitIter::Empty => return Vec::new(), + InitIter::Values(p) => p as *const crate::ArrayHeader, + }; + if arr_ptr.is_null() { + return Vec::new(); + } + let len = js_array_length(arr_ptr); + let mut out = Vec::with_capacity(len as usize); + for i in 0..len { + let element = js_array_get_f64(arr_ptr, i); + if !JSValue::from_bits(element.to_bits()).is_any_string() { + throw_type_error("Iterable yielded a non-string value for Intl.ListFormat"); + } + out.push(string_from_string_value(element).unwrap_or_default()); + } + out +} + +/// en-US `listPattern` connectors as `(pair, middle, last)` separators, where +/// `pair` joins a 2-element list, `middle` joins all but the final boundary of a +/// 3+-element list, and `last` joins the final boundary. 
+fn list_separators(list_type: &str, style: &str) -> (&'static str, &'static str, &'static str) { + match list_type { + "unit" => { + if style == "narrow" { + (" ", " ", " ") + } else { + (", ", ", ", ", ") + } + } + "disjunction" => (" or ", ", ", ", or "), + // conjunction (default) + _ => match style { + "short" => (" & ", ", ", ", & "), + "narrow" => (", ", ", ", ", "), + _ => (" and ", ", ", ", and "), + }, + } +} + +fn list_format_parts( + items: &[String], + list_type: &str, + style: &str, +) -> Vec<(&'static str, String)> { + let (pair, middle, last) = list_separators(list_type, style); + let mut parts: Vec<(&'static str, String)> = Vec::new(); + let n = items.len(); + if n == 0 { + return parts; + } + if n == 1 { + parts.push(("element", items[0].clone())); + return parts; + } + if n == 2 { + parts.push(("element", items[0].clone())); + parts.push(("literal", pair.to_string())); + parts.push(("element", items[1].clone())); + return parts; + } + for (i, item) in items.iter().enumerate() { + if i > 0 { + let sep = if i == n - 1 { last } else { middle }; + parts.push(("literal", sep.to_string())); + } + parts.push(("element", item.clone())); + } + parts +} + +fn list_format_instance_parts(obj: *const ObjectHeader, value: f64) -> Vec<(&'static str, String)> { + let items = collect_string_list(value); + let list_type = get_string_field(obj, KEY_TYPE).unwrap_or_else(|| "conjunction".to_string()); + let style = get_string_field(obj, KEY_LF_STYLE).unwrap_or_else(|| "long".to_string()); + list_format_parts(&items, &list_type, &style) +} + +extern "C" fn list_format_format_thunk(_closure: *const ClosureHeader, value: f64) -> f64 { + let obj = this_intl_object("format", KIND_LIST_FORMAT); + string_value( + &list_format_instance_parts(obj, value) + .iter() + .map(|(_, v)| v.as_str()) + .collect::(), + ) +} + +extern "C" fn list_format_bound_format_thunk(closure: *const ClosureHeader, value: f64) -> f64 { + let obj = captured_intl_object(closure, "format", 
KIND_LIST_FORMAT); + string_value( + &list_format_instance_parts(obj, value) + .iter() + .map(|(_, v)| v.as_str()) + .collect::(), + ) +} + +extern "C" fn list_format_to_parts_thunk(_closure: *const ClosureHeader, value: f64) -> f64 { + let obj = this_intl_object("formatToParts", KIND_LIST_FORMAT); + parts_to_js_array(&list_format_instance_parts(obj, value)) +} + +extern "C" fn list_format_bound_to_parts_thunk(closure: *const ClosureHeader, value: f64) -> f64 { + let obj = captured_intl_object(closure, "formatToParts", KIND_LIST_FORMAT); + parts_to_js_array(&list_format_instance_parts(obj, value)) +} + +fn list_format_resolved_options_object(obj: *const ObjectHeader) -> f64 { + let out = js_object_alloc(0, 3); + set_field( + out, + "locale", + string_value(&get_string_field(obj, KEY_LOCALE).unwrap_or_else(|| "en-US".to_string())), + ); + set_field( + out, + "type", + string_value(&get_string_field(obj, KEY_TYPE).unwrap_or_else(|| "conjunction".to_string())), + ); + set_field( + out, + "style", + string_value(&get_string_field(obj, KEY_LF_STYLE).unwrap_or_else(|| "long".to_string())), + ); + js_nanbox_pointer(out as i64) +} + +extern "C" fn list_format_resolved_options_thunk(_closure: *const ClosureHeader) -> f64 { + let obj = this_intl_object("resolvedOptions", KIND_LIST_FORMAT); + list_format_resolved_options_object(obj) +} + +extern "C" fn list_format_bound_resolved_options_thunk(closure: *const ClosureHeader) -> f64 { + let obj = captured_intl_object(closure, "resolvedOptions", KIND_LIST_FORMAT); + list_format_resolved_options_object(obj) +} + +// ---- Intl.RelativeTimeFormat ---------------------------------------------- + +const RTF_SINGULAR_UNITS: &[&str] = &[ + "second", "minute", "hour", "day", "week", "month", "quarter", "year", +]; + +/// Normalize a RelativeTimeFormat unit argument (singular or plural) to its +/// singular sanctioned form, or `None` if unrecognized (caller raises RangeError). 
+fn rtf_singular_unit(unit: &str) -> Option<&'static str> { + let lower = unit.to_ascii_lowercase(); + let candidate = lower.strip_suffix('s').unwrap_or(&lower); + RTF_SINGULAR_UNITS.iter().copied().find(|u| *u == candidate) +} + +/// Build the long-form, `numeric: "always"` en-US relative-time parts for +/// `value` in `unit`. (`short`/`narrow` abbreviations and the `numeric: "auto"` +/// special words — "tomorrow"/"yesterday" — need CLDR data and fall back to the +/// long numeric form here.) Returns `(leading, number, trailing)` literal/number +/// fragments so `format` and `formatToParts` stay consistent. +fn rtf_parts(value: f64, unit: &str) -> Vec<(&'static str, String)> { + let abs = value.abs(); + let num_str = format_number_parts(abs, "en-US", None, None); + let unit_display = if abs == 1.0 { + unit.to_string() + } else { + format!("{unit}s") + }; + let past = value.is_sign_negative(); + let mut parts: Vec<(&'static str, String)> = Vec::new(); + if past { + split_numeric_parts(&num_str, "en-US", &mut parts); + parts.push(("literal", format!(" {unit_display} ago"))); + } else { + parts.push(("literal", "in ".to_string())); + split_numeric_parts(&num_str, "en-US", &mut parts); + parts.push(("literal", format!(" {unit_display}"))); + } + parts +} + +fn rtf_instance_parts(value: f64, unit_arg: f64) -> Vec<(&'static str, String)> { + let number = JSValue::from_bits(value.to_bits()).to_number(); + if !number.is_finite() { + throw_range_error("Value need to be finite number for Intl.RelativeTimeFormat.format()"); + } + let unit_str = value_to_string(unit_arg); + let Some(unit) = rtf_singular_unit(&unit_str) else { + throw_range_error(&format!( + "Value {unit_str} out of range for Intl.RelativeTimeFormat.format() unit" + )); + }; + rtf_parts(number, unit) +} + +extern "C" fn rtf_format_thunk(_closure: *const ClosureHeader, value: f64, unit: f64) -> f64 { + let _obj = this_intl_object("format", KIND_RELATIVE_TIME); + string_value( + &rtf_instance_parts(value, unit) 
+ .iter() + .map(|(_, v)| v.as_str()) + .collect::(), + ) +} + +extern "C" fn rtf_bound_format_thunk(closure: *const ClosureHeader, value: f64, unit: f64) -> f64 { + let _obj = captured_intl_object(closure, "format", KIND_RELATIVE_TIME); + string_value( + &rtf_instance_parts(value, unit) + .iter() + .map(|(_, v)| v.as_str()) + .collect::(), + ) +} + +extern "C" fn rtf_to_parts_thunk(_closure: *const ClosureHeader, value: f64, unit: f64) -> f64 { + let _obj = this_intl_object("formatToParts", KIND_RELATIVE_TIME); + parts_to_js_array(&rtf_instance_parts(value, unit)) +} + +extern "C" fn rtf_bound_to_parts_thunk( + closure: *const ClosureHeader, + value: f64, + unit: f64, +) -> f64 { + let _obj = captured_intl_object(closure, "formatToParts", KIND_RELATIVE_TIME); + parts_to_js_array(&rtf_instance_parts(value, unit)) +} + +fn rtf_resolved_options_object(obj: *const ObjectHeader) -> f64 { + let out = js_object_alloc(0, 4); + set_field( + out, + "locale", + string_value(&get_string_field(obj, KEY_LOCALE).unwrap_or_else(|| "en-US".to_string())), + ); + set_field( + out, + "style", + string_value(&get_string_field(obj, KEY_RTF_STYLE).unwrap_or_else(|| "long".to_string())), + ); + set_field( + out, + "numeric", + string_value(&get_string_field(obj, KEY_NUMERIC).unwrap_or_else(|| "always".to_string())), + ); + set_field(out, "numberingSystem", string_value("latn")); + js_nanbox_pointer(out as i64) +} + +extern "C" fn rtf_resolved_options_thunk(_closure: *const ClosureHeader) -> f64 { + let obj = this_intl_object("resolvedOptions", KIND_RELATIVE_TIME); + rtf_resolved_options_object(obj) +} + +extern "C" fn rtf_bound_resolved_options_thunk(closure: *const ClosureHeader) -> f64 { + let obj = captured_intl_object(closure, "resolvedOptions", KIND_RELATIVE_TIME); + rtf_resolved_options_object(obj) +} + +// ---- Intl.PluralRules ------------------------------------------------------ + +/// en plural-category selection. Cardinal: `i == 1 && v == 0` → "one". 
Ordinal +/// (UTS #35 en ordinal rules): 1st→"one", 2nd→"two", 3rd→"few", else "other". +fn plural_select_en(n: f64, is_ordinal: bool) -> &'static str { + if !n.is_finite() { + return "other"; + } + let abs = n.abs(); + if !is_ordinal { + return if abs == 1.0 { "one" } else { "other" }; + } + if abs.fract() != 0.0 { + return "other"; + } + let i = abs as u64; + let m10 = i % 10; + let m100 = i % 100; + if m10 == 1 && m100 != 11 { + "one" + } else if m10 == 2 && m100 != 12 { + "two" + } else if m10 == 3 && m100 != 13 { + "few" + } else { + "other" + } +} + +fn plural_categories(is_ordinal: bool) -> &'static [&'static str] { + if is_ordinal { + &["one", "two", "few", "other"] + } else { + &["one", "other"] + } +} + +fn plural_rules_select(obj: *const ObjectHeader, value: f64) -> f64 { + let n = JSValue::from_bits(value.to_bits()).to_number(); + let is_ordinal = get_string_field(obj, KEY_TYPE).as_deref() == Some("ordinal"); + string_value(plural_select_en(n, is_ordinal)) +} + +extern "C" fn plural_rules_select_thunk(_closure: *const ClosureHeader, value: f64) -> f64 { + let obj = this_intl_object("select", KIND_PLURAL_RULES); + plural_rules_select(obj, value) +} + +extern "C" fn plural_rules_bound_select_thunk(closure: *const ClosureHeader, value: f64) -> f64 { + let obj = captured_intl_object(closure, "select", KIND_PLURAL_RULES); + plural_rules_select(obj, value) +} + +extern "C" fn plural_rules_select_range_thunk( + _closure: *const ClosureHeader, + start: f64, + end: f64, +) -> f64 { + let _obj = this_intl_object("selectRange", KIND_PLURAL_RULES); + plural_select_range(start, end) +} + +extern "C" fn plural_rules_bound_select_range_thunk( + closure: *const ClosureHeader, + start: f64, + end: f64, +) -> f64 { + let _obj = captured_intl_object(closure, "selectRange", KIND_PLURAL_RULES); + plural_select_range(start, end) +} + +fn plural_select_range(start: f64, end: f64) -> f64 { + let s = JSValue::from_bits(start.to_bits()).to_number(); + let e = 
JSValue::from_bits(end.to_bits()).to_number(); + if s.is_nan() || e.is_nan() { + throw_range_error("Invalid values for Intl.PluralRules.selectRange()"); + } + // en range plural is "other" for all but trivial cases; report "other". + string_value("other") +} + +fn plural_rules_resolved_options_object(obj: *const ObjectHeader) -> f64 { + let out = js_object_alloc(0, 11); + set_field( + out, + "locale", + string_value(&get_string_field(obj, KEY_LOCALE).unwrap_or_else(|| "en-US".to_string())), + ); + let is_ordinal = get_string_field(obj, KEY_TYPE).as_deref() == Some("ordinal"); + set_field( + out, + "type", + string_value(if is_ordinal { "ordinal" } else { "cardinal" }), + ); + set_field(out, "notation", string_value("standard")); + set_field( + out, + "minimumIntegerDigits", + get_number_field(obj, KEY_PR_MIN_INT).unwrap_or(1.0), + ); + let use_sig = get_field(obj, KEY_PR_USE_SIG).to_bits() == crate::value::TAG_TRUE; + if use_sig { + set_field( + out, + "minimumSignificantDigits", + get_number_field(obj, KEY_PR_MIN_SIG).unwrap_or(1.0), + ); + set_field( + out, + "maximumSignificantDigits", + get_number_field(obj, KEY_PR_MAX_SIG).unwrap_or(21.0), + ); + } else { + set_field( + out, + "minimumFractionDigits", + get_number_field(obj, KEY_PR_MIN_FRAC).unwrap_or(0.0), + ); + set_field( + out, + "maximumFractionDigits", + get_number_field(obj, KEY_PR_MAX_FRAC).unwrap_or(3.0), + ); + } + let mut categories = js_array_alloc(0); + for cat in plural_categories(is_ordinal) { + categories = js_array_push_f64(categories, string_value(cat)); + } + set_field( + out, + "pluralCategories", + js_nanbox_pointer(categories as i64), + ); + set_field(out, "roundingIncrement", 1.0); + set_field(out, "roundingMode", string_value("halfExpand")); + set_field(out, "roundingPriority", string_value("auto")); + set_field(out, "trailingZeroDisplay", string_value("auto")); + js_nanbox_pointer(out as i64) +} + +extern "C" fn plural_rules_resolved_options_thunk(_closure: *const ClosureHeader) -> f64 
{ + let obj = this_intl_object("resolvedOptions", KIND_PLURAL_RULES); + plural_rules_resolved_options_object(obj) +} + +extern "C" fn plural_rules_bound_resolved_options_thunk(closure: *const ClosureHeader) -> f64 { + let obj = captured_intl_object(closure, "resolvedOptions", KIND_PLURAL_RULES); + plural_rules_resolved_options_object(obj) +} + fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, options: f64) -> f64 { let locale = locale_or_default(locales); let obj = js_object_alloc(0, 8); @@ -727,6 +1327,12 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option number_format_bound_format_thunk as *const u8, 1, ); + install_bound_instance_function( + obj, + "formatToParts", + number_format_bound_to_parts_thunk as *const u8, + 1, + ); install_bound_instance_function( obj, "resolvedOptions", @@ -747,6 +1353,12 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option date_time_format_bound_format_thunk as *const u8, 1, ); + install_bound_instance_function( + obj, + "formatToParts", + date_time_format_bound_to_parts_thunk as *const u8, + 1, + ); install_bound_instance_function( obj, "resolvedOptions", @@ -784,6 +1396,92 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option 0, ); } + KIND_LIST_FORMAT => { + let list_type = enum_option( + options, + "type", + &["conjunction", "disjunction", "unit"], + "conjunction", + ); + let style = enum_option(options, "style", &["long", "short", "narrow"], "long"); + set_internal_field(obj, KEY_TYPE, string_value(&list_type)); + set_internal_field(obj, KEY_LF_STYLE, string_value(&style)); + install_bound_instance_function( + obj, + "format", + list_format_bound_format_thunk as *const u8, + 1, + ); + install_bound_instance_function( + obj, + "formatToParts", + list_format_bound_to_parts_thunk as *const u8, + 1, + ); + install_bound_instance_function( + obj, + "resolvedOptions", + list_format_bound_resolved_options_thunk as 
*const u8, + 0, + ); + } + KIND_RELATIVE_TIME => { + let style = enum_option(options, "style", &["long", "short", "narrow"], "long"); + let numeric = enum_option(options, "numeric", &["always", "auto"], "always"); + set_internal_field(obj, KEY_RTF_STYLE, string_value(&style)); + set_internal_field(obj, KEY_NUMERIC, string_value(&numeric)); + install_bound_instance_function(obj, "format", rtf_bound_format_thunk as *const u8, 2); + install_bound_instance_function( + obj, + "formatToParts", + rtf_bound_to_parts_thunk as *const u8, + 2, + ); + install_bound_instance_function( + obj, + "resolvedOptions", + rtf_bound_resolved_options_thunk as *const u8, + 0, + ); + } + KIND_PLURAL_RULES => { + let pr_type = enum_option(options, "type", &["cardinal", "ordinal"], "cardinal"); + set_internal_field(obj, KEY_TYPE, string_value(&pr_type)); + let min_int = get_option_number(options, "minimumIntegerDigits").unwrap_or(1.0); + set_internal_field(obj, KEY_PR_MIN_INT, min_int); + let min_sig = get_option_number(options, "minimumSignificantDigits"); + let max_sig = get_option_number(options, "maximumSignificantDigits"); + if min_sig.is_some() || max_sig.is_some() { + set_internal_field(obj, KEY_PR_USE_SIG, bool_value(true)); + set_internal_field(obj, KEY_PR_MIN_SIG, min_sig.unwrap_or(1.0)); + set_internal_field(obj, KEY_PR_MAX_SIG, max_sig.unwrap_or(21.0)); + } else { + set_internal_field(obj, KEY_PR_USE_SIG, bool_value(false)); + let min_frac = get_option_number(options, "minimumFractionDigits").unwrap_or(0.0); + let max_frac = get_option_number(options, "maximumFractionDigits") + .unwrap_or_else(|| min_frac.max(3.0)); + set_internal_field(obj, KEY_PR_MIN_FRAC, min_frac); + set_internal_field(obj, KEY_PR_MAX_FRAC, max_frac); + } + install_bound_instance_function( + obj, + "select", + plural_rules_bound_select_thunk as *const u8, + 1, + ); + install_bound_instance_function( + obj, + "selectRange", + plural_rules_bound_select_range_thunk as *const u8, + 2, + ); + 
install_bound_instance_function( + obj, + "resolvedOptions", + plural_rules_bound_resolved_options_thunk as *const u8, + 0, + ); + } _ => {} } @@ -848,6 +1546,36 @@ extern "C" fn segmenter_constructor_thunk(closure: *const ClosureHeader, rest: f ) } +extern "C" fn list_format_constructor_thunk(closure: *const ClosureHeader, rest: f64) -> f64 { + make_instance( + closure, + KIND_LIST_FORMAT, + rest_arg(rest, 0), + rest_arg(rest, 1), + ) +} + +extern "C" fn relative_time_format_constructor_thunk( + closure: *const ClosureHeader, + rest: f64, +) -> f64 { + make_instance( + closure, + KIND_RELATIVE_TIME, + rest_arg(rest, 0), + rest_arg(rest, 1), + ) +} + +extern "C" fn plural_rules_constructor_thunk(closure: *const ClosureHeader, rest: f64) -> f64 { + make_instance( + closure, + KIND_PLURAL_RULES, + rest_arg(rest, 0), + rest_arg(rest, 1), + ) +} + fn supported_locales_array(locales: f64) -> f64 { let locales = locales_from_value(locales); let mut arr = js_array_alloc(locales.len() as u32); @@ -896,6 +1624,29 @@ fn install_function( value } +/// Set `proto[Symbol.toStringTag]` to `tag` (non-writable, non-enumerable, +/// configurable) so `Object.prototype.toString.call(instance)` yields +/// `[object <tag>]` — the ECMA-402 default for every `Intl.*` prototype.
+fn set_proto_to_string_tag(proto: *mut ObjectHeader, tag: &str) { + let sym = crate::symbol::well_known_symbol("toStringTag"); + if sym.is_null() { + return; + } + let tag_str = js_string_from_bytes(tag.as_ptr(), tag.len() as u32); + unsafe { + crate::symbol::js_object_set_symbol_property( + js_nanbox_pointer(proto as i64), + f64::from_bits(JSValue::pointer(sym as *const u8).bits()), + f64::from_bits(crate::js_nanbox_string(tag_str as i64).to_bits()), + ); + } + crate::symbol::set_symbol_property_attrs( + proto as usize, + sym as usize, + PropertyAttrs::new(false, false, true), + ); +} + fn install_constructor( ns_obj: *mut ObjectHeader, name: &str, @@ -927,6 +1678,7 @@ fn install_constructor( for (method, ptr, arity) in methods.iter().copied() { install_function(proto, method, ptr, arity, arity, false); } + set_proto_to_string_tag(proto, &format!("Intl.{name}")); let proto_value = js_nanbox_pointer(proto as i64); crate::closure::closure_set_dynamic_prop(ctor as usize, "prototype", proto_value); crate::object::set_builtin_property_attrs( @@ -953,12 +1705,35 @@ pub fn install_intl_namespace(ns_obj: *mut ObjectHeader) { if ns_obj.is_null() { return; } + // `Intl.getCanonicalLocales` / `Intl.supportedValuesOf` — plain namespace + // functions (length 1 each). 
+ install_function( + ns_obj, + "getCanonicalLocales", + get_canonical_locales_thunk as *const u8, + 1, + 1, + false, + ); + install_function( + ns_obj, + "supportedValuesOf", + supported_values_of_thunk as *const u8, + 1, + 1, + false, + ); install_constructor( ns_obj, "NumberFormat", number_format_constructor_thunk as *const u8, &[ ("format", number_format_format_thunk as *const u8, 1), + ( + "formatToParts", + number_format_to_parts_thunk as *const u8, + 1, + ), ( "resolvedOptions", number_format_resolved_options_thunk as *const u8, @@ -972,6 +1747,11 @@ pub fn install_intl_namespace(ns_obj: *mut ObjectHeader) { date_time_format_constructor_thunk as *const u8, &[ ("format", date_time_format_format_thunk as *const u8, 1), + ( + "formatToParts", + date_time_format_to_parts_thunk as *const u8, + 1, + ), ( "resolvedOptions", date_time_format_resolved_options_thunk as *const u8, @@ -1005,4 +1785,50 @@ pub fn install_intl_namespace(ns_obj: *mut ObjectHeader) { ), ], ); + install_constructor( + ns_obj, + "ListFormat", + list_format_constructor_thunk as *const u8, + &[ + ("format", list_format_format_thunk as *const u8, 1), + ("formatToParts", list_format_to_parts_thunk as *const u8, 1), + ( + "resolvedOptions", + list_format_resolved_options_thunk as *const u8, + 0, + ), + ], + ); + install_constructor( + ns_obj, + "RelativeTimeFormat", + relative_time_format_constructor_thunk as *const u8, + &[ + ("format", rtf_format_thunk as *const u8, 2), + ("formatToParts", rtf_to_parts_thunk as *const u8, 2), + ( + "resolvedOptions", + rtf_resolved_options_thunk as *const u8, + 0, + ), + ], + ); + install_constructor( + ns_obj, + "PluralRules", + plural_rules_constructor_thunk as *const u8, + &[ + ("select", plural_rules_select_thunk as *const u8, 1), + ( + "selectRange", + plural_rules_select_range_thunk as *const u8, + 2, + ), + ( + "resolvedOptions", + plural_rules_resolved_options_thunk as *const u8, + 0, + ), + ], + ); } diff --git a/crates/perry-runtime/src/intl/locales.rs 
b/crates/perry-runtime/src/intl/locales.rs new file mode 100644 index 000000000..813c3169b --- /dev/null +++ b/crates/perry-runtime/src/intl/locales.rs @@ -0,0 +1,232 @@ +//! `Intl.getCanonicalLocales` and `Intl.supportedValuesOf` — the locale-list +//! services of the `Intl` namespace, split out of `intl.rs` to keep that file +//! under the per-file LOC ceiling. Canonicalization itself lives in +//! [`super::canonicalize_language_tag`]. + +use super::{ + array_ptr_from_value, canonicalize_language_tag, get_field, get_number_field, + object_ptr_from_value, string_from_string_value, string_value, throw_invalid_language_tag, + throw_range_error, throw_type_error, value_to_string, +}; +use crate::array::{js_array_alloc, js_array_get_f64, js_array_length, js_array_push_f64}; +use crate::closure::ClosureHeader; +use crate::value::{js_nanbox_pointer, JSValue}; + +/// The ECMA-402 element-type guard inside CanonicalizeLocaleList: each element +/// must be a String or an Object, else `TypeError`. A Locale/other object is +/// coerced via `ToString` (an `Intl.Locale` stringifies to its canonical id). +fn locale_list_element_tag(value: f64) -> String { + let js = JSValue::from_bits(value.to_bits()); + if js.is_any_string() { + return string_from_string_value(value).unwrap_or_default(); + } + // Object (but not a Symbol, which is pointer-shaped yet a primitive). 
+ if js.is_pointer() && unsafe { crate::symbol::js_is_symbol(value) } == 0 { + return value_to_string(value); + } + throw_type_error("locale must be a String or Object"); +} + +fn push_canonical_locale(seen: &mut Vec<String>, tag: &str) { + let Some(canonical) = canonicalize_language_tag(tag) else { + throw_invalid_language_tag(tag); + }; + if !seen.iter().any(|existing| existing == &canonical) { + seen.push(canonical); + } +} + +pub(super) fn canonical_locales_array(list: &[String]) -> f64 { + let mut arr = js_array_alloc(list.len() as u32); + for locale in list { + arr = js_array_push_f64(arr, string_value(locale)); + } + js_nanbox_pointer(arr as i64) +} + +/// `Intl.getCanonicalLocales(locales)` — CanonicalizeLocaleList then +/// CreateArrayFromList. `undefined` → `[]`; a String → a single-element list; +/// `null` → `TypeError`; an Array (or array-like Object) → its elements +/// canonicalized and de-duplicated, in order; any other primitive → `[]` +/// (`ToObject` yields a wrapper with no integer-indexed entries). +fn get_canonical_locales(locales: f64) -> f64 { + let js = JSValue::from_bits(locales.to_bits()); + let mut seen: Vec<String> = Vec::new(); + + if js.is_undefined() { + return canonical_locales_array(&seen); + } + if js.is_null() { + throw_type_error("Cannot convert undefined or null to object"); + } + if js.is_any_string() { + let tag = string_from_string_value(locales).unwrap_or_default(); + push_canonical_locale(&mut seen, &tag); + return canonical_locales_array(&seen); + } + if let Some(arr) = array_ptr_from_value(locales) { + let len = js_array_length(arr); + for i in 0..len { + let tag = locale_list_element_tag(js_array_get_f64(arr, i)); + push_canonical_locale(&mut seen, &tag); + } + return canonical_locales_array(&seen); + } + if let Some(obj) = object_ptr_from_value(locales) { + // Generic array-like: iterate `O[0..length]`.
+ let len = get_number_field(obj, "length") + .filter(|n| n.is_finite() && *n > 0.0) + .map(|n| n as u32) + .unwrap_or(0); + for i in 0..len { + let tag = locale_list_element_tag(get_field(obj, &i.to_string())); + push_canonical_locale(&mut seen, &tag); + } + return canonical_locales_array(&seen); + } + // Other primitives (number/boolean/symbol/bigint): ToObject succeeds but the + // wrapper has length 0 — an empty list, no throw. + canonical_locales_array(&seen) +} + +pub(super) extern "C" fn get_canonical_locales_thunk( + _closure: *const ClosureHeader, + locales: f64, +) -> f64 { + get_canonical_locales(locales) +} + +// `Intl.supportedValuesOf(key)` data tables. The spec only requires each list to +// be sorted, duplicate-free, and to match the value `type` production for its +// key (test262 self-checks these; it does not compare the set against the host's +// own list). The "<key>-accepted-by-<Ctor>" cross-checks pass because Perry's +// formatters don't reject these option values. Lists are kept in JS +// (code-unit) sort order so a caller's `.sort()` round-trips unchanged.
+const SUPPORTED_CALENDARS: &[&str] = &[ + "buddhist", + "chinese", + "coptic", + "dangi", + "ethioaa", + "ethiopic", + "gregory", + "hebrew", + "indian", + "islamic", + "islamic-civil", + "islamic-rgsa", + "islamic-tbla", + "islamic-umalqura", + "iso8601", + "japanese", + "persian", + "roc", +]; +const SUPPORTED_COLLATIONS: &[&str] = &[ + "compat", "dict", "emoji", "eor", "phonebk", "pinyin", "searchjl", "stroke", "trad", "unihan", + "zhuyin", +]; +const SUPPORTED_CURRENCIES: &[&str] = &[ + "AED", "AFN", "ALL", "AMD", "ANG", "AOA", "ARS", "AUD", "AWG", "AZN", "BAM", "BBD", "BDT", + "BGN", "BHD", "BIF", "BMD", "BND", "BOB", "BRL", "BSD", "BTN", "BWP", "BYN", "BZD", "CAD", + "CDF", "CHF", "CLP", "CNY", "COP", "CRC", "CUP", "CVE", "CZK", "DJF", "DKK", "DOP", "DZD", + "EGP", "ERN", "ETB", "EUR", "FJD", "GBP", "GEL", "GHS", "GMD", "GNF", "GTQ", "GYD", "HKD", + "HNL", "HRK", "HTG", "HUF", "IDR", "ILS", "INR", "IQD", "IRR", "ISK", "JMD", "JOD", "JPY", + "KES", "KGS", "KHR", "KMF", "KPW", "KRW", "KWD", "KYD", "KZT", "LAK", "LBP", "LKR", "LRD", + "LSL", "LYD", "MAD", "MDL", "MGA", "MKD", "MMK", "MNT", "MOP", "MRU", "MUR", "MVR", "MWK", + "MXN", "MYR", "MZN", "NAD", "NGN", "NIO", "NOK", "NPR", "NZD", "OMR", "PAB", "PEN", "PGK", + "PHP", "PKR", "PLN", "PYG", "QAR", "RON", "RSD", "RUB", "RWF", "SAR", "SBD", "SCR", "SDG", + "SEK", "SGD", "SHP", "SLE", "SOS", "SRD", "SSP", "STN", "SVC", "SYP", "SZL", "THB", "TJS", + "TMT", "TND", "TOP", "TRY", "TTD", "TWD", "TZS", "UAH", "UGX", "USD", "UYU", "UZS", "VES", + "VND", "VUV", "WST", "XAF", "XCD", "XOF", "XPF", "YER", "ZAR", "ZMW", "ZWG", +]; +const SUPPORTED_NUMBERING_SYSTEMS: &[&str] = &[ + "arab", "arabext", "beng", "deva", "fullwide", "gujr", "guru", "hanidec", "khmr", "knda", + "laoo", "latn", "mlym", "mong", "mymr", "orya", "tamldec", "telu", "thai", "tibt", +]; +const SUPPORTED_TIME_ZONES: &[&str] = &[ + "Africa/Cairo", + "America/New_York", + "Asia/Tokyo", + "Australia/Sydney", + "Europe/London", + "Pacific/Auckland", +
"UTC", +]; +const SUPPORTED_UNITS: &[&str] = &[ + "acre", + "bit", + "byte", + "celsius", + "centimeter", + "day", + "degree", + "fahrenheit", + "fluid-ounce", + "foot", + "gallon", + "gigabit", + "gigabyte", + "gram", + "hectare", + "hour", + "inch", + "kilobit", + "kilobyte", + "kilogram", + "kilometer", + "liter", + "megabit", + "megabyte", + "meter", + "microsecond", + "mile", + "mile-scandinavian", + "milliliter", + "millimeter", + "millisecond", + "minute", + "month", + "nanosecond", + "ounce", + "percent", + "petabyte", + "pound", + "second", + "stone", + "terabit", + "terabyte", + "week", + "yard", + "year", +]; + +fn supported_values_list(key: &str) -> Option<&'static [&'static str]> { + match key { + "calendar" => Some(SUPPORTED_CALENDARS), + "collation" => Some(SUPPORTED_COLLATIONS), + "currency" => Some(SUPPORTED_CURRENCIES), + "numberingSystem" => Some(SUPPORTED_NUMBERING_SYSTEMS), + "timeZone" => Some(SUPPORTED_TIME_ZONES), + "unit" => Some(SUPPORTED_UNITS), + _ => None, + } +} + +pub(super) extern "C" fn supported_values_of_thunk( + _closure: *const ClosureHeader, + key: f64, +) -> f64 { + // Coerce `key` to String first (the spec's GetOption-like step), then a + // non-key string raises RangeError. + let key_str = value_to_string(key); + match supported_values_list(&key_str) { + Some(list) => { + canonical_locales_array(&list.iter().map(|s| (*s).to_string()).collect::<Vec<_>>()) + } + None => throw_range_error(&format!( + "Invalid key : {key_str}.
Wanted calendar, collation, currency, \ + numberingSystem, timeZone, or unit" + )), + } +} diff --git a/crates/perry/src/commands/compile/collect_modules/feature_detect.rs b/crates/perry/src/commands/compile/collect_modules/feature_detect.rs index e77981ee6..c73481a23 100644 --- a/crates/perry/src/commands/compile/collect_modules/feature_detect.rs +++ b/crates/perry/src/commands/compile/collect_modules/feature_detect.rs @@ -178,6 +178,14 @@ pub(super) fn detect_optional_feature_usage( if hir_debug.contains("property: \"Segmenter\"") { ctx.uses_intl_segmenter = true; } + // `Intl.getCanonicalLocales(...)` / `Intl.*.supportedLocalesOf(...)` gate + // `perry-runtime/intl-locale` (`icu_locale_core` BCP-47 canonicalization). + // Both lower with the method name as a `property` token. + if hir_debug.contains("property: \"getCanonicalLocales\"") + || hir_debug.contains("property: \"supportedLocalesOf\"") + { + ctx.uses_intl_locale = true; + } } // Detect heap-snapshot / `process.report` usage, the only user-facing APIs diff --git a/crates/perry/src/commands/compile/optimized_libs.rs b/crates/perry/src/commands/compile/optimized_libs.rs index 5d9f50ab6..3ef74280f 100644 --- a/crates/perry/src/commands/compile/optimized_libs.rs +++ b/crates/perry/src/commands/compile/optimized_libs.rs @@ -678,7 +678,7 @@ pub(super) fn build_optimized_libs( // Cheap djb2 — no need for the SipHash overhead. 
let target_str = target.unwrap_or("host"); let key_input = format!( - "{}|{}|{}|wasm={}|regex={}|temporal={}|ee={}|url={}|norm={}|seg={}|diag={}|dgram={}|v={}", + "{}|{}|{}|wasm={}|regex={}|temporal={}|ee={}|url={}|norm={}|seg={}|loc={}|diag={}|dgram={}|v={}", feature_arg, panic_abort_safe, target_str, @@ -689,6 +689,7 @@ pub(super) fn build_optimized_libs( ctx.uses_url, ctx.uses_string_normalize, ctx.uses_intl_segmenter, + ctx.uses_intl_locale, ctx.uses_diagnostics, ctx.uses_dgram, env!("CARGO_PKG_VERSION"), @@ -807,6 +808,9 @@ pub(super) fn build_optimized_libs( if ctx.uses_intl_segmenter { cross_features.push("perry-runtime/intl-segmenter".to_string()); } + if ctx.uses_intl_locale { + cross_features.push("perry-runtime/intl-locale".to_string()); + } // Cold-path diagnostic JSON serializers (~95 KB incl. the `serde_json` // pulled only by them) — enabled only when the program uses a heap-snapshot // API or `process.report`. The env-driven GC/typed-feedback dev trace JSON diff --git a/crates/perry/src/commands/compile/types.rs b/crates/perry/src/commands/compile/types.rs index a60b8e9ca..f35253c72 100644 --- a/crates/perry/src/commands/compile/types.rs +++ b/crates/perry/src/commands/compile/types.rs @@ -612,6 +612,12 @@ pub struct CompilationContext { /// `perry-runtime/intl-segmenter` (`unicode-segmentation`, ~73 KB of UAX #29 /// grapheme/word/sentence tables). Other `Intl.*` APIs don't need it. pub uses_intl_segmenter: bool, + /// Whether any TS module canonicalizes a locale tag via + /// `Intl.getCanonicalLocales` or `Intl.*.supportedLocalesOf`. Gates + /// `perry-runtime/intl-locale` (`icu_locale_core`'s data-free BCP-47 / UTS #35 + /// structural parser). A program that never canonicalizes a locale links a + /// lighter hand-rolled fallback instead. + pub uses_intl_locale: bool, /// Whether any TS module uses a heap-snapshot API (`v8.getHeapSnapshot` / /// `v8.writeHeapSnapshot`) or `process.report`. 
Gates /// `perry-runtime/diagnostics` (the cold-path JSON serializers + the @@ -917,6 +923,7 @@ impl CompilationContext { uses_url: false, uses_string_normalize: false, uses_intl_segmenter: false, + uses_intl_locale: false, uses_diagnostics: false, uses_dgram: false, needs_thread: false,