diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 53f7d2db93..7ef302796a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,6 +84,18 @@ jobs: python3 scripts/gc_store_site_inventory.py --self-test python3 scripts/gc_store_site_inventory.py + # Handle-vs-pointer address classification audit: POINTER_TAG payloads + # can be heap pointers OR small registry handles (fetch/zlib/proxy/...), + # and code must classify by magnitude through value/addr_class.rs before + # dereferencing. Catches new hand-typed band literals (0x100000 etc.) + # and new `as *const GcHeader` casts outside addr_class.rs / gc/ before + # they become Linux-only segfaults (#1843, #4004, #4665, #4800 class). + # Allowlist: scripts/addr_class_allowlist.txt. + - name: Address-classification audit + run: | + python3 scripts/addr_class_inventory.py --self-test + python3 scripts/addr_class_inventory.py + # --------------------------------------------------------------------------- # API docs drift gate (#465) # diff --git a/crates/perry-runtime/src/array/flat_clone.rs b/crates/perry-runtime/src/array/flat_clone.rs index 369983bec1..68de51a6af 100644 --- a/crates/perry-runtime/src/array/flat_clone.rs +++ b/crates/perry-runtime/src/array/flat_clone.rs @@ -276,7 +276,7 @@ pub extern "C" fn js_array_clone(src: *const ArrayHeader) -> *mut ArrayHeader { // as pointer-shaped ids. `Array.from(handle)` / `[...handle]` reach this // helper after codegen strips the tag, so ask the generic iterator resolver // before treating the id as a non-array and returning []. 
- if raw_addr > 0 && raw_addr < 0x100000 { + if crate::value::addr_class::is_small_handle(raw_addr) { if let Some(dispatch) = crate::object::handle_property_dispatch() { let method = b"@@iterator"; let iter_fn = unsafe { dispatch(raw_addr as i64, method.as_ptr(), method.len()) }; diff --git a/crates/perry-runtime/src/array/is_array.rs b/crates/perry-runtime/src/array/is_array.rs index 643ba845f4..558f7f139c 100644 --- a/crates/perry-runtime/src/array/is_array.rs +++ b/crates/perry-runtime/src/array/is_array.rs @@ -42,7 +42,7 @@ pub extern "C" fn js_array_is_array(value: f64) -> f64 { if raw_ptr.is_null() { return false_val; } - if (raw_ptr as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(raw_ptr as usize) { return false_val; } diff --git a/crates/perry-runtime/src/array/iterator.rs b/crates/perry-runtime/src/array/iterator.rs index 938a4dab32..dbfe8b5865 100644 --- a/crates/perry-runtime/src/array/iterator.rs +++ b/crates/perry-runtime/src/array/iterator.rs @@ -122,7 +122,7 @@ pub(crate) fn entries_array_for_small_handle_value(value: f64) -> Option<*mut Ar } pub(crate) fn entries_array_for_small_handle_id(id: i64) -> Option<*mut ArrayHeader> { - if id <= 0 || id >= 0x100000 { + if id <= 0 || !crate::value::addr_class::is_small_handle(id as usize) { return None; } let dispatch = crate::object::handle_method_dispatch()?; @@ -726,16 +726,16 @@ pub extern "C" fn js_array_spread_append(dest: *mut ArrayHeader, source: f64) -> /// (now inherited) `[Symbol.iterator]` method. pub(crate) fn is_builtin_iterator_class_id(raw_ptr: usize) -> bool { // Native handle ids (Web-Fetch Headers/Request/Response, streams, ws, DB, - // …) are NaN-boxed POINTER values in the small-handle band `[1, 0x100000)`: - // registry indices, NOT heap pointers. Dereferencing `raw_ptr - 8` as a - // GcHeader for one of them reads unmapped memory and segfaults — e.g. 
- // `for (const [k, v] of response.headers)`, where the lazy `for…of` - // protocol (#4786) routes the Headers handle (id >= 0x40000) through - // `js_get_iterator`, which calls this check. Reject the whole handle band, - // matching the `< 0x100000` floor used by `Array.isArray` and - // `try_dispatch_instance_method_value`. A real built-in iterator is always a - // heap object well above this floor, so this never loses a true match. - if raw_ptr < 0x100000 { + // …) are NaN-boxed POINTER values in the small-handle band (see + // `value::addr_class`): registry indices, NOT heap pointers. Dereferencing + // `raw_ptr - 8` as a GcHeader for one of them reads unmapped memory and + // segfaults — e.g. `for (const [k, v] of response.headers)` (#4800), where + // the lazy `for…of` protocol (#4786) routes the Headers handle + // (id >= 0x40000) through `js_get_iterator`, which calls this check. + // Reject the whole handle band, matching `Array.isArray` and + // `try_dispatch_instance_method_value`. A real built-in iterator is always + // a heap object well above this floor, so this never loses a true match. + if crate::value::addr_class::is_handle_band(raw_ptr) { return false; } unsafe { diff --git a/crates/perry-runtime/src/builtins/arithmetic.rs b/crates/perry-runtime/src/builtins/arithmetic.rs index 3281c80652..6df51f96b5 100644 --- a/crates/perry-runtime/src/builtins/arithmetic.rs +++ b/crates/perry-runtime/src/builtins/arithmetic.rs @@ -57,14 +57,15 @@ pub extern "C" fn js_eq(a: JSValue, b: JSValue) -> JSValue { /// Whether `v` has ECMAScript Type Object (not a primitive). True for plain /// objects, arrays, functions, Dates and boxed primitive wrappers; false for /// Symbols (which are NaN-boxed pointers but are primitives) and for the -/// native-handle id-space below `0x100000` (sockets, zlib streams, …) which -/// must not be dereferenced as heap objects in a coercion path. 
+/// native-handle id-space (sockets, zlib streams, … — see +/// `value::addr_class`) which must not be dereferenced as heap objects in a +/// coercion path. fn eq_is_object(v: JSValue) -> bool { if !v.is_pointer() { return false; } let ptr = v.as_pointer::(); - if ptr.is_null() || (ptr as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(ptr as usize) { return false; } !crate::symbol::is_registered_symbol(ptr as usize) @@ -508,7 +509,7 @@ pub extern "C" fn js_value_typeof(value: f64) -> *mut StringHeader { get_cached(&TYPEOF_OBJECT, "object") }; } - if !ptr.is_null() && (ptr as usize) >= 0x100000 { + if crate::value::addr_class::is_above_handle_band(ptr as usize) { // Symbols: registered in SYMBOL_POINTERS (handles both gc_malloc'd // and Box-leaked symbols, which have no GcHeader). if crate::symbol::is_registered_symbol(ptr as usize) { diff --git a/crates/perry-runtime/src/builtins/console.rs b/crates/perry-runtime/src/builtins/console.rs index f0c8c2f856..3ae853b3b9 100644 --- a/crates/perry-runtime/src/builtins/console.rs +++ b/crates/perry-runtime/src/builtins/console.rs @@ -637,7 +637,7 @@ pub(crate) fn is_console_instance_value(value: f64) -> bool { return false; } let obj = jsval.as_pointer::(); - if obj.is_null() || (obj as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(obj as usize) { return false; } unsafe { (*obj).class_id == CONSOLE_INSTANCE_CLASS_ID } diff --git a/crates/perry-runtime/src/builtins/formatting.rs b/crates/perry-runtime/src/builtins/formatting.rs index 01753dc08e..88dbb0b5c3 100644 --- a/crates/perry-runtime/src/builtins/formatting.rs +++ b/crates/perry-runtime/src/builtins/formatting.rs @@ -825,7 +825,7 @@ pub(crate) fn format_jsvalue(value: f64, depth: usize) -> String { // is smaller than an ObjectHeader). 
crate::temporal::temporal_inspect_string(value) .unwrap_or_else(|| "[object Object]".to_string()) - } else if (ptr as usize) < 0x100000 { + } else if crate::value::addr_class::is_handle_band(ptr as usize) { // Refs #421: Web Fetch (and other) handles are NaN-boxed // POINTER_TAG values whose payload is a small registry id, NOT // a heap pointer — reading the GC header at `ptr - 8` would @@ -1530,7 +1530,7 @@ fn format_jsvalue_for_json(value: f64, depth: usize) -> String { // Temporal value inside an inspected object → `Temporal.X `. crate::temporal::temporal_inspect_string(value) .unwrap_or_else(|| "[object Object]".to_string()) - } else if (ptr as usize) < 0x100000 { + } else if crate::value::addr_class::is_handle_band(ptr as usize) { "[object Object]".to_string() } else if crate::symbol::is_registered_symbol(ptr as usize) || crate::regex::is_registered_regex(ptr as usize) diff --git a/crates/perry-runtime/src/builtins/formatting/boxed_primitives.rs b/crates/perry-runtime/src/builtins/formatting/boxed_primitives.rs index 7453500e78..64dab5528d 100644 --- a/crates/perry-runtime/src/builtins/formatting/boxed_primitives.rs +++ b/crates/perry-runtime/src/builtins/formatting/boxed_primitives.rs @@ -39,7 +39,7 @@ pub(super) unsafe fn boxed_primitive_base_for_object( unsafe fn boxed_primitive_payload_for_object( obj_ptr: *const crate::object::ObjectHeader, ) -> Option<(u32, f64)> { - if (obj_ptr as usize) < 0x100000 || !crate::object::is_valid_obj_ptr(obj_ptr as *const u8) { + if !crate::value::addr_class::is_plausible_heap_addr(obj_ptr as usize) { return None; } let class_id = (*obj_ptr).class_id; @@ -196,7 +196,7 @@ pub(crate) fn boxed_primitive_payload(value: f64) -> Option<(u32, f64)> { let bits = value.to_bits(); let ptr = if jv.is_pointer() { jv.as_pointer::() as *mut crate::object::ObjectHeader - } else if (bits >> 48) == 0 && bits >= 0x100000 { + } else if (bits >> 48) == 0 && crate::value::addr_class::is_above_handle_band(bits as usize) { bits as *mut 
crate::object::ObjectHeader } else { return None; @@ -204,12 +204,13 @@ pub(crate) fn boxed_primitive_payload(value: f64) -> Option<(u32, f64)> { // This is a defensive type-probe over arbitrary `f64` bits, so a candidate // that isn't a real heap object must be rejected *before* the `class_id` // read — otherwise a small subnormal double (e.g. raw bits `0x2800000207`) - // that slips through the `>= 0x100000` raw-pointer heuristic is dereferenced - // as an `ObjectHeader` and faults. Keep the `0x100000` small-handle floor - // (the fetch/Headers id-space lives below it and `is_valid_obj_ptr`'s Linux - // `HEAP_MIN` of `0x1000` would otherwise let those handles through), and - // additionally gate on the real heap range (#4099). - if (ptr as usize) < 0x100000 || !crate::object::is_valid_obj_ptr(ptr as *const u8) { + // that slips through the raw-pointer heuristic above is dereferenced as an + // `ObjectHeader` and faults. `is_plausible_heap_addr` keeps the + // small-handle floor (the fetch/Headers id-space lives below it and + // `is_valid_obj_ptr`'s Linux `HEAP_MIN` of `0x1000` would otherwise let + // those handles through) and additionally gates on the real heap range + // (#4099). 
+ if !crate::value::addr_class::is_plausible_heap_addr(ptr as usize) { return None; } unsafe { boxed_primitive_payload_for_object(ptr) } diff --git a/crates/perry-runtime/src/builtins/formatting/util_format.rs b/crates/perry-runtime/src/builtins/formatting/util_format.rs index 38b72d563f..9cd684a509 100644 --- a/crates/perry-runtime/src/builtins/formatting/util_format.rs +++ b/crates/perry-runtime/src/builtins/formatting/util_format.rs @@ -44,7 +44,7 @@ unsafe fn util_format_json_value_has_cycle(value: f64, stack: &mut Vec) - unsafe fn util_format_json_ptr_has_cycle(ptr: *const u8, stack: &mut Vec) -> bool { let addr = ptr as usize; - if addr < 0x100000 + if crate::value::addr_class::is_handle_band(addr) || crate::buffer::is_registered_buffer(addr) || crate::symbol::is_registered_symbol(addr) { diff --git a/crates/perry-runtime/src/builtins/numbers.rs b/crates/perry-runtime/src/builtins/numbers.rs index b9f336ef0e..d1484b6302 100644 --- a/crates/perry-runtime/src/builtins/numbers.rs +++ b/crates/perry-runtime/src/builtins/numbers.rs @@ -474,7 +474,9 @@ pub extern "C" fn js_number_coerce(value: f64) -> f64 { // identifiers, so test assertions like `typeof x === "number"` // hold). Gate on the timer registry so unrelated small handles // (UI widgets, drizzle, etc.) still fall through to toPrimitive. 
- if id > 0 && id < 0x100000 && crate::timer::is_known_timer_id(id) { + if crate::value::addr_class::is_small_handle(id as usize) + && crate::timer::is_known_timer_id(id) + { return id as f64; } // Array → ToPrimitive(number) finds no `valueOf` override, so it diff --git a/crates/perry-runtime/src/closure/dynamic_props.rs b/crates/perry-runtime/src/closure/dynamic_props.rs index 49944edda1..8a2185b275 100644 --- a/crates/perry-runtime/src/closure/dynamic_props.rs +++ b/crates/perry-runtime/src/closure/dynamic_props.rs @@ -271,17 +271,15 @@ pub fn scan_closure_dynamic_props_roots_mut(visitor: &mut crate::gc::RuntimeRoot /// Check if a raw pointer points to a ClosureHeader by checking CLOSURE_MAGIC at offset 12. /// Safe to call with any non-null, sufficiently aligned pointer >= 0x10000. pub fn is_closure_ptr(ptr: usize) -> bool { - // Reject the native / Web-Fetch small-handle band (< 0x100000). Fetch - // handles (Headers/Request/Response/Blob, [0x40000, 0x100000)), node:http - // handles, and revocable-proxy ids ([0xF0000, 0x100000)) are NaN-boxed - // POINTER_TAG values holding a small registry id, not heap pointers — a - // real closure is always a heap allocation well above 0x100000. The old - // 0x10000 floor let a 0x40000 Headers handle through, so the - // `*(ptr + 12)` CLOSURE_MAGIC probe below dereferenced unmapped low - // memory and SIGSEGVd on Linux (macOS masked it via the much higher - // is_valid_obj_ptr heap floor). 0x100000 matches the cutoff used across - // the object field-read paths (field_get_set.rs / class_registry.rs). - if ptr < 0x100000 { + // Reject the native / Web-Fetch small-handle band (see + // `value::addr_class` for the band map). Fetch handles, node:http + // handles, and revocable-proxy ids are NaN-boxed POINTER_TAG values + // holding a small registry id, not heap pointers — a real closure is + // always a heap allocation above the band. 
The old 0x10000 floor let a + // 0x40000 Headers handle through, so the `*(ptr + 12)` CLOSURE_MAGIC + // probe below dereferenced unmapped low memory and SIGSEGVd on Linux + // (macOS masked it via the much higher is_valid_obj_ptr heap floor). + if crate::value::addr_class::is_handle_band(ptr) { return false; } if ptr % std::mem::align_of::() != 0 { diff --git a/crates/perry-runtime/src/date.rs b/crates/perry-runtime/src/date.rs index ce83514ad6..39ae25ac35 100644 --- a/crates/perry-runtime/src/date.rs +++ b/crates/perry-runtime/src/date.rs @@ -63,19 +63,20 @@ pub fn date_invalid() -> f64 { pub fn is_date_cell_addr(addr: usize) -> bool { // #4004: small-handle registry ids (Web Fetch Request/Headers/Response, // perry-ffi/node:http handles, timer ids, …) are NaN-boxed as POINTER_TAG - // values but are NOT real heap addresses — they live in the `< 0x100000` - // small-handle band. Real `DateCell`s are arena-allocated, always at or - // above the small-handle cutoff. Dereferencing `addr - GC_HEADER_SIZE` on a - // small handle reads unmapped memory: once #4018 moved fetch handles up to - // 0x40000 (past the old 0x1000 floor), any untyped `request.headers.get()` - // dispatch routed its receiver through `is_date_value` here and segfaulted. - // Reject the whole small-handle band so this is an exact heap-pointer check. - if addr < 0x100000 || !crate::object::is_valid_obj_ptr(addr as *const u8) { - return false; - } + // values but are NOT real heap addresses — they live in the small-handle + // band (see `value::addr_class`). Real `DateCell`s are arena-allocated, + // always above the small-handle cutoff. Dereferencing + // `addr - GC_HEADER_SIZE` on a small handle reads unmapped memory: once + // #4018 moved fetch handles up to 0x40000 (past the old 0x1000 floor), any + // untyped `request.headers.get()` dispatch routed its receiver through + // `is_date_value` here and segfaulted. 
`try_read_gc_header` rejects the + // whole band before the deref, so this is an exact heap-pointer check. unsafe { - let header = (addr - crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader; - if (*header).obj_type != crate::gc::GC_TYPE_DATE_CELL { + let header = match crate::value::addr_class::try_read_gc_header(addr) { + Some(header) => header, + None => return false, + }; + if header.obj_type != crate::gc::GC_TYPE_DATE_CELL { return false; } // #4003: `Buffer`s are raw-`alloc`'d with NO `GcHeader`, so the word at diff --git a/crates/perry-runtime/src/fs/mod.rs b/crates/perry-runtime/src/fs/mod.rs index e7cb91adf7..8f33df5ffa 100644 --- a/crates/perry-runtime/src/fs/mod.rs +++ b/crates/perry-runtime/src/fs/mod.rs @@ -128,7 +128,7 @@ fn object_class_id(value: f64) -> Option { return None; } let obj = js_value.as_pointer::(); - if obj.is_null() || (obj as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(obj as usize) { return None; } unsafe { diff --git a/crates/perry-runtime/src/json/raw_json.rs b/crates/perry-runtime/src/json/raw_json.rs index b3eb2508f7..fcd317f252 100644 --- a/crates/perry-runtime/src/json/raw_json.rs +++ b/crates/perry-runtime/src/json/raw_json.rs @@ -149,7 +149,7 @@ pub unsafe extern "C" fn js_json_is_raw_json(value: f64) -> f64 { return false_val; } let ptr = (bits & crate::value::POINTER_MASK) as *const u8; - if ptr.is_null() || (ptr as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(ptr as usize) { return false_val; } if ptr_is_raw_json_wrapper(ptr) { diff --git a/crates/perry-runtime/src/json/reviver.rs b/crates/perry-runtime/src/json/reviver.rs index 1ac4b4e52b..d6d75e19dc 100644 --- a/crates/perry-runtime/src/json/reviver.rs +++ b/crates/perry-runtime/src/json/reviver.rs @@ -320,7 +320,7 @@ unsafe fn force_materialize_if_lazy(value: JSValue) -> JSValue { return value; } let ptr = (bits & 0x0000_FFFF_FFFF_FFFF) as *const u8; - if ptr.is_null() || (ptr as usize) < 0x100000 { + if 
crate::value::addr_class::is_handle_band(ptr as usize) { return value; } let gc_header = ptr.sub(crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader; @@ -416,7 +416,7 @@ unsafe fn json_is_object(value: f64) -> bool { return true; } if let Some(ptr) = extract_pointer(value.to_bits()) { - if ptr.is_null() || (ptr as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(ptr as usize) { return false; } return gc_obj_type(ptr) == crate::gc::GC_TYPE_OBJECT; diff --git a/crates/perry-runtime/src/map.rs b/crates/perry-runtime/src/map.rs index dda6f59e7d..9d8013ab6d 100644 --- a/crates/perry-runtime/src/map.rs +++ b/crates/perry-runtime/src/map.rs @@ -69,38 +69,31 @@ fn register_map(ptr: *mut MapHeader) { } pub fn is_registered_map(addr: usize) -> bool { - // Fast pre-filter: managed Maps carry `GcHeader.obj_type == - // GC_TYPE_MAP` at `addr - GC_HEADER_SIZE`. A single i8 load + cmp - // short-circuits the non-Map path (the common case across the - // typed-dispatch chain `if is_registered_map { ... } else if - // is_registered_set { ... } ...`) without paying the - // `HashSet::contains` SipHash. The HashSet check still runs - // on byte-matches to defend against: - // 1. False-positive aliasing — another managed object or a non-GC - // allocation (for example a small BufferHeader slab entry) whose - // preceding byte happens to read as 8. - // 2. Stale post-sweep ptrs — drop_map_index removes from - // MAP_REGISTRY; the GcHeader byte may persist until the slot - // is reused. - // Profile (samply, perf-comprehensive): ~5.7% inclusive samples - // were attributed to is_registered_map's HashSet lookup before - // this fast path landed. // #4004: small-handle registry ids (Web Fetch, perry-ffi/node:http, timers, - // …) are NaN-boxed POINTER_TAG values living below the `0x100000` - // small-handle cutoff; they are not heap addresses. 
Managed Maps are - // arena-allocated above it, so reject the whole small-handle band before - // dereferencing `addr - GC_HEADER_SIZE` (deref'ing e.g. a 0x40000 fetch - // handle reads unmapped memory and segfaults — see is_date_cell_addr). - if addr < 0x100000 { + // …) are NaN-boxed POINTER_TAG values living below the small-handle + // cutoff; they are not heap addresses. Managed Maps are arena-allocated + // above it. See `value::addr_class` for the band map. + if crate::value::addr_class::is_handle_band(addr) { return false; } - unsafe { - let header = (addr - crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader; - if (*header).obj_type != crate::gc::GC_TYPE_MAP { - return false; - } + // Registry FIRST: it is authoritative and dereference-free (mirrors + // set::is_registered_set, #4665). The previous ordering probed + // `GcHeader.obj_type` at `addr - 8` as a fast pre-filter BEFORE the + // registry lookup — that dereferenced arbitrary above-band candidate + // pointers (e.g. garbage read off a mis-typed receiver) and segfaults on + // Linux where freed/foreign pages get unmapped (mimalloc on macOS retains + // them, hiding the bug). The pre-filter's perf rationale (a ~5.7%-sample + // SipHash `HashSet::contains`) predates MAP_REGISTRY moving to the + // Fibonacci-hash `PtrHashSet`, which is what set.rs ships with today. + if !MAP_REGISTRY.with(|r| r.borrow().contains(&addr)) { + return false; + } + // A registered address is a live arena Map; the header read is safe and + // guards against a stale entry whose memory was reused by another type. 
+ match unsafe { crate::value::addr_class::try_read_gc_header(addr) } { + Some(header) => header.obj_type == crate::gc::GC_TYPE_MAP, + None => false, } - MAP_REGISTRY.with(|r| r.borrow().contains(&addr)) } /// Resolve a NaN-boxed (or raw-i64) `this` receiver to a registered `Map` diff --git a/crates/perry-runtime/src/object/class_registry.rs b/crates/perry-runtime/src/object/class_registry.rs index d28a170764..755fc5255e 100644 --- a/crates/perry-runtime/src/object/class_registry.rs +++ b/crates/perry-runtime/src/object/class_registry.rs @@ -4155,15 +4155,14 @@ pub extern "C" fn js_object_mark_class(obj: i64) { /// so raw Map/Set/Buffer pointers (no GcHeader) are never misread. Used by /// `typeof`, `new`, and `instanceof` to recognize a class value. pub fn is_class_object_ptr(ptr: *const u8) -> bool { - // Reject anything in the native-module handle range (< 0x100000). Those - // are registry ids (net.Socket, zlib stream, crypto, fastify, ioredis, - // timers, …) bit-OR'd with POINTER_TAG, not real heap pointers — real - // objects always live well above 0x100000. The previous 0x1008 floor only - // caught the tiny net/fastify id space; a mid-range handle (e.g. zlib's - // zlib stream base, #1843) sailed past it and this function then - // segfaulted dereferencing `[handle - 8]` as a GcHeader. 0x100000 is the - // same handle/real-pointer threshold `js_native_call_method` already uses. - if ptr.is_null() || (ptr as usize) < 0x100000 { + // Reject anything in the native-module handle band (see + // `value::addr_class`). Those are registry ids (net.Socket, zlib stream, + // crypto, fastify, ioredis, timers, …) bit-OR'd with POINTER_TAG, not real + // heap pointers — real objects always live above the band. The previous + // 0x1008 floor only caught the tiny net/fastify id space; a mid-range + // handle (e.g. zlib's stream base, #1843) sailed past it and this function + // then segfaulted dereferencing `[handle - 8]` as a GcHeader. 
+ if crate::value::addr_class::is_handle_band(ptr as usize) { return false; } unsafe { @@ -5104,7 +5103,7 @@ pub fn lookup_class_method_in_chain(class_id: u32, name: &str) -> Option<(usize, /// method value (for identity), not the raw stored field — i.e. the own-property /// shadow rule applies to genuine instances, not to the prototype itself. pub fn is_registered_class_prototype_object(ptr: usize) -> bool { - if ptr < 0x100000 { + if crate::value::addr_class::is_handle_band(ptr) { return false; } if let Ok(guard) = CLASS_PROTOTYPE_OBJECTS.read() { diff --git a/crates/perry-runtime/src/object/delete_rest.rs b/crates/perry-runtime/src/object/delete_rest.rs index 3cd3309f63..3d37f9ae32 100644 --- a/crates/perry-runtime/src/object/delete_rest.rs +++ b/crates/perry-runtime/src/object/delete_rest.rs @@ -34,13 +34,13 @@ pub extern "C" fn js_object_delete_field( if obj.is_null() || key.is_null() { return 1; } - // A Proxy is a small registered id in [0xF0000, 0x100000), not a heap + // A Proxy is a small registered id in the proxy id band, not a heap // ObjectHeader. Dereferencing it below (GC header / keys_array reads) would // segfault. Route `delete proxy.k` / `delete proxy[k]` through the proxy // `deleteProperty` trap. (#2846-family Proxy crash cluster.) { let addr = obj as u64; - if (0xF0000..0x100000).contains(&addr) { + if crate::value::addr_class::is_proxy_id_band(addr as usize) { const POINTER_TAG: u64 = 0x7FFD_0000_0000_0000; let boxed = f64::from_bits(POINTER_TAG | (addr & 0x0000_FFFF_FFFF_FFFF)); if crate::proxy::js_proxy_is_proxy(boxed) != 0 { @@ -277,7 +277,7 @@ pub extern "C" fn js_object_delete_dynamic(obj: *mut ObjectHeader, key: f64) -> // `js_object_delete_field`, which has its own guard). 
{ let addr = obj as u64; - if (0xF0000..0x100000).contains(&addr) { + if crate::value::addr_class::is_proxy_id_band(addr as usize) { const POINTER_TAG: u64 = 0x7FFD_0000_0000_0000; let boxed = f64::from_bits(POINTER_TAG | (addr & 0x0000_FFFF_FFFF_FFFF)); if crate::proxy::js_proxy_is_proxy(boxed) != 0 { diff --git a/crates/perry-runtime/src/object/descriptors.rs b/crates/perry-runtime/src/object/descriptors.rs index 0ce413d2a0..45e495602f 100644 --- a/crates/perry-runtime/src/object/descriptors.rs +++ b/crates/perry-runtime/src/object/descriptors.rs @@ -897,7 +897,7 @@ pub extern "C" fn js_object_get_own_property_names(obj_value: f64) -> f64 { } if obj_jv.is_pointer() { let raw = crate::value::js_nanbox_get_pointer(obj_value) as usize; - if raw > 0 && raw < 0x100000 { + if crate::value::addr_class::is_small_handle(raw) { if let Some(dispatch) = super::class_registry::handle_own_property_names_dispatch() { let names = dispatch(raw as i64); diff --git a/crates/perry-runtime/src/object/exotic_expando.rs b/crates/perry-runtime/src/object/exotic_expando.rs index 6dbe2fd8a1..a3feca577b 100644 --- a/crates/perry-runtime/src/object/exotic_expando.rs +++ b/crates/perry-runtime/src/object/exotic_expando.rs @@ -35,11 +35,11 @@ pub(crate) enum ExoticKind { /// `None` for everything else (including the small-handle band). One /// `GcHeader` read; the RegExp set probe only runs for `GC_TYPE_OBJECT`. 
pub(crate) fn exotic_expando_kind(addr: usize) -> Option { - if addr < 0x100000 || !super::is_valid_obj_ptr(addr as *const u8) { - return None; - } - let gc = (addr - crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader; - match unsafe { (*gc).obj_type } { + let gc = match unsafe { crate::value::addr_class::try_read_gc_header(addr) } { + Some(header) => header, + None => return None, + }; + match gc.obj_type { crate::gc::GC_TYPE_DATE_CELL => Some(ExoticKind::Date), crate::gc::GC_TYPE_ERROR => Some(ExoticKind::Error), crate::gc::GC_TYPE_OBJECT if crate::regex::is_regex_pointer(addr as *const u8) => { diff --git a/crates/perry-runtime/src/object/field_get_set.rs b/crates/perry-runtime/src/object/field_get_set.rs index 9f998d61a0..7406c4420f 100644 --- a/crates/perry-runtime/src/object/field_get_set.rs +++ b/crates/perry-runtime/src/object/field_get_set.rs @@ -892,7 +892,7 @@ pub extern "C" fn js_object_set_field(obj: *mut ObjectHeader, field_index: u32, /// method body and crashing on the bogus `this` pointer. 
#[no_mangle] pub extern "C" fn js_object_get_class_id(obj: *const ObjectHeader) -> u32 { - if obj.is_null() || (obj as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(obj as usize) { return 0; } let addr = obj as usize; @@ -1159,7 +1159,7 @@ pub extern "C" fn js_object_keys_value(value: f64) -> *mut ArrayHeader { } if jv.is_pointer() { let ptr = jv.as_pointer::() as usize; - if ptr > 0 && ptr < 0x100000 { + if crate::value::addr_class::is_small_handle(ptr) { if let Some(dispatch) = super::class_registry::handle_own_property_names_dispatch() { let names = unsafe { dispatch(ptr as i64) }; if names.to_bits() != crate::value::TAG_UNDEFINED { @@ -2165,7 +2165,7 @@ pub extern "C" fn js_object_has_property(obj: f64, key: f64) -> f64 { let f = f64::from_bits(obj.to_bits()); if key_val.is_any_string() && f.is_finite() && f > 0.0 && f.fract() == 0.0 { let id = f as usize; - if (0x100000..0x200000).contains(&id) { + if crate::value::addr_class::is_stream_id_band(id) { if let Some(probe) = crate::object::stream_handle_probe() { unsafe { if probe(id) { @@ -2278,7 +2278,7 @@ pub extern "C" fn js_object_has_property(obj: f64, key: f64) -> f64 { // `keys_array`. Mirror the property-get IC miss path: ask the registered // handle property dispatcher whether the property resolves to a real // value. - if obj_addr > 0 && obj_addr < 0x100000 { + if crate::value::addr_class::is_small_handle(obj_addr as usize) { // #1781: accept inline SSO short keys (`"id" in handle`) — is_string() // is STRING_TAG-only, so a <=5-char key skipped the handle dispatcher // and `in` wrongly returned false. Materialize SSO bytes to a heap @@ -2627,10 +2627,10 @@ pub extern "C" fn js_object_get_field_by_name( // Node. `js_proxy_is_proxy` validates the value is a *registered* proxy so a // real heap object whose address happens to be small isn't misrouted. 
{ - // Proxy ids live in [0xF0000, 0x100000); `js_proxy_is_proxy` confirms + // Proxy ids live in the proxy id band; `js_proxy_is_proxy` confirms // it is a *registered* proxy before we route to the proxy getter. let addr = obj as u64; - if (0xF0000..0x100000).contains(&addr) && !key.is_null() { + if crate::value::addr_class::is_proxy_id_band(addr as usize) && !key.is_null() { const POINTER_TAG: u64 = 0x7FFD_0000_0000_0000; let boxed = f64::from_bits(POINTER_TAG | (addr & 0x0000_FFFF_FFFF_FFFF)); if crate::proxy::js_proxy_is_proxy(boxed) != 0 { @@ -2841,7 +2841,7 @@ pub extern "C" fn js_object_get_field_by_name( } else { 0 }; - if raw > 0 && raw < 0x100000 { + if crate::value::addr_class::is_small_handle(raw) { if !key.is_null() { unsafe { let key_ptr = @@ -3194,9 +3194,9 @@ pub extern "C" fn js_object_get_field_by_name( } else { 0 }; - // Native-module registry handles live below 0x100000 and can also be - // POINTER_TAG-boxed; do not walk back to a GcHeader for those. - if raw >= 0x100000 && !key.is_null() && is_valid_obj_ptr(raw as *const u8) { + // Native-module registry handles live in the handle band and can also + // be POINTER_TAG-boxed; do not walk back to a GcHeader for those. + if crate::value::addr_class::is_plausible_heap_addr(raw) && !key.is_null() { { unsafe { let gc_header = (raw - crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader; @@ -3264,7 +3264,7 @@ pub extern "C" fn js_object_get_field_by_name( let f = f64::from_bits(obj as u64); if !key.is_null() && f.is_finite() && f > 0.0 && f.fract() == 0.0 { let id = f as usize; - if (0x100000..0x200000).contains(&id) { + if crate::value::addr_class::is_stream_id_band(id) { if let Some(probe) = crate::object::stream_handle_probe() { unsafe { if probe(id) { @@ -3340,7 +3340,7 @@ pub extern "C" fn js_object_get_field_by_name( // status/data, fastify req query/params/...). Without // this, every property access on those handles silently // returned undefined. 
- if (raw as usize) > 0 && (raw as usize) < 0x100000 { + if crate::value::addr_class::is_small_handle(raw as usize) { if !key.is_null() { unsafe { let key_ptr = @@ -3407,7 +3407,7 @@ pub extern "C" fn js_object_get_field_by_name( } // Same handle-receiver path for already-stripped pointers — happens // when the codegen passes a raw i64 handle through the slow path. - if (obj as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(obj as usize) { if !key.is_null() { unsafe { let key_ptr = (key as *const u8).add(std::mem::size_of::()); @@ -5258,7 +5258,7 @@ pub extern "C" fn js_object_get_field_ic_miss( // a real heap object whose address happens to be small isn't misrouted. { let addr = obj as u64; - if (0xF0000..0x100000).contains(&addr) { + if crate::value::addr_class::is_proxy_id_band(addr as usize) { const POINTER_TAG: u64 = 0x7FFD_0000_0000_0000; let boxed = f64::from_bits(POINTER_TAG | (addr & 0x0000_FFFF_FFFF_FFFF)); if crate::proxy::js_proxy_is_proxy(boxed) != 0 { @@ -5277,7 +5277,7 @@ pub extern "C" fn js_object_get_field_ic_miss( // `< 0x100000` proxy / HANDLE_PROPERTY_DISPATCH routing below — matching // the ordering in `js_object_get_field_by_name`. The macOS heap floor // (0x200_0000_0000 in is_valid_obj_ptr) masked this; Linux's is 0x1000. - if (obj as usize) >= 0x100000 { + if crate::value::addr_class::is_above_handle_band(obj as usize) { unsafe { if let Some(val) = closure_dynamic_prop_by_key(obj as usize, key) { return val; @@ -5303,7 +5303,7 @@ pub extern "C" fn js_object_get_field_ic_miss( // so `r.status` / `r.data` and similar handle-property accesses // dispatch to the per-module accessor instead of silently // returning undefined. - if (obj as usize) > 0 && (obj as usize) < 0x100000 { + if crate::value::addr_class::is_small_handle(obj as usize) { // #2846: a revocable Proxy is encoded as a small fake pointer in the // proxy-id range (also `< 0x100000`). 
A generic `proxy.key` read funnels // here via the IC-miss path; route it to the proxy get dispatch (which diff --git a/crates/perry-runtime/src/object/field_set_by_name.rs b/crates/perry-runtime/src/object/field_set_by_name.rs index 087cccec56..f23ce97437 100644 --- a/crates/perry-runtime/src/object/field_set_by_name.rs +++ b/crates/perry-runtime/src/object/field_set_by_name.rs @@ -1188,7 +1188,10 @@ pub extern "C" fn js_object_set_field_by_name_nonenum( // TypedArrays, Temporal cells, etc. are handled by `set_field_by_name`'s own // routing and never reach the ordinary enumerable default, so skip them. let bits = obj as u64; - if (bits >> 48) == 0x7FFE || obj.is_null() || (obj as usize) < 0x100000 || key.is_null() { + if (bits >> 48) == 0x7FFE + || crate::value::addr_class::is_handle_band(obj as usize) + || key.is_null() + { return; } unsafe { diff --git a/crates/perry-runtime/src/object/global_this.rs b/crates/perry-runtime/src/object/global_this.rs index 70fc110fd4..0065cee4ee 100644 --- a/crates/perry-runtime/src/object/global_this.rs +++ b/crates/perry-runtime/src/object/global_this.rs @@ -318,7 +318,7 @@ unsafe fn subclass_this_object_ptr(this_box: f64) -> Option<*mut ObjectHeader> { return None; } let raw = (bits & 0x0000_FFFF_FFFF_FFFF) as usize; - if raw < 0x100000 || !crate::object::is_valid_obj_ptr(raw as *const u8) { + if !crate::value::addr_class::is_plausible_heap_addr(raw) { return None; } Some(raw as *mut ObjectHeader) diff --git a/crates/perry-runtime/src/object/instanceof.rs b/crates/perry-runtime/src/object/instanceof.rs index 2474b2498a..a68632f773 100644 --- a/crates/perry-runtime/src/object/instanceof.rs +++ b/crates/perry-runtime/src/object/instanceof.rs @@ -33,17 +33,22 @@ pub(crate) fn value_is_callable(value: f64) -> bool { } fn small_native_handle_id(value: f64) -> Option<i64> { + use crate::value::addr_class; let bits = value.to_bits(); if (bits & crate::value::TAG_MASK) == crate::value::POINTER_TAG { let raw = (bits & 
crate::value::POINTER_MASK) as i64; - if raw > 0 && raw < 0x100000 { + if addr_class::is_small_handle(raw as usize) { return Some(raw); } } - if bits > 0 && bits < 0x100000 { + if addr_class::is_small_handle(bits as usize) { return Some(bits as i64); } - if value.is_finite() && value > 0.0 && value.fract() == 0.0 && value < 0x100000 as f64 { + if value.is_finite() + && value > 0.0 + && value.fract() == 0.0 + && value < addr_class::HANDLE_BAND_MAX as f64 + { return Some(value as i64); } None @@ -439,7 +444,9 @@ fn rhs_is_object_value(value: f64) -> bool { // Symbols are primitives; small registry handles aren't real objects // here either, but they're still object-typed in JS (`typeof` is // "object"), so a "not callable" message is the right one for them. - if ptr >= 0x100000 && crate::symbol::is_registered_symbol(ptr) { + if crate::value::addr_class::is_above_handle_band(ptr) + && crate::symbol::is_registered_symbol(ptr) + { return false; } return true; @@ -529,7 +536,7 @@ pub extern "C" fn js_instanceof(value: f64, class_id: u32) -> f64 { let jv = crate::JSValue::from_bits(value.to_bits()); if jv.is_pointer() { let obj = jv.as_pointer::(); - if !obj.is_null() && (obj as usize) >= 0x100000 { + if crate::value::addr_class::is_above_handle_band(obj as usize) { let gc_header = unsafe { (obj as *const u8).sub(crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader }; @@ -840,7 +847,9 @@ pub extern "C" fn js_instanceof(value: f64, class_id: u32) -> f64 { // `blob instanceof Blob` is true for that representation too. 
if class_id == CLASS_ID_BLOB && jsval.is_pointer() { let obj = jsval.as_pointer::(); - if (obj as usize) >= 0x100000 && unsafe { (*obj).class_id } == CLASS_ID_BLOB { + if crate::value::addr_class::is_above_handle_band(obj as usize) + && unsafe { (*obj).class_id } == CLASS_ID_BLOB + { return true_val; } } @@ -998,7 +1007,7 @@ pub extern "C" fn js_instanceof(value: f64, class_id: u32) -> f64 { // are NOT real ObjectHeader pointers — reading the GC header at // `obj_ptr - 8` would SIGSEGV on unmapped memory. They aren't instances // of any user-defined class either, so return false unconditionally. - if (obj_ptr as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(obj_ptr as usize) { return false_val; } diff --git a/crates/perry-runtime/src/object/mod.rs b/crates/perry-runtime/src/object/mod.rs index c5b71cc615..2715eec81b 100644 --- a/crates/perry-runtime/src/object/mod.rs +++ b/crates/perry-runtime/src/object/mod.rs @@ -2154,54 +2154,11 @@ pub(crate) fn extends_builtin_error(class_id: u32) -> bool { false } -/// Check if a pointer is a valid heap object (safe to dereference GcHeader). -/// Values below 0x100000 (1MB) are likely INT32_TAG extracts, small handles, -/// or null. The upper bound filters out NaN-box tag bits that leaked through. -/// -/// Issue #73 follow-up: raised the lower bound from 1 MB to 2 TB to reject -/// corrupted NaN-boxes whose 48-bit handle lands in the 1-2 TB window -/// (e.g. `0x00FF_0000_0000` from an `ArrayHeader { length: 0, capacity: -/// 255 }` read as u64). Real macOS mimalloc + arena allocations all -/// land in the 3-5 TB range; anything below 2 TB is certainly bogus on -/// that platform. Linux glibc and Windows mimalloc allocate well below -/// 2 TB though (often in the GB-to-tens-of-GB range), so the macOS floor -/// silently rejects every legitimate object pointer there — issues -/// #385/#386/#387 traced back to this exact filter on Windows. 
-/// -/// #1136 / #1129: iOS-family *device* targets (aarch64-apple-ios, -/// -tvos, -watchos, -visionos) ship without mimalloc and use -/// libsystem_malloc, whose user allocations land in the same low range -/// as Android/Linux/Windows. Treat them like those platforms — the -/// downstream `GcHeader.obj_type` check is the real liveness guard. -/// The simulator (e.g. ios + target_abi = "sim") runs on the macOS -/// host's mimalloc so its allocations still land above 2 TB; lowering -/// the floor here is safe because the obj_type validation does the -/// work. -#[inline(always)] -pub(crate) fn is_valid_obj_ptr(ptr: *const u8) -> bool { - let addr = ptr as u64; - #[cfg(any( - target_os = "android", - target_os = "linux", - target_os = "windows", - target_os = "ios", - target_os = "tvos", - target_os = "watchos", - target_os = "visionos", - ))] - const HEAP_MIN: u64 = 0x1000; - #[cfg(not(any( - target_os = "android", - target_os = "linux", - target_os = "windows", - target_os = "ios", - target_os = "tvos", - target_os = "watchos", - target_os = "visionos", - )))] - const HEAP_MIN: u64 = 0x200_0000_0000; - (HEAP_MIN..0x8000_0000_0000).contains(&addr) -} +// `is_valid_obj_ptr` moved to `value/addr_class.rs` (the centralized +// handle-vs-heap-pointer classification module); re-exported here so the +// existing `crate::object::is_valid_obj_ptr` call sites keep compiling +// unchanged. 
+pub(crate) use crate::value::addr_class::is_valid_obj_ptr; /// Object header - precedes the fields in memory #[repr(C)] diff --git a/crates/perry-runtime/src/object/native_call_method.rs b/crates/perry-runtime/src/object/native_call_method.rs index 77a5fa7cad..0d0e04980c 100644 --- a/crates/perry-runtime/src/object/native_call_method.rs +++ b/crates/perry-runtime/src/object/native_call_method.rs @@ -1265,7 +1265,7 @@ pub(crate) unsafe fn try_dispatch_instance_method_value( return None; } let raw = crate::value::js_nanbox_get_pointer(receiver) as usize; - if raw < 0x100000 { + if crate::value::addr_class::is_handle_band(raw) { return None; } let ptr = raw as *const ObjectHeader; @@ -1416,7 +1416,7 @@ pub unsafe extern "C" fn js_native_call_method( // Fetch, stream, and other runtime objects use small tagged handles that // are pointer-shaped but not heap allocations. Avoid asking the closure // probe to dereference those handles as addresses. - if raw_addr >= 0x100000 + if crate::value::addr_class::is_above_handle_band(raw_addr) && crate::closure::is_closure_ptr(raw_addr) && !crate::closure::closure_is_key_deleted(raw_addr, method_name) // apply/call/bind/toString on a closure receiver have dedicated @@ -1460,7 +1460,7 @@ pub unsafe extern "C" fn js_native_call_method( // non-accessor programs skip it entirely. if jsval.is_pointer() && crate::object::ACCESSORS_IN_USE.with(|c| c.get()) { let obj_usize = crate::value::js_nanbox_get_pointer(object) as usize; - if obj_usize >= 0x100000 { + if crate::value::addr_class::is_above_handle_band(obj_usize) { if let Some(acc) = crate::object::get_accessor_descriptor(obj_usize, method_name) { if acc.get != 0 { let getter = (acc.get & crate::value::POINTER_MASK) @@ -1578,7 +1578,9 @@ pub unsafe extern "C" fn js_native_call_method( // codegen-registered text (or a synthesized native form), rather than // falling through to the generic `"[object Object]"`. 
let raw_addr = crate::value::js_nanbox_get_pointer(object) as usize; - if raw_addr >= 0x100000 && crate::closure::is_closure_ptr(raw_addr) { + if crate::value::addr_class::is_above_handle_band(raw_addr) + && crate::closure::is_closure_ptr(raw_addr) + { if let Some(result) = crate::value::function_to_string_method_result(object) { return result; } @@ -1984,10 +1986,10 @@ pub unsafe extern "C" fn js_native_call_method( } // Check for raw handle integer: Perry may bit-cast an i64 handle directly to f64, - // producing a subnormal float (bits == handle_id, no NaN-box tag). Values 0 < bits < 0x100000 - // with no tag are raw handle IDs from Perry's integer-typed handle parameters. + // producing a subnormal float (bits == handle_id, no NaN-box tag). Untagged values + // in the handle band are raw handle IDs from Perry's integer-typed handle parameters. let raw_bits = object.to_bits(); - if raw_bits > 0 && raw_bits < 0x100000 { + if crate::value::addr_class::is_small_handle(raw_bits as usize) { if let Some(dispatch) = handle_method_dispatch() { let args = refreshed_args(); return dispatch( @@ -2676,7 +2678,7 @@ pub unsafe extern "C" fn js_native_call_method( // objects in a registry and use integer IDs to reference them. 
if jsval.is_pointer() { let raw_ptr = jsval.as_pointer::() as usize; - if raw_ptr > 0 && raw_ptr < 0x100000 { + if crate::value::addr_class::is_small_handle(raw_ptr) { // This is a handle, not a real memory pointer - dispatch to stdlib if let Some(dispatch) = handle_method_dispatch() { return dispatch( @@ -3631,7 +3633,10 @@ pub unsafe extern "C" fn js_native_call_method( { let check_ptr = if jsval.is_pointer() { (raw_bits & 0x0000_FFFF_FFFF_FFFF) as usize - } else if !object.is_nan() && raw_bits >= 0x100000 && (raw_bits >> 48) == 0 { + } else if !object.is_nan() + && crate::value::addr_class::is_above_handle_band(raw_bits as usize) + && (raw_bits >> 48) == 0 + { raw_bits as usize } else { 0 @@ -3790,8 +3795,12 @@ pub unsafe extern "C" fn js_native_call_method( // Handle raw pointer values without NaN-box tags. // Perry sometimes bitcasts I64 pointers to F64 without NaN-boxing (POINTER_TAG). // These appear as subnormal floats with bits in the valid heap address range - // (0x100000 .. 0x0000_FFFF_FFFF_FFFF, upper 16 bits = 0). - if !jsval.is_pointer() && !object.is_nan() && raw_bits >= 0x100000 && (raw_bits >> 48) == 0 { + // (above the handle band, below 0x0000_FFFF_FFFF_FFFF, upper 16 bits = 0). 
+ if !jsval.is_pointer() + && !object.is_nan() + && crate::value::addr_class::is_above_handle_band(raw_bits as usize) + && (raw_bits >> 48) == 0 + { // Looks like a raw heap pointer — re-wrap as POINTER_TAG and retry let reboxed = f64::from_bits(0x7FFD_0000_0000_0000u64 | raw_bits); let reboxed_jsval = JSValue::from_bits(reboxed.to_bits()); diff --git a/crates/perry-runtime/src/object/native_module.rs b/crates/perry-runtime/src/object/native_module.rs index a50f531322..72175ab5eb 100644 --- a/crates/perry-runtime/src/object/native_module.rs +++ b/crates/perry-runtime/src/object/native_module.rs @@ -5792,7 +5792,7 @@ pub extern "C" fn js_class_method_bind( let bits = instance.to_bits(); if (bits >> 48) == 0x7FFD { let id = (bits & 0x0000_FFFF_FFFF_FFFF) as i64; - if id > 0 && id < 0x100000 { + if crate::value::addr_class::is_small_handle(id as usize) { if let Some(dispatch) = handle_property_dispatch() { let value = HANDLE_PROPERTY_BIND_REENTRY.with(|guard| { if guard.get() { @@ -5857,7 +5857,7 @@ pub extern "C" fn js_class_method_bind( ) { let obj = recv_jsv.as_pointer::(); - if !obj.is_null() && (obj as usize) >= 0x100000 { + if crate::value::addr_class::is_above_handle_band(obj as usize) { let key = crate::string::js_string_from_bytes( method_name_ptr, method_name_len as u32, @@ -5947,7 +5947,7 @@ fn class_id_from_method_receiver(instance: f64) -> Option { let jsv = JSValue::from_bits(instance.to_bits()); if jsv.is_pointer() { let obj = jsv.as_pointer::(); - if !obj.is_null() && (obj as usize) >= 0x100000 { + if crate::value::addr_class::is_above_handle_band(obj as usize) { // A callable (closure / function object) is never a class-method // receiver for bound-method marker substitution. 
Its allocation is a // `ClosureHeader`, so reading `class_id` off it as an `ObjectHeader` @@ -6080,7 +6080,7 @@ pub(crate) unsafe fn get_module_name_from_namespace(namespace_obj: f64) -> &'sta return ""; } let obj = jsval.as_pointer::(); - if obj.is_null() || (obj as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(obj as usize) { return ""; } let module_field = js_object_get_field(obj as *mut _, 0); diff --git a/crates/perry-runtime/src/object/object_ops.rs b/crates/perry-runtime/src/object/object_ops.rs index 609d3a0b77..ee43b3d284 100644 --- a/crates/perry-runtime/src/object/object_ops.rs +++ b/crates/perry-runtime/src/object/object_ops.rs @@ -2103,24 +2103,16 @@ pub extern "C" fn js_object_get_own_field_or_undef( } unsafe { let obj = extract_obj_ptr(obj_value); - // Reject anything in the native / Web-Fetch small-handle range - // (< 0x100000). Headers/Request/Response/Blob and node:http handles - // are NaN-boxed POINTER_TAG values holding a small registry id in - // [0x40000, 0x100000) (perry-stdlib `FETCH_HANDLE_ID_{START,END}`), - // not heap object pointers. The old `< 0x10000` floor let a Headers - // handle (first id = 0x40000) through; this fn then dereferenced - // `[handle - GC_HEADER_SIZE]` as a GcHeader and segfaulted. macOS's - // `is_valid_obj_ptr` floor (0x200_0000_0000) masked this, but on - // Linux/Android/iOS the floor is 0x1000, so the bad deref reached. - // 0x100000 matches the cutoff in `js_object_get_field_by_name` - // (field_get_set.rs) and `class_registry` handle guards. - if obj.is_null() || (obj as usize) < 0x100000 { - return f64::from_bits(TAG_UNDEF); - } - if (obj as usize) < crate::gc::GC_HEADER_SIZE + 0x1000 { - return f64::from_bits(TAG_UNDEF); - } - if !is_valid_obj_ptr(obj as *const u8) { + // Reject anything in the native / Web-Fetch small-handle band (see + // `value::addr_class`). 
Headers/Request/Response/Blob and node:http + // handles are NaN-boxed POINTER_TAG values holding a small registry + // id, not heap object pointers. The old `< 0x10000` floor let a + // Headers handle (first id = 0x40000) through; this fn then + // dereferenced `[handle - GC_HEADER_SIZE]` as a GcHeader and + // segfaulted. macOS's `is_valid_obj_ptr` floor (0x200_0000_0000) + // masked this, but on Linux/Android/iOS the floor is 0x1000, so the + // bad deref reached. + if !crate::value::addr_class::is_plausible_heap_addr(obj as usize) { return f64::from_bits(TAG_UNDEF); } let gc_header = @@ -2349,7 +2341,7 @@ pub extern "C" fn js_object_get_prototype_of(obj_value: f64) -> f64 { let top16 = bits >> 48; if top16 == 0x7FFD { let raw_addr = bits & 0x0000_FFFF_FFFF_FFFF; - if raw_addr > 0 && raw_addr < 0x100000 { + if crate::value::addr_class::is_small_handle(raw_addr as usize) { if let Some(dispatch) = super::class_registry::handle_prototype_dispatch() { let proto = unsafe { dispatch(raw_addr as i64) }; if proto.to_bits() != crate::value::TAG_UNDEFINED { diff --git a/crates/perry-runtime/src/promise/combinators.rs b/crates/perry-runtime/src/promise/combinators.rs index 03bbea11b2..2f5f514471 100644 --- a/crates/perry-runtime/src/promise/combinators.rs +++ b/crates/perry-runtime/src/promise/combinators.rs @@ -114,7 +114,7 @@ fn promise_try_type_error_value(callback: f64) -> f64 { fn promise_try_closure_ptr(callback: f64) -> Option<*const crate::closure::ClosureHeader> { let ptr = crate::value::js_nanbox_get_pointer(callback) as usize; - if ptr < 0x100000 { + if crate::value::addr_class::is_handle_band(ptr) { return None; } crate::closure::is_closure_ptr(ptr).then_some(ptr as *const crate::closure::ClosureHeader) @@ -201,7 +201,7 @@ pub extern "C" fn js_value_is_promise(value: f64) -> i32 { // Pointer-tagged native handles (Fetch/Headers/Timers/etc.) also carry // small payloads. 
They are not GC allocations and must not be probed as // Promise headers before the handle dispatch tables see them. - if ptr_usize < 0x100000 { + if crate::value::addr_class::is_handle_band(ptr_usize) { return 0; } unsafe { @@ -323,7 +323,7 @@ pub(crate) fn combinator_iterable_to_array( } let raw = (value.to_bits() & 0x0000_FFFF_FFFF_FFFF) as usize; - if raw < 0x100000 { + if crate::value::addr_class::is_handle_band(raw) { return Err(not_iterable_error_value(value)); } @@ -671,7 +671,7 @@ fn is_native_array_value(value: f64) -> bool { return false; } let ptr = crate::value::js_nanbox_get_pointer(value) as usize; - if ptr < 0x100000 { + if crate::value::addr_class::is_handle_band(ptr) { return false; } unsafe { @@ -1167,7 +1167,7 @@ pub extern "C" fn js_assimilate_thenable(value: f64) -> f64 { } let raw_ptr = (bits & 0x0000_FFFF_FFFF_FFFF) as usize; - if raw_ptr < 0x100000 { + if crate::value::addr_class::is_handle_band(raw_ptr) { return value; } diff --git a/crates/perry-runtime/src/promise/spec_combinators.rs b/crates/perry-runtime/src/promise/spec_combinators.rs index 603ac55129..4e2262ad26 100644 --- a/crates/perry-runtime/src/promise/spec_combinators.rs +++ b/crates/perry-runtime/src/promise/spec_combinators.rs @@ -118,7 +118,7 @@ fn is_callable(value: f64) -> bool { } else { return false; }; - if raw < 0x100000 || (raw as u64) >= HEAP_ADDR_CEILING { + if crate::value::addr_class::is_handle_band(raw) || (raw as u64) >= HEAP_ADDR_CEILING { return false; } crate::closure::is_closure_ptr(raw) || crate::proxy::js_proxy_is_proxy(value) == 1 @@ -647,7 +647,7 @@ fn is_object_value(value: f64) -> bool { return false; } let raw = (bits & crate::value::POINTER_MASK) as usize; - if raw < 0x100000 { + if crate::value::addr_class::is_handle_band(raw) { return false; } !crate::symbol::is_registered_symbol(raw) diff --git a/crates/perry-runtime/src/proxy.rs b/crates/perry-runtime/src/proxy.rs index 49ebb4d934..bdd2b85d1a 100644 --- a/crates/perry-runtime/src/proxy.rs +++ 
b/crates/perry-runtime/src/proxy.rs @@ -91,11 +91,12 @@ const POINTER_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; /// Tag bits high enough to live inside a 48-bit pointer slot but low enough /// that real heap pointers never collide. Keep proxies near the top of the -/// runtime's `< 0x100000` small-handle band so Web Fetch handles can occupy a -/// broad disjoint range below this without sharing visible `POINTER_TAG | id` -/// bits with a proxy. Any operation on a proxy MUST go through the Proxy* -/// dispatch helpers in this module. -const PROXY_TAG_BASE: u64 = 0x000F_0000; +/// runtime's small-handle band so Web Fetch handles can occupy a broad +/// disjoint range below this without sharing visible `POINTER_TAG | id` bits +/// with a proxy. Any operation on a proxy MUST go through the Proxy* dispatch +/// helpers in this module. The band boundary is owned by +/// `value::addr_class` (`PROXY_ID_BAND_START`). +const PROXY_TAG_BASE: u64 = crate::value::addr_class::PROXY_ID_BAND_START as u64; fn encode_proxy_id(id: u64) -> i64 { (PROXY_TAG_BASE + id) as i64 @@ -617,7 +618,7 @@ fn small_handle_from_value(value: f64) -> Option<i64> { if raw > 0 && (raw as u64) < PROXY_TAG_BASE { return Some(raw); } - } else if top == 0 && bits > 0 && bits < 0x100000 { + } else if top == 0 && crate::value::addr_class::is_small_handle(bits as usize) { return Some(bits as i64); } None diff --git a/crates/perry-runtime/src/set.rs b/crates/perry-runtime/src/set.rs index aa444e234e..c56030de0c 100644 --- a/crates/perry-runtime/src/set.rs +++ b/crates/perry-runtime/src/set.rs @@ -109,11 +109,11 @@ fn register_set(ptr: *mut SetHeader) { } pub fn is_registered_set(addr: usize) -> bool { - // #4004: reject the `< 0x100000` small-handle band (Web Fetch / node:http / - // timer ids are NaN-boxed POINTER_TAG values, not heap addresses) before + // #4004: reject the small-handle band (Web Fetch / node:http / timer ids + // are NaN-boxed POINTER_TAG values, not heap addresses) before // dereferencing the GC 
header. Managed Sets are arena-allocated above the - // cutoff. See map::is_registered_map / date::is_date_cell_addr. - if addr < 0x100000 { + // cutoff. See `value::addr_class` for the band map. + if crate::value::addr_class::is_handle_band(addr) { return false; } // Registry FIRST: it is authoritative and dereference-free. Probing the @@ -126,9 +126,9 @@ pub fn is_registered_set(addr: usize) -> bool { } // A registered address is a live arena Set; the header read is safe and // guards against a stale entry whose memory was reused by another type. - unsafe { - let header = (addr - crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader; - (*header).obj_type == crate::gc::GC_TYPE_SET + match unsafe { crate::value::addr_class::try_read_gc_header(addr) } { + Some(header) => header.obj_type == crate::gc::GC_TYPE_SET, + None => false, } } diff --git a/crates/perry-runtime/src/symbol.rs b/crates/perry-runtime/src/symbol.rs index 5807a9828b..461f6d05c3 100644 --- a/crates/perry-runtime/src/symbol.rs +++ b/crates/perry-runtime/src/symbol.rs @@ -266,10 +266,10 @@ pub unsafe extern "C" fn js_is_symbol(value: f64) -> i32 { // Registry handles (proxies, fetch/stream handles, …) are POINTER_TAG'd // small ids, NOT heap allocations — dereferencing one for the magic // probe segfaults on Linux (unmapped page; mimalloc on macOS happens to - // retain, hiding it). Real heap symbols live above the 0x100000 floor + // retain, hiding it). Real heap symbols live above the handle band // (same rationale as the typeof / iterator guards, #1843/#4800), and // registered symbols already returned above. - if ptr.is_null() || (ptr as usize) < 0x100000 { + if crate::value::addr_class::is_handle_band(ptr as usize) { return 0; } if (*ptr).magic == SYMBOL_MAGIC { @@ -1600,7 +1600,9 @@ pub unsafe extern "C" fn js_object_get_symbol_property(obj_f64: f64, sym_f64: f6 // otherwise misses the side table and returns undefined. 
if (bits >> 48) == 0x7FFD { let id = (bits & 0x0000_FFFF_FFFF_FFFF) as i64; - if id > 0 && id < 0x100000 && crate::timer::is_known_timer_id(id) { + if crate::value::addr_class::is_small_handle(id as usize) + && crate::timer::is_known_timer_id(id) + { let dispose = well_known_symbol("dispose"); if !dispose.is_null() { let dispose_f64 = @@ -1621,7 +1623,7 @@ pub unsafe extern "C" fn js_object_get_symbol_property(obj_f64: f64, sym_f64: f6 // without adding a runtime-specific special case. if (bits >> 48) == 0x7FFD { let id = (bits & 0x0000_FFFF_FFFF_FFFF) as i64; - if id > 0 && id < 0x100000 { + if crate::value::addr_class::is_small_handle(id as usize) { let dispose = well_known_symbol("dispose"); if !dispose.is_null() { let dispose_f64 = @@ -1643,7 +1645,7 @@ pub unsafe extern "C" fn js_object_get_symbol_property(obj_f64: f64, sym_f64: f6 // interpreted as heap pointers when the dispatcher owns the method. if (bits >> 48) == 0x7FFD { let id = (bits & 0x0000_FFFF_FFFF_FFFF) as i64; - if id > 0 && id < 0x100000 { + if crate::value::addr_class::is_small_handle(id as usize) { let async_dispose = well_known_symbol("asyncDispose"); if !async_dispose.is_null() { let async_dispose_f64 = f64::from_bits( @@ -1668,7 +1670,7 @@ pub unsafe extern "C" fn js_object_get_symbol_property(obj_f64: f64, sym_f64: f6 // `Headers` can expose its `entries` method as the iterator function. if (bits >> 48) == 0x7FFD { let id = (bits & 0x0000_FFFF_FFFF_FFFF) as i64; - if id > 0 && id < 0x100000 { + if crate::value::addr_class::is_small_handle(id as usize) { let iter_wk = well_known_symbol("iterator"); if !iter_wk.is_null() { let iter_f64 = @@ -1702,8 +1704,7 @@ pub unsafe extern "C" fn js_object_get_symbol_property(obj_f64: f64, sym_f64: f6 // `is_valid_obj_ptr` (validates the GcHeader) rather than the address // band alone — otherwise a symbol read on a low-address object returned // undefined. Proxies (registered small ids) keep their own semantics. 
- if id > 0 - && id < 0x100000 + if crate::value::addr_class::is_small_handle(id) && !crate::object::is_valid_obj_ptr(id as *const u8) && crate::proxy::js_proxy_is_proxy(obj_f64) == 0 { diff --git a/crates/perry-runtime/src/temporal/mod.rs b/crates/perry-runtime/src/temporal/mod.rs index a37e60bed9..d43c2fe104 100644 --- a/crates/perry-runtime/src/temporal/mod.rs +++ b/crates/perry-runtime/src/temporal/mod.rs @@ -169,12 +169,9 @@ pub fn alloc_temporal_cell(value: TemporalValue) -> f64 { /// then read the `GcHeader.obj_type`. #[inline] pub fn is_temporal_cell_addr(addr: usize) -> bool { - if addr < 0x100000 || !crate::object::is_valid_obj_ptr(addr as *const u8) { - return false; - } - unsafe { - let header = (addr - crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader; - (*header).obj_type == crate::gc::GC_TYPE_TEMPORAL + match unsafe { crate::value::addr_class::try_read_gc_header(addr) } { + Some(header) => header.obj_type == crate::gc::GC_TYPE_TEMPORAL, + None => false, } } diff --git a/crates/perry-runtime/src/typed_feedback.rs b/crates/perry-runtime/src/typed_feedback.rs index 24401310ed..12f4c305ba 100644 --- a/crates/perry-runtime/src/typed_feedback.rs +++ b/crates/perry-runtime/src/typed_feedback.rs @@ -656,7 +656,7 @@ fn normalize_raw_object_addr(bits: u64) -> usize { // Native module registry handles are carried as small raw values in several // dispatch paths. They are not GC objects, and probing `addr - header_size` // for them can fault before the generic native-handle dispatcher runs. - if addr < 0x100000 || (addr as u64) >> 48 != 0 { + if crate::value::addr_class::is_handle_band(addr) || (addr as u64) >> 48 != 0 { 0 } else { addr diff --git a/crates/perry-runtime/src/value/addr_class.rs b/crates/perry-runtime/src/value/addr_class.rs new file mode 100644 index 0000000000..775de07052 --- /dev/null +++ b/crates/perry-runtime/src/value/addr_class.rs @@ -0,0 +1,252 @@ +//! Centralized handle-vs-heap-pointer address classification. +//! +//! 
Perry NaN-boxes JS values; `POINTER_TAG` (0x7FFD) carries a 48-bit payload +//! that is USUALLY a heap pointer to a GC-managed allocation (8-byte +//! [`crate::gc::GcHeader`] at `addr - GC_HEADER_SIZE`), but several +//! subsystems smuggle small integer *registry handles* under the same tag. +//! Handles are NOT addresses: dereferencing one reads unmapped low memory and +//! segfaults on Linux (macOS mimalloc page retention masks the class — see +//! #4665, #4800). Runtime code therefore classifies a payload by MAGNITUDE +//! before any dereference. This module is the single owner of the band +//! boundaries and the classification predicates; do not re-type the literals +//! at call sites (the `scripts/addr_class_inventory.py` lint gate enforces +//! this). +//! +//! ## Band map (who owns which id range) +//! +//! | Range | Owner | +//! |------------------------|------------------------------------------------------------------| +//! | `0` | null / INVALID_HANDLE | +//! | `[1, 0x40000)` | perry-stdlib `common/handle.rs` registry (net.Socket, node:http, | +//! | | crypto, fastify, ioredis, UI widgets, timers, …) | +//! | `[0x40000, 0xE0000)` | Web Fetch family (Request/Response/Headers/Blob), perry-stdlib | +//! | | `fetch/mod.rs` `FETCH_HANDLE_ID_{START,END}` (#3973/#3974/#4004) | +//! | `[0xE0000, 0xF0000)` | zlib streams, perry-stdlib `zlib.rs` (#1843) | +//! | `[0xF0000, 0x100000)` | revocable Proxy ids, perry-runtime `proxy.rs` `PROXY_TAG_BASE` | +//! | | (#2846 crash cluster) | +//! | `>= 0x100000` | plausible heap addresses (see [`is_valid_obj_ptr`] for the | +//! | | platform heap floor/ceiling) | +//! | `[0x100000, 0x200000)` | EXCEPTION: Web Streams ids (perry-stdlib `streams.rs`) are RAW | +//! | | NUMERIC `f64` values — never `POINTER_TAG`-boxed — deliberately | +//! | | placed above the pointer-tagged handle band (#1545). Only probe | +//! | | this band on values that arrived as plain finite numbers. | +//! +//! 
The `0x100000` ceiling was established by #1843 (zlib handle deref'd as +//! heap object), #4004 (fetch handles moved to 0x40000), and #4800 +//! (`is_builtin_iterator_class_id` used an 0x1008 floor and deref'd a Headers +//! handle on every hono response). All four sub-bands must stay below +//! [`HANDLE_BAND_MAX`]; perry-stdlib re-exports these constants and its unit +//! tests assert the containment. + +use crate::gc::{GcHeader, GC_HEADER_SIZE}; + +/// Exclusive upper bound of the small-handle id space. Payloads below this are +/// registry handles (or null/garbage), never dereferenceable heap pointers. +/// Raising any sub-band past this value requires auditing every +/// `is_handle_band` caller. +pub const HANDLE_BAND_MAX: usize = 0x100000; + +/// Exclusive end of the generic perry-stdlib `common/handle.rs` registry band +/// (`[1, COMMON_HANDLE_BAND_END)`). The registry panics rather than allocate +/// into the fetch band above it. +pub const COMMON_HANDLE_BAND_END: usize = 0x40000; + +/// Web Fetch handle band `[FETCH_HANDLE_BAND_START, FETCH_HANDLE_BAND_END)`, +/// owned by perry-stdlib `fetch/mod.rs` (#4004 moved it here, out of the +/// common registry's way). +pub const FETCH_HANDLE_BAND_START: usize = 0x40000; +pub const FETCH_HANDLE_BAND_END: usize = 0xE0000; + +/// zlib stream handle band `[ZLIB_HANDLE_BAND_START, ZLIB_HANDLE_BAND_END)`, +/// owned by perry-stdlib `zlib.rs` (#1843 established that these ids must not +/// be dereferenced as heap objects). +pub const ZLIB_HANDLE_BAND_START: usize = 0xE0000; +pub const ZLIB_HANDLE_BAND_END: usize = 0xF0000; + +/// Revocable Proxy id band `[PROXY_ID_BAND_START, HANDLE_BAND_MAX)`, owned by +/// perry-runtime `proxy.rs` (`PROXY_TAG_BASE`). Kept at the top of the handle +/// band so fetch ids below never collide with a proxy id (#2846). +pub const PROXY_ID_BAND_START: usize = 0xF0000; + +/// Web Streams id band `[STREAM_ID_BAND_START, STREAM_ID_BAND_END)`, owned by +/// perry-stdlib `streams.rs`. 
NOT part of the pointer-tagged handle band: +/// stream ids travel as raw numeric `f64`s (#1545), so they sit just above +/// `HANDLE_BAND_MAX` and only number-typed probe paths may classify into it. +pub const STREAM_ID_BAND_START: usize = 0x100000; +pub const STREAM_ID_BAND_END: usize = 0x200000; + +/// True when `addr` lies in the small-handle band (including 0/null). A +/// payload in this band must never be dereferenced; route it to the handle +/// dispatch tables instead. +#[inline(always)] +pub fn is_handle_band(addr: usize) -> bool { + addr < HANDLE_BAND_MAX +} + +/// True for a plausible *live* handle id: non-zero and inside the handle +/// band. Mirrors the widespread `addr > 0 && addr < 0x100000` shape (0 is +/// null / INVALID_HANDLE, not a handle). +#[inline(always)] +pub fn is_small_handle(addr: usize) -> bool { + (1..HANDLE_BAND_MAX).contains(&addr) +} + +/// Complement of [`is_handle_band`]: the payload is above the handle band and +/// may be treated as a candidate heap address (subject to +/// [`is_valid_obj_ptr`] / registry checks as the call site requires). Note +/// `0`/null is NOT above the band. +#[inline(always)] +pub fn is_above_handle_band(addr: usize) -> bool { + addr >= HANDLE_BAND_MAX +} + +/// True when `addr` is a revocable-Proxy id. Callers must still confirm +/// registration via `proxy::js_proxy_is_proxy` before routing — a heap-free +/// check, so do it before any dereference. +#[inline(always)] +pub fn is_proxy_id_band(addr: usize) -> bool { + (PROXY_ID_BAND_START..HANDLE_BAND_MAX).contains(&addr) +} + +/// True when `id` is in the raw-numeric Web Streams id band. Only meaningful +/// for values that arrived as plain finite numbers (never for `POINTER_TAG` +/// payloads — heap pointers live in this range too). +#[inline(always)] +pub fn is_stream_id_band(id: usize) -> bool { + (STREAM_ID_BAND_START..STREAM_ID_BAND_END).contains(&id) +} + +/// Check if a pointer is a valid heap object (safe to dereference GcHeader). 
+/// Values below 0x100000 (1MB) are likely INT32_TAG extracts, small handles, +/// or null. The upper bound filters out NaN-box tag bits that leaked through. +/// +/// Issue #73 follow-up: raised the lower bound from 1 MB to 2 TB to reject +/// corrupted NaN-boxes whose 48-bit handle lands in the 1-2 TB window +/// (e.g. `0x00FF_0000_0000` from an `ArrayHeader { length: 0, capacity: +/// 255 }` read as u64). Real macOS mimalloc + arena allocations all +/// land in the 3-5 TB range; anything below 2 TB is certainly bogus on +/// that platform. Linux glibc and Windows mimalloc allocate well below +/// 2 TB though (often in the GB-to-tens-of-GB range), so the macOS floor +/// silently rejects every legitimate object pointer there — issues +/// #385/#386/#387 traced back to this exact filter on Windows. +/// +/// #1136 / #1129: iOS-family *device* targets (aarch64-apple-ios, +/// -tvos, -watchos, -visionos) ship without mimalloc and use +/// libsystem_malloc, whose user allocations land in the same low range +/// as Android/Linux/Windows. Treat them like those platforms — the +/// downstream `GcHeader.obj_type` check is the real liveness guard. +/// The simulator (e.g. ios + target_abi = "sim") runs on the macOS +/// host's mimalloc so its allocations still land above 2 TB; lowering +/// the floor here is safe because the obj_type validation does the +/// work. +/// +/// NOTE: the platform `HEAP_MIN` floor on Linux/Android/iOS/Windows +/// (`0x1000`) is BELOW the handle band, so this predicate alone does NOT +/// reject small handles there — pair it with [`is_handle_band`] (or use +/// [`try_read_gc_header`], which does both) when the input can carry a +/// handle id. 
+#[inline(always)] +pub(crate) fn is_valid_obj_ptr(ptr: *const u8) -> bool { + let addr = ptr as u64; + #[cfg(any( + target_os = "android", + target_os = "linux", + target_os = "windows", + target_os = "ios", + target_os = "tvos", + target_os = "watchos", + target_os = "visionos", + ))] + const HEAP_MIN: u64 = 0x1000; + #[cfg(not(any( + target_os = "android", + target_os = "linux", + target_os = "windows", + target_os = "ios", + target_os = "tvos", + target_os = "watchos", + target_os = "visionos", + )))] + const HEAP_MIN: u64 = 0x200_0000_0000; + (HEAP_MIN..0x8000_0000_0000).contains(&addr) +} + +/// True when `addr` is outside every handle band AND inside the platform +/// heap range — i.e. plausible to dereference as a GC allocation. This is the +/// canonical `addr >= 0x100000 && is_valid_obj_ptr(addr)` pairing. +#[inline(always)] +pub(crate) fn is_plausible_heap_addr(addr: usize) -> bool { + is_above_handle_band(addr) && is_valid_obj_ptr(addr as *const u8) +} + +/// Validated GcHeader read: magnitude-classify FIRST (reject the handle band +/// and implausible heap addresses), only then dereference +/// `addr - GC_HEADER_SIZE`. Returns `None` without touching memory for +/// handles, null, tag remnants, and out-of-range garbage. +/// +/// # Safety +/// `addr` must either be a live GC allocation's user address or arbitrary +/// non-pointer bits; a STALE heap address that passes the magnitude checks is +/// still dereferenced (same contract as every existing call site — the +/// registries/`obj_type` checks layered above this are what catch reuse). 
+#[inline(always)] +pub(crate) unsafe fn try_read_gc_header(addr: usize) -> Option<&'static GcHeader> { + if !is_plausible_heap_addr(addr) { + return None; + } + Some(&*((addr - GC_HEADER_SIZE) as *const GcHeader)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn band_layout_is_contiguous_and_contained() { + assert!(COMMON_HANDLE_BAND_END <= FETCH_HANDLE_BAND_START); + assert!(FETCH_HANDLE_BAND_START < FETCH_HANDLE_BAND_END); + assert!(FETCH_HANDLE_BAND_END <= ZLIB_HANDLE_BAND_START); + assert!(ZLIB_HANDLE_BAND_END <= PROXY_ID_BAND_START); + assert!(PROXY_ID_BAND_START < HANDLE_BAND_MAX); + assert!(STREAM_ID_BAND_START >= HANDLE_BAND_MAX); + } + + #[test] + fn handle_band_predicates() { + // The #4800 shape: a first-allocation fetch Headers handle. + assert!(is_handle_band(0x40000)); + assert!(is_small_handle(0x40000)); + // Proxy ids (#2846), zlib (#1843), common registry, null. + assert!(is_proxy_id_band(0xF0000)); + assert!(is_proxy_id_band(0xF_FFF8)); + assert!(!is_proxy_id_band(0x40000)); + assert!(is_handle_band(0xE0000)); + assert!(is_handle_band(1)); + assert!(is_handle_band(0)); + assert!(!is_small_handle(0)); + // First heap-plausible address. + assert!(!is_handle_band(HANDLE_BAND_MAX)); + assert!(is_above_handle_band(HANDLE_BAND_MAX)); + assert!(!is_small_handle(HANDLE_BAND_MAX)); + } + + #[test] + fn try_read_gc_header_rejects_handles_without_deref() { + // Would SIGSEGV on Linux if dereferenced (#4665/#4800) — must be None + // purely from the magnitude check. + for addr in [ + 0usize, 1, 0x1008, 0x10000, 0x40000, 0x4000c, 0xF0000, 0xF_FFF8, + ] { + assert!(unsafe { try_read_gc_header(addr) }.is_none()); + } + // Tag remnants / out-of-range bits. 
+ assert!(unsafe { try_read_gc_header(0x7FFD_0000_0000_0000) }.is_none()); + } + + #[test] + fn stream_id_band_is_above_pointer_handles() { + assert!(is_stream_id_band(STREAM_ID_BAND_START)); + assert!(!is_stream_id_band(HANDLE_BAND_MAX - 1)); + assert!(!is_handle_band(STREAM_ID_BAND_START)); + } +} diff --git a/crates/perry-runtime/src/value/dynamic_object.rs b/crates/perry-runtime/src/value/dynamic_object.rs index aee0bd04c8..68b7024559 100644 --- a/crates/perry-runtime/src/value/dynamic_object.rs +++ b/crates/perry-runtime/src/value/dynamic_object.rs @@ -249,7 +249,7 @@ pub unsafe extern "C" fn js_dynamic_object_get_property( } // Check if this is a handle-based object (small integer, not a real heap pointer) - if ptr < 0x100000 { + if crate::value::addr_class::is_handle_band(ptr as usize) { if let Some(dispatch) = crate::object::handle_property_dispatch() { return dispatch(ptr, property_name_ptr as *const u8, property_name_len); } diff --git a/crates/perry-runtime/src/value/mod.rs b/crates/perry-runtime/src/value/mod.rs index f119795fa0..2672affbd9 100644 --- a/crates/perry-runtime/src/value/mod.rs +++ b/crates/perry-runtime/src/value/mod.rs @@ -33,6 +33,7 @@ //! re-export surface — perry-codegen + perry-runtime consumers //! pattern-match against the names below. 
+pub mod addr_class; mod dyn_index; mod dynamic_arith; mod dynamic_array; diff --git a/crates/perry-runtime/src/value/to_string.rs b/crates/perry-runtime/src/value/to_string.rs index d7a7454425..b6444e7065 100644 --- a/crates/perry-runtime/src/value/to_string.rs +++ b/crates/perry-runtime/src/value/to_string.rs @@ -812,7 +812,7 @@ pub extern "C" fn js_jsvalue_to_string_method(value: f64) -> *mut crate::string: } if jsval.is_pointer() { let handle = jsval.as_pointer::() as usize; - if (1..0x100000).contains(&handle) { + if crate::value::addr_class::is_small_handle(handle) { if let Some(dispatch) = crate::object::handle_method_dispatch() { let result = unsafe { dispatch(handle as i64, b"toString".as_ptr(), 8, std::ptr::null(), 0) diff --git a/crates/perry-runtime/src/value/truthy.rs b/crates/perry-runtime/src/value/truthy.rs index 8789848aa4..7c6938914c 100644 --- a/crates/perry-runtime/src/value/truthy.rs +++ b/crates/perry-runtime/src/value/truthy.rs @@ -102,7 +102,10 @@ pub extern "C" fn js_is_truthy(value: f64) -> i32 { // this check. // Both filters together make a false-positive astronomically unlikely // while still preserving the legacy bitcast path for real pointers. - if bits >= 0x10_0000 && bits < 0x0001_0000_0000_0000 && (bits & 0x7) == 0 { + if crate::value::addr_class::is_above_handle_band(bits as usize) + && bits < 0x0001_0000_0000_0000 + && (bits & 0x7) == 0 + { // This could be a raw string pointer - check if it's a valid string let str_ptr = bits as *const crate::string::StringHeader; // Try to read the string length - empty string is falsy diff --git a/crates/perry-runtime/src/weakref.rs b/crates/perry-runtime/src/weakref.rs index d7bc9707ef..08c921a0e9 100644 --- a/crates/perry-runtime/src/weakref.rs +++ b/crates/perry-runtime/src/weakref.rs @@ -742,17 +742,14 @@ pub unsafe fn try_weak_method_dispatch( /// (`js_nanbox_get_pointer` yields 0) safely resolves to `None`. 
pub fn weak_class_id_from_receiver(receiver: f64) -> Option { let addr = js_nanbox_get_pointer(receiver) as usize; - // #4004: reject the `< 0x100000` small-handle band (Web Fetch / node:http / - // timer ids are NaN-boxed POINTER_TAG values, not heap addresses) before + // #4004: reject the small-handle band (Web Fetch / node:http / timer ids + // are NaN-boxed POINTER_TAG values, not heap addresses) before // dereferencing the GC header. WeakMap/WeakSet are ObjectHeader-backed - // allocations above the cutoff. See map::is_registered_map. - if addr < 0x100000 { - return None; - } + // allocations above the cutoff. See `value::addr_class` for the band map. unsafe { - let header = (addr - crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader; - if (*header).obj_type != crate::gc::GC_TYPE_OBJECT { - return None; + match crate::value::addr_class::try_read_gc_header(addr) { + Some(header) if header.obj_type == crate::gc::GC_TYPE_OBJECT => {} + _ => return None, } let cid = (*(addr as *const ObjectHeader)).class_id; if cid == CLASS_ID_WEAKMAP || cid == CLASS_ID_WEAKSET { diff --git a/crates/perry-stdlib/src/common/handle.rs b/crates/perry-stdlib/src/common/handle.rs index 36eee180b9..254b03f015 100644 --- a/crates/perry-stdlib/src/common/handle.rs +++ b/crates/perry-stdlib/src/common/handle.rs @@ -21,8 +21,10 @@ pub const INVALID_HANDLE: Handle = 0; /// Global handle registry using DashMap for concurrent access static HANDLES: Lazy>> = Lazy::new(DashMap::new); +// Band boundary owned by `perry_runtime::value::addr_class`. const COMMON_HANDLE_ID_START: Handle = 1; -const COMMON_HANDLE_ID_END: Handle = 0x40000; +const COMMON_HANDLE_ID_END: Handle = + perry_runtime::value::addr_class::COMMON_HANDLE_BAND_END as Handle; /// Next handle ID (0 is reserved for invalid/null). 
The visible low range stops /// before Web Fetch's pointer-tagged handle band so generic dispatch cannot diff --git a/crates/perry-stdlib/src/events/constructors.rs b/crates/perry-stdlib/src/events/constructors.rs index 7e7a06956e..9303592b11 100644 --- a/crates/perry-stdlib/src/events/constructors.rs +++ b/crates/perry-stdlib/src/events/constructors.rs @@ -15,7 +15,7 @@ unsafe fn event_emitter_options_capture_rejections(options: f64) -> bool { return false; } let options_obj = js_nanbox_get_pointer(options) as *const ObjectHeader; - if options_obj.is_null() || (options_obj as usize) < 0x100000 { + if perry_runtime::value::addr_class::is_handle_band(options_obj as usize) { return false; } let gc_header = (options_obj as *const u8).sub(perry_runtime::gc::GC_HEADER_SIZE) diff --git a/crates/perry-stdlib/src/fetch/headers.rs b/crates/perry-stdlib/src/fetch/headers.rs index e25bd0ec3b..eb37427e81 100644 --- a/crates/perry-stdlib/src/fetch/headers.rs +++ b/crates/perry-stdlib/src/fetch/headers.rs @@ -61,7 +61,7 @@ fn gc_type_for_raw_ptr(raw: i64) -> Option { return None; } let addr = raw as usize; - if addr < 0x100000 { + if perry_runtime::value::addr_class::is_handle_band(addr) { return None; } unsafe { Some(*(raw as *const u8).sub(perry_runtime::gc::GC_HEADER_SIZE)) } diff --git a/crates/perry-stdlib/src/fetch/mod.rs b/crates/perry-stdlib/src/fetch/mod.rs index 141d6eaf66..3ccef7f826 100644 --- a/crates/perry-stdlib/src/fetch/mod.rs +++ b/crates/perry-stdlib/src/fetch/mod.rs @@ -42,10 +42,14 @@ use validation::{ normalize_method, parse_redirect_location, redirect_status_from_value, }; -// Web Fetch handles must stay below the `0x100000` small-handle cutoff while -// avoiding the low native-id range exposed by `node:http` (#3973/#3974 via #4004). 
-pub(crate) const FETCH_HANDLE_ID_START: usize = 0x40000; -pub(crate) const FETCH_HANDLE_ID_END: usize = 0xE0000; +// Web Fetch handles must stay below the small-handle cutoff while avoiding +// the low native-id range exposed by `node:http` (#3973/#3974 via #4004). The +// band boundaries are owned by `perry_runtime::value::addr_class` (the +// runtime's magnitude checks classify against them). +pub(crate) const FETCH_HANDLE_ID_START: usize = + perry_runtime::value::addr_class::FETCH_HANDLE_BAND_START; +pub(crate) const FETCH_HANDLE_ID_END: usize = + perry_runtime::value::addr_class::FETCH_HANDLE_BAND_END; // Response handle storage lazy_static::lazy_static! { @@ -98,8 +102,9 @@ mod tests { #[test] fn fetch_handle_ids_use_high_small_handle_range() { - assert!(FETCH_HANDLE_ID_START >= 0x40000); - assert!(FETCH_HANDLE_ID_END <= 0x100000); + use perry_runtime::value::addr_class; + assert!(FETCH_HANDLE_ID_START >= addr_class::COMMON_HANDLE_BAND_END); + assert!(FETCH_HANDLE_ID_END <= addr_class::HANDLE_BAND_MAX); let native_id = crate::common::register_handle("native-request-marker".to_string()); let id = alloc_fetch_handle_id(); diff --git a/crates/perry-stdlib/src/streams.rs b/crates/perry-stdlib/src/streams.rs index 4fd3945f17..d02bee1a93 100644 --- a/crates/perry-stdlib/src/streams.rs +++ b/crates/perry-stdlib/src/streams.rs @@ -179,8 +179,11 @@ unsafe impl Send for WritableStreamData {} unsafe impl Send for ReaderData {} unsafe impl Send for WriterData {} -pub(crate) const STREAM_HANDLE_ID_START: usize = 0x100000; -pub(crate) const STREAM_HANDLE_ID_END: usize = 0x200000; +// Band boundaries owned by `perry_runtime::value::addr_class` (the runtime's +// finite-number stream probes classify against the same range). +pub(crate) const STREAM_HANDLE_ID_START: usize = + perry_runtime::value::addr_class::STREAM_ID_BAND_START; +pub(crate) const STREAM_HANDLE_ID_END: usize = perry_runtime::value::addr_class::STREAM_ID_BAND_END; lazy_static::lazy_static! 
{ static ref READABLE_STREAMS: Mutex> = Mutex::new(HashMap::new()); diff --git a/crates/perry-stdlib/src/streams/tests.rs b/crates/perry-stdlib/src/streams/tests.rs index 4eef4049fe..587943b805 100644 --- a/crates/perry-stdlib/src/streams/tests.rs +++ b/crates/perry-stdlib/src/streams/tests.rs @@ -9,7 +9,7 @@ fn stream_ids_live_outside_pointer_tag_small_handle_band() { "stream id {id:#x} must stay in the raw numeric stream band" ); assert!( - id >= 0x100000, + id >= perry_runtime::value::addr_class::HANDLE_BAND_MAX, "stream id {id:#x} must not overlap pointer-tagged small handles" ); } diff --git a/crates/perry-stdlib/src/zlib.rs b/crates/perry-stdlib/src/zlib.rs index 4bab8fceeb..030ef9420f 100644 --- a/crates/perry-stdlib/src/zlib.rs +++ b/crates/perry-stdlib/src/zlib.rs @@ -573,8 +573,11 @@ lazy_static::lazy_static! { static ref NEXT_ZLIB_ID: Mutex = Mutex::new(ZLIB_STREAM_HANDLE_ID_START); } -const ZLIB_STREAM_HANDLE_ID_START: i64 = 0xE0000; -const ZLIB_STREAM_HANDLE_ID_END: i64 = 0xF0000; +// Band boundaries owned by `perry_runtime::value::addr_class`. +const ZLIB_STREAM_HANDLE_ID_START: i64 = + perry_runtime::value::addr_class::ZLIB_HANDLE_BAND_START as i64; +const ZLIB_STREAM_HANDLE_ID_END: i64 = + perry_runtime::value::addr_class::ZLIB_HANDLE_BAND_END as i64; static ZLIB_GC_REGISTERED: std::sync::Once = std::sync::Once::new(); diff --git a/crates/perry/tests/addr_class_handle_bands.rs b/crates/perry/tests/addr_class_handle_bands.rs new file mode 100644 index 0000000000..97bfdf8628 --- /dev/null +++ b/crates/perry/tests/addr_class_handle_bands.rs @@ -0,0 +1,107 @@ +//! End-to-end exercise of the handle-vs-heap-pointer classification bands +//! (`perry-runtime/src/value/addr_class.rs`), in the #4800 shape: a Web Fetch +//! `Headers` handle (POINTER_TAG payload in the fetch band, NOT a heap +//! address) flowing through for-of / spread / typeof / instanceof / brand +//! checks alongside real heap objects (Map, Set, plain object, array, Date). +//! +//! 
Every probe here historically reached a `GcHeader` deref: a missing or +//! too-low magnitude floor dereferenced the handle id as a heap address and +//! SIGSEGV'd on Linux (#1843, #4004, #4665, #4800 — macOS mimalloc page +//! retention masks the class, so the binary-exits-cleanly assertion is the +//! real gate on the Linux CI runners). + +use std::path::PathBuf; +use std::process::Command; + +fn perry_bin() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_perry")) +} + +#[test] +fn handle_band_values_classify_without_deref() { + let dir = tempfile::tempdir().expect("tempdir"); + let entry = dir.path().join("main.ts"); + let output = dir.path().join("main_bin"); + + std::fs::write( + &entry, + r#" +// Fetch-band handle (id in [0x40000, 0xE0000), pointer-tagged, no GcHeader). +const h = new Headers(); +h.set("content-type", "application/json"); +h.set("x-perry", "1"); + +// #4800: lazy for-of routes the handle through js_get_iterator → +// is_builtin_iterator_class_id; a low floor deref'd the id as a GcHeader. +const pairs: string[] = []; +for (const [k, v] of h) { + pairs.push(k + "=" + v); +} +pairs.sort(); +console.log(pairs.join(",")); + +// Spread over the handle (flat_clone handle @@iterator path). +const spread = [...h]; +console.log("spread:" + spread.length); + +// Classification probes that all magnitude-check before dereferencing. +console.log("typeof:" + typeof h); +console.log("isArray:" + Array.isArray(h)); +console.log("instanceofMap:" + (h instanceof Map)); + +// Real heap collections — is_registered_map/is_registered_set brand checks +// (registry-first after #4665) must still say yes for these... +const m = new Map(); +m.set("a", 1); +const s = new Set([1, 2, 3]); +console.log("map:" + (m instanceof Map) + ":" + m.get("a") + ":" + m.size); +console.log("set:" + (s instanceof Set) + ":" + s.has(2) + ":" + s.size); + +// ...and plain heap objects/arrays/Dates keep their identities. 
+const obj = { x: 1, y: "z" }; +const arr = [1, 2, 3]; +const d = new Date(0); +console.log(JSON.stringify(obj)); +console.log(arr.map((n: number) => n * 2).join("-")); +console.log("date:" + (d instanceof Date) + ":" + d.getTime()); +"#, + ) + .expect("write entry"); + + let compile = Command::new(perry_bin()) + .current_dir(dir.path()) + .arg("compile") + .arg(&entry) + .arg("-o") + .arg(&output) + .output() + .expect("run perry compile"); + assert!( + compile.status.success(), + "perry compile failed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&compile.stdout), + String::from_utf8_lossy(&compile.stderr) + ); + + let run = Command::new(&output).output().expect("run compiled binary"); + assert!( + run.status.success(), + "compiled binary failed (signal/segfault = handle deref'd as heap pointer, \ + #4800/#4665 regression class)\nstatus: {:?}\nstdout:\n{}\nstderr:\n{}", + run.status, + String::from_utf8_lossy(&run.stdout), + String::from_utf8_lossy(&run.stderr) + ); + let stdout = String::from_utf8_lossy(&run.stdout); + let expected = "content-type=application/json,x-perry=1\n\ + spread:2\n\ + typeof:object\n\ + isArray:false\n\ + instanceofMap:false\n\ + map:true:1:1\n\ + set:true:true:3\n\ + {\"x\":1,\"y\":\"z\"}\n\ + 2-4-6\n\ + date:true:0\n"; + assert_eq!(stdout, expected, "handle/heap classification drifted"); +} diff --git a/scripts/addr_class_allowlist.txt b/scripts/addr_class_allowlist.txt new file mode 100644 index 0000000000..c9993a52e0 --- /dev/null +++ b/scripts/addr_class_allowlist.txt @@ -0,0 +1,124 @@ +# Address-classification audit allowlist (scripts/addr_class_inventory.py). +# +# Format: path-prefix | line-substring-or-* | justification +# Matched findings are suppressed. Every entry MUST have a justification; +# malformed lines fail the run (exit 2). +# +# Rule classes audited: +# band-literal -- hand-typed handle-band boundary literals; new code must +# use the constants/predicates in value/addr_class.rs. 
+# gcheader-cast -- `as *const/mut GcHeader` outside gc/ and addr_class.rs; +# grandfathered per-file below. A wildcard entry only +# covers the file as it exists today -- when touching one +# of these files, prefer addr_class::try_read_gc_header +# for any NEW header read. New FILES are not covered. +# +# Band-literal exceptions (deliberate, not band classification): +crates/perry-runtime/src/closure/dynamic_props.rs | for handle in [0x10000usize, | #4740 regression-test fixture: raw in-band addresses asserted NOT to be dereferenced +crates/perry-runtime/src/object/native_module.rs | "O_SYMLINK" => Some(0x200000), | fs.constants O_SYMLINK flag value; unrelated to the handle bands +# +# Grandfathered GcHeader-cast files: +crates/perry-runtime/src/arena/allocators.rs | * | arena allocator/walker internals: header addresses come from block iteration or fresh allocation, never from NaN-box payloads +crates/perry-runtime/src/arena/tests.rs | * | arena allocator/walker internals: header addresses come from block iteration or fresh allocation, never from NaN-box payloads +crates/perry-runtime/src/arena/walk.rs | * | arena allocator/walker internals: header addresses come from block iteration or fresh allocation, never from NaN-box payloads +crates/perry-runtime/src/array/alloc.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/array/concat_reverse.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/array/flat_clone.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up 
+crates/perry-runtime/src/array/generic.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/array/header.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/array/indexing.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/array/is_array.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/array/iter_methods.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/array/iter_object.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/array/iterator.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/array/push_pop.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/array/species.rs | * | pre-existing GcHeader probe predating addr_class; address 
validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/async_hooks.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/buffer/access.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/buffer/header.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/buffer/mod.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/buffer/validate.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/builtins/console.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/builtins/formatting.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/builtins/formatting/collection_equality.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to 
addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/builtins/formatting/identity_equality.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/builtins/formatting/prototype_equality.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/builtins/formatting/util_format.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/builtins/table.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/child_process/mod.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/closure/dispatch.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/cluster.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/collection_iter_object.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a 
follow-up +crates/perry-runtime/src/dgram.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/dns.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/event_target.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/fs/mod.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/fs/stream/write_file_input.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/fs/validate.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/intl.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/iterator_helpers.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/json/mod.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site 
guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/json/reviver.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/json/stringify_api.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/jsx.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/map.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/native_abi.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/native_handle.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/net_validate.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/node_inspector.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up 
+crates/perry-runtime/src/node_repl.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/node_stream_dispatch.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/node_submodules/consumers.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/node_submodules/stream_promises.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/node_submodules/test.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/node_submodules/timers.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/node_submodules/trace_events.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/node_v8.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/node_vm.rs | * | pre-existing 
GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/alloc.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/array_object_ops.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/assert.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/async_generator_queue.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/class_registry.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/delete_rest.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/descriptors.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/exotic_expando.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site 
guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/field_get_set.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/field_set_by_name.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/global_this.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/instanceof.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/mod.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/native_call_method.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/object_ops.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/object/util_types.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to 
addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/os/signal.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/path.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/perf_hooks.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/process.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/promise/async_step.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/promise/combinators.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/promise/mod.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/proxy.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/set.rs | * | pre-existing GcHeader probe predating addr_class; 
address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/string/format.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/string/intern.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/string/locale.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/string/tests.rs | * | test code probing GcHeaders on addresses it allocated itself +crates/perry-runtime/src/symbol.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/timer.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/tls.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/tty.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/typed_feedback.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards 
(magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/typed_feedback/tests.rs | * | test code probing GcHeaders on addresses it allocated itself +crates/perry-runtime/src/typedarray/mod.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/url/url_class.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/util_call_sites.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/util_diff.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/util_parse_args.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/util_promisify.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/util_style_text.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/value/dyn_index.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site 
guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/value/dynamic_object.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/value/to_string.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/wasi.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up +crates/perry-runtime/src/weakref.rs | * | pre-existing GcHeader probe predating addr_class; address validated by call-site guards (magnitude/registry/is_valid_obj_ptr) -- migrate to addr_class::try_read_gc_header in a follow-up diff --git a/scripts/addr_class_inventory.py b/scripts/addr_class_inventory.py new file mode 100644 index 0000000000..9a338c40e7 --- /dev/null +++ b/scripts/addr_class_inventory.py @@ -0,0 +1,308 @@ +#!/usr/bin/env python3 +"""Audit handle-vs-heap-pointer address classification sites. + +Perry NaN-boxes JS values; POINTER_TAG payloads are USUALLY heap pointers but +several subsystems smuggle small integer registry handles under the same tag +(see crates/perry-runtime/src/value/addr_class.rs for the band map). Runtime +code must classify a payload by magnitude through the predicates in +`value::addr_class` BEFORE dereferencing it — hand-re-typed band literals and +unvalidated `as *const GcHeader` casts are the root of a recurring +Linux-only segfault class (#1843, #4004, #4665, #4800). + +Two rule classes: + +1. 
BAND LITERAL — a handle-band boundary literal (0x100000, 0xF0000, 0x40000, + 0xE0000, 0x200000, underscore-separated variants) appearing in code in + perry-runtime/perry-stdlib outside `value/addr_class.rs`. New sites must + call the named `addr_class` predicates/constants instead. + +2. GCHEADER CAST — `as *const/mut GcHeader` outside `gc/` (collector + internals) and `value/addr_class.rs` (the checked `try_read_gc_header` + owner). Pre-existing probe sites are grandfathered through the allowlist + with a justification; new sites should route through + `addr_class::try_read_gc_header` or carry an allowlist entry explaining + what validates the address before the dereference. + +Allowlist: scripts/addr_class_allowlist.txt, same +`path-prefix | line-substring-or-* | justification` format as +scripts/gc_store_site_allowlist.txt. Malformed lines fail the run (exit 2). +""" + +from __future__ import annotations + +import argparse +import re +import sys +from dataclasses import dataclass, field +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +DEFAULT_ALLOWLIST = REPO_ROOT / "scripts" / "addr_class_allowlist.txt" + +SCAN_ROOTS = ( + "crates/perry-runtime/src", + "crates/perry-stdlib/src", +) + +# The module that owns the band constants/predicates, and the collector +# internals that legitimately manipulate GcHeader layout directly. +EXCLUDED_PREFIXES = ( + "crates/perry-runtime/src/value/addr_class.rs", + "crates/perry-runtime/src/gc/", +) + +# Word-bounded band-boundary literals (plus Rust underscore-separator +# variants). 0x100000001b3 (FNV prime), 0x400000 (O_DSYNC), 0x100000000 +# (.text floor) etc. do NOT match because the literal continues with more +# word characters. 
# An optional Rust integer type suffix (`usize`, `_u64`, `i32`, ...) is accepted
# after the literal so typed spellings such as `0x100000usize` are audited too;
# without it the trailing `\b` would fail between the last digit and the suffix.
BAND_LITERAL_RE = re.compile(
    r"0x(?:10_?0000|F_?0000|4_?0000|E_?0000|20_?0000)"
    r"(?:_?[iu](?:8|16|32|64|128|size))?\b",
    re.IGNORECASE,
)

# `as *const GcHeader` / `as *mut GcHeader`, with any module path in front of
# the type name (`crate::gc::`, `super::`, `perry_runtime::gc::`, `::x::`).
GC_HEADER_CAST_RE = re.compile(
    r"as\s+\*(?:const|mut)\s+(?:::)?(?:\w+::)*GcHeader\b"
)

LINE_COMMENT_RE = re.compile(r"//.*$")


@dataclass
class Finding:
    """One audited line: where it is, which rule fired, and the raw source line."""

    rel_path: str
    line_no: int
    rule: str
    line: str

    def render(self) -> str:
        """Format the finding as `path:line: [rule] stripped-source-line`."""
        return f"{self.rel_path}:{self.line_no}: [{self.rule}] {self.line.strip()}"


@dataclass
class AllowlistEntry:
    """One parsed allowlist row; `hits` counts the findings it suppressed."""

    path_prefix: str
    substring: str
    justification: str
    line_no: int
    hits: int = field(default=0)

    def matches(self, finding: Finding) -> bool:
        """True when the finding's path starts with `path_prefix` and its raw
        line contains `substring` (or `substring` is the `*` wildcard)."""
        if not finding.rel_path.startswith(self.path_prefix):
            return False
        return self.substring == "*" or self.substring in finding.line


def strip_comment(line: str) -> str:
    """Return `line` with everything from the first `//` onwards removed."""
    # Good enough for this audit: drop everything after `//`. Band literals
    # inside string literals are not a thing in these crates, and doc-comment
    # mentions of historical values are fine.
    return LINE_COMMENT_RE.sub("", line)


def scan_text(rel_path: str, text: str) -> list[Finding]:
    """Scan one file's text and return a finding per rule hit per line.

    Files under EXCLUDED_PREFIXES yield no findings. Rules are evaluated on the
    comment-stripped line, but the finding keeps the raw line so allowlist
    substrings and the rendered report see the original text. A line that
    trips both rules produces two findings.
    """
    findings: list[Finding] = []
    if any(rel_path.startswith(prefix) for prefix in EXCLUDED_PREFIXES):
        return findings
    for line_no, raw in enumerate(text.splitlines(), 1):
        code = strip_comment(raw)
        if BAND_LITERAL_RE.search(code):
            findings.append(Finding(rel_path, line_no, "band-literal", raw))
        if GC_HEADER_CAST_RE.search(code):
            findings.append(Finding(rel_path, line_no, "gcheader-cast", raw))
    return findings


def collect_inventory() -> tuple[list[Finding], int]:
    """Scan every `*.rs` file under SCAN_ROOTS.

    Returns `(findings, files_scanned)`. Files are visited in sorted order per
    root so the report is stable between runs.
    """
    findings: list[Finding] = []
    files_scanned = 0
    for root in SCAN_ROOTS:
        for path in sorted((REPO_ROOT / root).rglob("*.rs")):
            rel_path = path.relative_to(REPO_ROOT).as_posix()
            # Skip parked/hidden trees (e.g. `.value.parked/`) — not compiled.
            if any(part.startswith(".") for part in rel_path.split("/")):
                continue
            files_scanned += 1
            findings.extend(scan_text(rel_path, path.read_text(encoding="utf-8")))
    return findings, files_scanned


def load_allowlist(path: Path) -> list[AllowlistEntry]:
    """Parse `path-prefix | line-substring-or-* | justification` lines.

    Every entry MUST carry a non-empty justification; a malformed line is a
    hard error so the allowlist can't silently rot. Blank lines and lines
    starting with `#` are skipped. A missing file yields an empty allowlist.

    Raises:
        SystemExit(2): after printing every malformed line to stderr.
    """

    if not path.is_file():
        return []
    entries: list[AllowlistEntry] = []
    errors: list[str] = []
    for line_no, raw in enumerate(path.read_text(encoding="utf-8").splitlines(), 1):
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        # maxsplit=2 keeps any further `|` characters inside the justification.
        parts = [part.strip() for part in line.split("|", 2)]
        if len(parts) != 3 or not parts[0] or not parts[1] or not parts[2]:
            errors.append(
                f"{path.name}:{line_no}: expected "
                "'path-prefix | line-substring-or-* | justification', got: " + raw
            )
            continue
        entries.append(AllowlistEntry(parts[0], parts[1], parts[2], line_no))
    if errors:
        for error in errors:
            print(error, file=sys.stderr)
        raise SystemExit(2)
    return entries


def apply_allowlist(
    findings: list[Finding], entries: list[AllowlistEntry]
) -> tuple[list[Finding], int]:
    """Drop findings covered by the allowlist.

    Each finding is charged to the FIRST matching entry (its `hits` counter is
    incremented in place). Returns `(kept_findings, suppressed_count)`.
    """
    kept: list[Finding] = []
    suppressed = 0
    for finding in findings:
        entry = next((e for e in entries if e.matches(finding)), None)
        if entry is None:
            kept.append(finding)
        else:
            entry.hits += 1
            suppressed += 1
    return kept, suppressed


def run_self_tests() -> int:
    """Exercise the scanner and allowlist matcher on inline fixtures.

    Returns 0 when every expectation holds, 1 otherwise (each failure is
    printed to stderr).
    """
    failures: list[str] = []

    def expect(cond: bool, message: str) -> None:
        if not cond:
            failures.append(message)

    runtime = "crates/perry-runtime/src/foo.rs"

    # Band literals in code are caught; comment-only mentions are not.
    hits = scan_text(runtime, "if addr < 0x100000 {\n")
    expect(
        len(hits) == 1 and hits[0].rule == "band-literal",
        "band literal in code should be flagged",
    )
    expect(
        not scan_text(runtime, "// historic floor was 0x100000\n"),
        "band literal in a comment should be ignored",
    )
    expect(
        bool(scan_text(runtime, "if (0xF0000..0x100000).contains(&a) {}\n")),
        "proxy band range should be flagged",
    )
    expect(
        bool(scan_text(runtime, "const X: usize = 0x4_0000;\n")),
        "underscore variant should be flagged",
    )

    # Typed spellings of a band literal are still band literals.
    for typed in (
        "if addr < 0x100000usize {\n",
        "const X: u64 = 0x10_0000_u64;\n",
    ):
        expect(
            bool(scan_text(runtime, typed)),
            f"type-suffixed band literal should be flagged: {typed!r}",
        )

    # Neighbouring literals that merely contain a band prefix must not match.
    for benign in (
        "h = h.wrapping_mul(0x100000001b3);\n",
        '"O_DSYNC" => Some(0x400000),\n',
        "if !(0x100000000..=0x400000000).contains(&f) {}\n",
        "let mask = 0x0000_FFFF_FFFF_FFFF;\n",
    ):
        expect(not scan_text(runtime, benign), f"benign literal flagged: {benign!r}")

    # GcHeader casts are caught in every path form. Check for emptiness before
    # indexing so a regression is reported as a failure, not an IndexError.
    qualified = scan_text(runtime, "let h = (a - 8) as *const crate::gc::GcHeader;\n")
    expect(
        bool(qualified) and qualified[0].rule == "gcheader-cast",
        "qualified GcHeader cast should be flagged",
    )
    expect(
        bool(scan_text(runtime, "let h = p.sub(8) as *mut GcHeader;\n")),
        "bare GcHeader cast should be flagged",
    )
    expect(
        bool(
            scan_text(runtime, "let h = p as *const perry_runtime::gc::GcHeader;\n")
        ),
        "externally qualified GcHeader cast should be flagged",
    )
    expect(
        not scan_text(runtime, "let h = p as *const GcHeaderExt;\n"),
        "a different type sharing the GcHeader prefix must not be flagged",
    )

    # Owner module and collector internals are exempt.
    expect(
        not scan_text(
            "crates/perry-runtime/src/value/addr_class.rs",
            "pub const HANDLE_BAND_MAX: usize = 0x100000;\n",
        ),
        "addr_class.rs must be exempt",
    )
    expect(
        not scan_text(
            "crates/perry-runtime/src/gc/mod.rs",
            "let h = a as *const GcHeader;\n",
        ),
        "gc/ must be exempt",
    )

    # Allowlist matching: prefix + substring, prefix + wildcard.
    finding = Finding(runtime, 1, "gcheader-cast", "x as *const GcHeader")
    expect(
        AllowlistEntry("crates/perry-runtime/src/foo.rs", "*", "j", 1).matches(finding),
        "wildcard entry should match",
    )
    expect(
        AllowlistEntry("crates/perry-runtime/src/foo.rs", "GcHeader", "j", 1).matches(
            finding
        ),
        "substring entry should match",
    )
    expect(
        not AllowlistEntry("crates/perry-runtime/src/bar.rs", "*", "j", 1).matches(
            finding
        ),
        "other-path entry must not match",
    )

    if failures:
        for failure in failures:
            print(f"self-test failure: {failure}", file=sys.stderr)
        return 1
    print("addr-class inventory self-tests passed.")
    return 0


def main(argv: list[str] | None = None) -> int:
    """CLI entry point.

    `--self-test` runs the inline fixtures and returns their status. Otherwise
    the scan roots are audited against the allowlist: returns 1 and lists the
    offending lines when any finding is left, 0 when the audit is clean.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--self-test", action="store_true")
    parser.add_argument("--allowlist", type=Path, default=DEFAULT_ALLOWLIST)
    parser.add_argument(
        "--list-unused-allowlist",
        action="store_true",
        help="also report allowlist entries that matched nothing",
    )
    args = parser.parse_args(argv)
    if args.self_test:
        return run_self_tests()

    findings, files_scanned = collect_inventory()
    entries = load_allowlist(args.allowlist)
    findings, suppressed = apply_allowlist(findings, entries)

    # Informational only: unused entries never change the exit status.
    if args.list_unused_allowlist:
        for entry in entries:
            if entry.hits == 0:
                print(
                    f"unused allowlist entry ({args.allowlist.name}:{entry.line_no}): "
                    f"{entry.path_prefix} | {entry.substring}"
                )

    if findings:
        print(
            "Address-classification audit failed; use the predicates/constants in\n"
            "crates/perry-runtime/src/value/addr_class.rs (is_handle_band /\n"
            "is_small_handle / is_proxy_id_band / try_read_gc_header / ...) instead\n"
            "of re-typing band literals or casting to GcHeader, or add a justified\n"
            "entry to scripts/addr_class_allowlist.txt:"
        )
        for finding in findings:
            print(f" {finding.render()}")
        return 1

    print(
        f"Address-classification audit passed "
        f"({files_scanned} files scanned, {suppressed} allowlisted)."
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())