Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ jobs:
python3 scripts/gc_store_site_inventory.py --self-test
python3 scripts/gc_store_site_inventory.py

# Handle-vs-pointer address classification audit: POINTER_TAG payloads
# can be heap pointers OR small registry handles (fetch/zlib/proxy/...),
# and code must classify by magnitude through value/addr_class.rs before
# dereferencing. Catches new hand-typed band literals (0x100000 etc.)
# and new `as *const GcHeader` casts outside addr_class.rs / gc/ before
# they become Linux-only segfaults (#1843, #4004, #4665, #4800 class).
# Allowlist: scripts/addr_class_allowlist.txt.
- name: Address-classification audit
run: |
python3 scripts/addr_class_inventory.py --self-test
python3 scripts/addr_class_inventory.py

# ---------------------------------------------------------------------------
# API docs drift gate (#465)
#
Expand Down
2 changes: 1 addition & 1 deletion crates/perry-runtime/src/array/flat_clone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ pub extern "C" fn js_array_clone(src: *const ArrayHeader) -> *mut ArrayHeader {
// as pointer-shaped ids. `Array.from(handle)` / `[...handle]` reach this
// helper after codegen strips the tag, so ask the generic iterator resolver
// before treating the id as a non-array and returning [].
if raw_addr > 0 && raw_addr < 0x100000 {
if crate::value::addr_class::is_small_handle(raw_addr) {
if let Some(dispatch) = crate::object::handle_property_dispatch() {
let method = b"@@iterator";
let iter_fn = unsafe { dispatch(raw_addr as i64, method.as_ptr(), method.len()) };
Expand Down
2 changes: 1 addition & 1 deletion crates/perry-runtime/src/array/is_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub extern "C" fn js_array_is_array(value: f64) -> f64 {
if raw_ptr.is_null() {
return false_val;
}
if (raw_ptr as usize) < 0x100000 {
if crate::value::addr_class::is_handle_band(raw_ptr as usize) {
return false_val;
}

Expand Down
22 changes: 11 additions & 11 deletions crates/perry-runtime/src/array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ pub(crate) fn entries_array_for_small_handle_value(value: f64) -> Option<*mut Ar
}

pub(crate) fn entries_array_for_small_handle_id(id: i64) -> Option<*mut ArrayHeader> {
if id <= 0 || id >= 0x100000 {
if id <= 0 || !crate::value::addr_class::is_small_handle(id as usize) {
return None;
}
let dispatch = crate::object::handle_method_dispatch()?;
Expand Down Expand Up @@ -726,16 +726,16 @@ pub extern "C" fn js_array_spread_append(dest: *mut ArrayHeader, source: f64) ->
/// (now inherited) `[Symbol.iterator]` method.
pub(crate) fn is_builtin_iterator_class_id(raw_ptr: usize) -> bool {
// Native handle ids (Web-Fetch Headers/Request/Response, streams, ws, DB,
// …) are NaN-boxed POINTER values in the small-handle band `[1, 0x100000)`:
// registry indices, NOT heap pointers. Dereferencing `raw_ptr - 8` as a
// GcHeader for one of them reads unmapped memory and segfaults — e.g.
// `for (const [k, v] of response.headers)`, where the lazy `for…of`
// protocol (#4786) routes the Headers handle (id >= 0x40000) through
// `js_get_iterator`, which calls this check. Reject the whole handle band,
// matching the `< 0x100000` floor used by `Array.isArray` and
// `try_dispatch_instance_method_value`. A real built-in iterator is always a
// heap object well above this floor, so this never loses a true match.
if raw_ptr < 0x100000 {
// …) are NaN-boxed POINTER values in the small-handle band (see
// `value::addr_class`): registry indices, NOT heap pointers. Dereferencing
// `raw_ptr - 8` as a GcHeader for one of them reads unmapped memory and
// segfaults — e.g. `for (const [k, v] of response.headers)` (#4800), where
// the lazy `for…of` protocol (#4786) routes the Headers handle
// (id >= 0x40000) through `js_get_iterator`, which calls this check.
// Reject the whole handle band, matching `Array.isArray` and
// `try_dispatch_instance_method_value`. A real built-in iterator is always
// a heap object well above this floor, so this never loses a true match.
if crate::value::addr_class::is_handle_band(raw_ptr) {
return false;
}
unsafe {
Expand Down
9 changes: 5 additions & 4 deletions crates/perry-runtime/src/builtins/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,15 @@ pub extern "C" fn js_eq(a: JSValue, b: JSValue) -> JSValue {
/// Whether `v` has ECMAScript Type Object (not a primitive). True for plain
/// objects, arrays, functions, Dates and boxed primitive wrappers; false for
/// Symbols (which are NaN-boxed pointers but are primitives) and for the
/// native-handle id-space below `0x100000` (sockets, zlib streams, …) which
/// must not be dereferenced as heap objects in a coercion path.
/// native-handle id-space (sockets, zlib streams, … — see
/// `value::addr_class`) which must not be dereferenced as heap objects in a
/// coercion path.
fn eq_is_object(v: JSValue) -> bool {
if !v.is_pointer() {
return false;
}
let ptr = v.as_pointer::<u8>();
if ptr.is_null() || (ptr as usize) < 0x100000 {
if crate::value::addr_class::is_handle_band(ptr as usize) {
return false;
}
!crate::symbol::is_registered_symbol(ptr as usize)
Expand Down Expand Up @@ -508,7 +509,7 @@ pub extern "C" fn js_value_typeof(value: f64) -> *mut StringHeader {
get_cached(&TYPEOF_OBJECT, "object")
};
}
if !ptr.is_null() && (ptr as usize) >= 0x100000 {
if crate::value::addr_class::is_above_handle_band(ptr as usize) {
// Symbols: registered in SYMBOL_POINTERS (handles both gc_malloc'd
// and Box-leaked symbols, which have no GcHeader).
if crate::symbol::is_registered_symbol(ptr as usize) {
Expand Down
2 changes: 1 addition & 1 deletion crates/perry-runtime/src/builtins/console.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ pub(crate) fn is_console_instance_value(value: f64) -> bool {
return false;
}
let obj = jsval.as_pointer::<crate::object::ObjectHeader>();
if obj.is_null() || (obj as usize) < 0x100000 {
if crate::value::addr_class::is_handle_band(obj as usize) {
return false;
}
unsafe { (*obj).class_id == CONSOLE_INSTANCE_CLASS_ID }
Expand Down
4 changes: 2 additions & 2 deletions crates/perry-runtime/src/builtins/formatting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ pub(crate) fn format_jsvalue(value: f64, depth: usize) -> String {
// is smaller than an ObjectHeader).
crate::temporal::temporal_inspect_string(value)
.unwrap_or_else(|| "[object Object]".to_string())
} else if (ptr as usize) < 0x100000 {
} else if crate::value::addr_class::is_handle_band(ptr as usize) {
// Refs #421: Web Fetch (and other) handles are NaN-boxed
// POINTER_TAG values whose payload is a small registry id, NOT
// a heap pointer — reading the GC header at `ptr - 8` would
Expand Down Expand Up @@ -1530,7 +1530,7 @@ fn format_jsvalue_for_json(value: f64, depth: usize) -> String {
// Temporal value inside an inspected object → `Temporal.X <iso>`.
crate::temporal::temporal_inspect_string(value)
.unwrap_or_else(|| "[object Object]".to_string())
} else if (ptr as usize) < 0x100000 {
} else if crate::value::addr_class::is_handle_band(ptr as usize) {
"[object Object]".to_string()
} else if crate::symbol::is_registered_symbol(ptr as usize)
|| crate::regex::is_registered_regex(ptr as usize)
Expand Down
17 changes: 9 additions & 8 deletions crates/perry-runtime/src/builtins/formatting/boxed_primitives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub(super) unsafe fn boxed_primitive_base_for_object(
unsafe fn boxed_primitive_payload_for_object(
obj_ptr: *const crate::object::ObjectHeader,
) -> Option<(u32, f64)> {
if (obj_ptr as usize) < 0x100000 || !crate::object::is_valid_obj_ptr(obj_ptr as *const u8) {
if !crate::value::addr_class::is_plausible_heap_addr(obj_ptr as usize) {
return None;
}
let class_id = (*obj_ptr).class_id;
Expand Down Expand Up @@ -196,20 +196,21 @@ pub(crate) fn boxed_primitive_payload(value: f64) -> Option<(u32, f64)> {
let bits = value.to_bits();
let ptr = if jv.is_pointer() {
jv.as_pointer::<crate::object::ObjectHeader>() as *mut crate::object::ObjectHeader
} else if (bits >> 48) == 0 && bits >= 0x100000 {
} else if (bits >> 48) == 0 && crate::value::addr_class::is_above_handle_band(bits as usize) {
bits as *mut crate::object::ObjectHeader
} else {
return None;
};
// This is a defensive type-probe over arbitrary `f64` bits, so a candidate
// that isn't a real heap object must be rejected *before* the `class_id`
// read — otherwise a small subnormal double (e.g. raw bits `0x2800000207`)
// that slips through the `>= 0x100000` raw-pointer heuristic is dereferenced
// as an `ObjectHeader` and faults. Keep the `0x100000` small-handle floor
// (the fetch/Headers id-space lives below it and `is_valid_obj_ptr`'s Linux
// `HEAP_MIN` of `0x1000` would otherwise let those handles through), and
// additionally gate on the real heap range (#4099).
if (ptr as usize) < 0x100000 || !crate::object::is_valid_obj_ptr(ptr as *const u8) {
// that slips through the raw-pointer heuristic above is dereferenced as an
// `ObjectHeader` and faults. `is_plausible_heap_addr` keeps the
// small-handle floor (the fetch/Headers id-space lives below it and
// `is_valid_obj_ptr`'s Linux `HEAP_MIN` of `0x1000` would otherwise let
// those handles through) and additionally gates on the real heap range
// (#4099).
if !crate::value::addr_class::is_plausible_heap_addr(ptr as usize) {
return None;
}
unsafe { boxed_primitive_payload_for_object(ptr) }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ unsafe fn util_format_json_value_has_cycle(value: f64, stack: &mut Vec<usize>) -

unsafe fn util_format_json_ptr_has_cycle(ptr: *const u8, stack: &mut Vec<usize>) -> bool {
let addr = ptr as usize;
if addr < 0x100000
if crate::value::addr_class::is_handle_band(addr)
|| crate::buffer::is_registered_buffer(addr)
|| crate::symbol::is_registered_symbol(addr)
{
Expand Down
4 changes: 3 additions & 1 deletion crates/perry-runtime/src/builtins/numbers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,9 @@ pub extern "C" fn js_number_coerce(value: f64) -> f64 {
// identifiers, so test assertions like `typeof x === "number"`
// hold). Gate on the timer registry so unrelated small handles
// (UI widgets, drizzle, etc.) still fall through to toPrimitive.
if id > 0 && id < 0x100000 && crate::timer::is_known_timer_id(id) {
if crate::value::addr_class::is_small_handle(id as usize)
&& crate::timer::is_known_timer_id(id)
{
return id as f64;
}
// Array → ToPrimitive(number) finds no `valueOf` override, so it
Expand Down
20 changes: 9 additions & 11 deletions crates/perry-runtime/src/closure/dynamic_props.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,17 +271,15 @@ pub fn scan_closure_dynamic_props_roots_mut(visitor: &mut crate::gc::RuntimeRoot
/// Check if a raw pointer points to a ClosureHeader by checking CLOSURE_MAGIC at offset 12.
/// Safe to call with any non-null, sufficiently aligned pointer >= 0x10000.
pub fn is_closure_ptr(ptr: usize) -> bool {
// Reject the native / Web-Fetch small-handle band (< 0x100000). Fetch
// handles (Headers/Request/Response/Blob, [0x40000, 0x100000)), node:http
// handles, and revocable-proxy ids ([0xF0000, 0x100000)) are NaN-boxed
// POINTER_TAG values holding a small registry id, not heap pointers — a
// real closure is always a heap allocation well above 0x100000. The old
// 0x10000 floor let a 0x40000 Headers handle through, so the
// `*(ptr + 12)` CLOSURE_MAGIC probe below dereferenced unmapped low
// memory and SIGSEGVd on Linux (macOS masked it via the much higher
// is_valid_obj_ptr heap floor). 0x100000 matches the cutoff used across
// the object field-read paths (field_get_set.rs / class_registry.rs).
if ptr < 0x100000 {
// Reject the native / Web-Fetch small-handle band (see
// `value::addr_class` for the band map). Fetch handles, node:http
// handles, and revocable-proxy ids are NaN-boxed POINTER_TAG values
// holding a small registry id, not heap pointers — a real closure is
// always a heap allocation above the band. The old 0x10000 floor let a
// 0x40000 Headers handle through, so the `*(ptr + 12)` CLOSURE_MAGIC
// probe below dereferenced unmapped low memory and SIGSEGVd on Linux
// (macOS masked it via the much higher is_valid_obj_ptr heap floor).
if crate::value::addr_class::is_handle_band(ptr) {
return false;
}
if ptr % std::mem::align_of::<ClosureHeader>() != 0 {
Expand Down
25 changes: 13 additions & 12 deletions crates/perry-runtime/src/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,20 @@ pub fn date_invalid() -> f64 {
pub fn is_date_cell_addr(addr: usize) -> bool {
// #4004: small-handle registry ids (Web Fetch Request/Headers/Response,
// perry-ffi/node:http handles, timer ids, …) are NaN-boxed as POINTER_TAG
// values but are NOT real heap addresses — they live in the `< 0x100000`
// small-handle band. Real `DateCell`s are arena-allocated, always at or
// above the small-handle cutoff. Dereferencing `addr - GC_HEADER_SIZE` on a
// small handle reads unmapped memory: once #4018 moved fetch handles up to
// 0x40000 (past the old 0x1000 floor), any untyped `request.headers.get()`
// dispatch routed its receiver through `is_date_value` here and segfaulted.
// Reject the whole small-handle band so this is an exact heap-pointer check.
if addr < 0x100000 || !crate::object::is_valid_obj_ptr(addr as *const u8) {
return false;
}
// values but are NOT real heap addresses — they live in the small-handle
// band (see `value::addr_class`). Real `DateCell`s are arena-allocated,
// always above the small-handle cutoff. Dereferencing
// `addr - GC_HEADER_SIZE` on a small handle reads unmapped memory: once
// #4018 moved fetch handles up to 0x40000 (past the old 0x1000 floor), any
// untyped `request.headers.get()` dispatch routed its receiver through
// `is_date_value` here and segfaulted. `try_read_gc_header` rejects the
// whole band before the deref, so this is an exact heap-pointer check.
unsafe {
let header = (addr - crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader;
if (*header).obj_type != crate::gc::GC_TYPE_DATE_CELL {
let header = match crate::value::addr_class::try_read_gc_header(addr) {
Some(header) => header,
None => return false,
};
if header.obj_type != crate::gc::GC_TYPE_DATE_CELL {
return false;
}
// #4003: `Buffer`s are raw-`alloc`'d with NO `GcHeader`, so the word at
Expand Down
2 changes: 1 addition & 1 deletion crates/perry-runtime/src/fs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ fn object_class_id(value: f64) -> Option<u32> {
return None;
}
let obj = js_value.as_pointer::<crate::object::ObjectHeader>();
if obj.is_null() || (obj as usize) < 0x100000 {
if crate::value::addr_class::is_handle_band(obj as usize) {
return None;
}
unsafe {
Expand Down
2 changes: 1 addition & 1 deletion crates/perry-runtime/src/json/raw_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ pub unsafe extern "C" fn js_json_is_raw_json(value: f64) -> f64 {
return false_val;
}
let ptr = (bits & crate::value::POINTER_MASK) as *const u8;
if ptr.is_null() || (ptr as usize) < 0x100000 {
if crate::value::addr_class::is_handle_band(ptr as usize) {
return false_val;
}
if ptr_is_raw_json_wrapper(ptr) {
Expand Down
4 changes: 2 additions & 2 deletions crates/perry-runtime/src/json/reviver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ unsafe fn force_materialize_if_lazy(value: JSValue) -> JSValue {
return value;
}
let ptr = (bits & 0x0000_FFFF_FFFF_FFFF) as *const u8;
if ptr.is_null() || (ptr as usize) < 0x100000 {
if crate::value::addr_class::is_handle_band(ptr as usize) {
return value;
}
let gc_header = ptr.sub(crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader;
Expand Down Expand Up @@ -416,7 +416,7 @@ unsafe fn json_is_object(value: f64) -> bool {
return true;
}
if let Some(ptr) = extract_pointer(value.to_bits()) {
if ptr.is_null() || (ptr as usize) < 0x100000 {
if crate::value::addr_class::is_handle_band(ptr as usize) {
return false;
}
return gc_obj_type(ptr) == crate::gc::GC_TYPE_OBJECT;
Expand Down
49 changes: 21 additions & 28 deletions crates/perry-runtime/src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,38 +69,31 @@ fn register_map(ptr: *mut MapHeader) {
}

pub fn is_registered_map(addr: usize) -> bool {
// Fast pre-filter: managed Maps carry `GcHeader.obj_type ==
// GC_TYPE_MAP` at `addr - GC_HEADER_SIZE`. A single i8 load + cmp
// short-circuits the non-Map path (the common case across the
// typed-dispatch chain `if is_registered_map { ... } else if
// is_registered_set { ... } ...`) without paying the
// `HashSet<usize>::contains` SipHash. The HashSet check still runs
// on byte-matches to defend against:
// 1. False-positive aliasing — another managed object or a non-GC
// allocation (for example a small BufferHeader slab entry) whose
// preceding byte happens to read as 8.
// 2. Stale post-sweep ptrs — drop_map_index removes from
// MAP_REGISTRY; the GcHeader byte may persist until the slot
// is reused.
// Profile (samply, perf-comprehensive): ~5.7% inclusive samples
// were attributed to is_registered_map's HashSet lookup before
// this fast path landed.
// #4004: small-handle registry ids (Web Fetch, perry-ffi/node:http, timers,
// …) are NaN-boxed POINTER_TAG values living below the `0x100000`
// small-handle cutoff; they are not heap addresses. Managed Maps are
// arena-allocated above it, so reject the whole small-handle band before
// dereferencing `addr - GC_HEADER_SIZE` (deref'ing e.g. a 0x40000 fetch
// handle reads unmapped memory and segfaults — see is_date_cell_addr).
if addr < 0x100000 {
// …) are NaN-boxed POINTER_TAG values living below the small-handle
// cutoff; they are not heap addresses. Managed Maps are arena-allocated
// above it. See `value::addr_class` for the band map.
if crate::value::addr_class::is_handle_band(addr) {
return false;
}
unsafe {
let header = (addr - crate::gc::GC_HEADER_SIZE) as *const crate::gc::GcHeader;
if (*header).obj_type != crate::gc::GC_TYPE_MAP {
return false;
}
// Registry FIRST: it is authoritative and dereference-free (mirrors
// set::is_registered_set, #4665). The previous ordering probed
// `GcHeader.obj_type` at `addr - 8` as a fast pre-filter BEFORE the
// registry lookup — that dereferenced arbitrary above-band candidate
// pointers (e.g. garbage read off a mis-typed receiver) and segfaults on
// Linux where freed/foreign pages get unmapped (mimalloc on macOS retains
// them, hiding the bug). The pre-filter's perf rationale (a ~5.7%-sample
// SipHash `HashSet::contains`) predates MAP_REGISTRY moving to the
// Fibonacci-hash `PtrHashSet`, which is what set.rs ships with today.
if !MAP_REGISTRY.with(|r| r.borrow().contains(&addr)) {
return false;
}
// A registered address is a live arena Map; the header read is safe and
// guards against a stale entry whose memory was reused by another type.
match unsafe { crate::value::addr_class::try_read_gc_header(addr) } {
Some(header) => header.obj_type == crate::gc::GC_TYPE_MAP,
None => false,
}
MAP_REGISTRY.with(|r| r.borrow().contains(&addr))
}

/// Resolve a NaN-boxed (or raw-i64) `this` receiver to a registered `Map`
Expand Down
Loading
Loading