From 446a8199763df4e532a38806360fc544983757e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 13 Jun 2026 12:35:02 +0200 Subject: [PATCH] =?UTF-8?q?fix(runtime):=20node:http2=20=E2=80=94=20server?= =?UTF-8?q?.close()=20swallowed=20by=20colliding=20timer=20id?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: Perry's Node timer ids (NEXT_TIMER_ID, from 1) and the perry-ffi handle registry ids (NEXT_HANDLE, from 1) share the pointer-tagged small-integer value space and both count from 1, so a value like `POINTER_TAG | 1` is ambiguous between an HTTP/2 server handle and a `setTimeout` id. js_native_call_method's timer-method arm fires on is_known_timer_id(id) alone, so when a server (handle 1) and a timer (id 1) were alive at once, `server.close()` was routed to clearTimeout(1) instead of js_node_http2_server_close. The server never closed, base.listening stayed true, js_node_http_server_has_active kept returning 1, and the event loop parked forever (node:http2 session/controls.ts hung; exit 124). Fix: an authoritative FFI-handle-existence probe. perry-ffi registers ffi_handle_exists_probe (HANDLES.contains_key) with perry-runtime on first register_handle; the timer arm now yields (is_known_timer_id(id) && !ffi_handle_exists(id)) so a live registered handle wins over the ambiguous timer-id heuristic. Provably inert for pure-JS/test262: with no register_handle the probe is unregistered and ffi_handle_exists returns false, leaving the timer arm byte-identical. node-suite http2: the suite no longer hangs on case 9 (controls.ts now exits 0). Collateral: 3 http tests that combine server handles + timers now pass (client-request/header-state-controls, incoming-message/client-set-encoding, incoming-message/server-headers-body). 0 regressions across timers/net/http/https. controls.ts remains flaky for a Node-side reason: the server 'session' vs client 'connect' event ordering is a genuine loopback race in node v26 (measured server-first 12 : client-first 8 over 20 runs), and sequential-session-cleanup deterministically requires the opposite order — no single deterministic Perry ordering matches both, so Perry keeps client-first (the other 8 cases green). --- crates/perry-ffi/src/handle.rs | 27 ++++++++++++++ .../perry-runtime/src/object/class_handles.rs | 36 +++++++++++++++++++ .../src/object/native_call_method.rs | 11 +++++- 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/crates/perry-ffi/src/handle.rs b/crates/perry-ffi/src/handle.rs index 5edb091bc4..062f24d3d3 100644 --- a/crates/perry-ffi/src/handle.rs +++ b/crates/perry-ffi/src/handle.rs @@ -105,6 +105,32 @@ extern "C" { scanner_id: usize, scanner: PerryFfiNamedMutableRootScanner, ); + /// perry-runtime hook: register a probe the runtime's generic method + /// dispatcher consults to tell a `register_handle` id apart from a Node + /// timer id (both occupy the pointer-tagged small-integer band). Resolved + /// at final link (perry-runtime is always linked into the binary). + fn js_register_ffi_handle_exists_probe(probe: extern "C" fn(handle: i64) -> bool); +} + +/// Probe handed to perry-runtime: is `handle` a live entry in this registry? +/// Used to disambiguate a `POINTER_TAG | id` value that names both a live +/// handle and a live timer (e.g. HTTP/2 server handle 1 vs `setTimeout` id 1), +/// so the runtime routes `server.close()` to the handle rather than swallowing +/// it as `clearTimeout`. See `class_handles::ffi_handle_exists`. +extern "C" fn ffi_handle_exists_probe(handle: Handle) -> bool { + HANDLES.contains_key(&handle) +} + +/// Register [`ffi_handle_exists_probe`] with perry-runtime exactly once, the +/// first time any handle is created. Done lazily (rather than at an init entry +/// point perry-ffi doesn't own) so it is wired up before any handle value can +/// reach the runtime's generic dispatcher. +fn ensure_handle_exists_probe_registered() { + use std::sync::Once; + static REGISTER: Once = Once::new(); + REGISTER.call_once(|| unsafe { + js_register_ffi_handle_exists_probe(ffi_handle_exists_probe); + }); } /// Function pointer type for native wrappers that expose mutable GC root slots. @@ -193,6 +219,7 @@ impl<'a> GcRootVisitor<'a> { /// across threads (tokio workers may resolve promises that touch /// handle data while the main thread is also touching it). pub fn register_handle(value: T) -> Handle { + ensure_handle_exists_probe_registered(); let handle = next_handle_id(); HANDLES.insert(handle, Box::new(value)); handle diff --git a/crates/perry-runtime/src/object/class_handles.rs b/crates/perry-runtime/src/object/class_handles.rs index 38d584a00b..3a3dde11c6 100644 --- a/crates/perry-runtime/src/object/class_handles.rs +++ b/crates/perry-runtime/src/object/class_handles.rs @@ -101,6 +101,15 @@ pub type EventEmitterSetDomainFn = unsafe extern "C" fn(handle: i64, domain: i64 /// pointer-tagged small integer handles, not heap objects with class ids. pub type NetSocketHandleProbeFn = unsafe extern "C" fn(handle: i64) -> bool; +/// Probe for live `perry-ffi` registry handles. `register_handle`-issued ids +/// and Node timer ids both occupy the pointer-tagged small-integer band and +/// both count from 1, so a bare id is ambiguous between (say) an HTTP/2 server +/// handle and a `setTimeout` id. The timer-method dispatch arm consults this +/// probe to yield to an authoritative registered handle when the id is one, +/// so `server.close()` (handle 1) is not swallowed by `clearTimeout(1)` when a +/// timer with the colliding id also happens to be alive. +pub type FfiHandleExistsProbeFn = unsafe extern "C" fn(handle: i64) -> bool; + /// Narrow registration hook for runtime code that needs to attach an /// EventEmitter listener without routing through the generic handle dispatcher. pub type EventEmitterOnFn = @@ -143,6 +152,7 @@ static EVENT_EMITTER_ASYNC_RESOURCE_HANDLE_PROBE_PTR: AtomicPtr<()> = static EVENT_EMITTER_GET_DOMAIN_PTR: AtomicPtr<()> = AtomicPtr::new(ptr::null_mut()); static EVENT_EMITTER_SET_DOMAIN_PTR: AtomicPtr<()> = AtomicPtr::new(ptr::null_mut()); static NET_SOCKET_HANDLE_PROBE_PTR: AtomicPtr<()> = AtomicPtr::new(ptr::null_mut()); +static FFI_HANDLE_EXISTS_PROBE_PTR: AtomicPtr<()> = AtomicPtr::new(ptr::null_mut()); static EVENT_EMITTER_ON_PTR: AtomicPtr<()> = AtomicPtr::new(ptr::null_mut()); const TAG_UNDEFINED: u64 = 0x7FFC_0000_0000_0001; @@ -487,6 +497,32 @@ pub unsafe extern "C" fn js_register_net_socket_handle_probe(f: NetSocketHandleP NET_SOCKET_HANDLE_PROBE_PTR.store(f as *mut (), Ordering::Release); } +#[inline] +pub fn ffi_handle_exists_probe() -> Option { + let p = FFI_HANDLE_EXISTS_PROBE_PTR.load(Ordering::Acquire); + if p.is_null() { + None + } else { + Some(unsafe { std::mem::transmute::<*mut (), FfiHandleExistsProbeFn>(p) }) + } +} + +/// Returns `true` only when a probe is registered AND it confirms `id` is a +/// live `perry-ffi` registry handle. Absent a probe (no native wrapper linked) +/// this is `false`, preserving the prior behavior. +#[inline] +pub fn ffi_handle_exists(id: i64) -> bool { + match ffi_handle_exists_probe() { + Some(probe) => unsafe { probe(id) }, + None => false, + } +} + +#[no_mangle] +pub unsafe extern "C" fn js_register_ffi_handle_exists_probe(f: FfiHandleExistsProbeFn) { + FFI_HANDLE_EXISTS_PROBE_PTR.store(f as *mut (), Ordering::Release); +} + #[inline] pub fn event_emitter_on() -> Option { let p = EVENT_EMITTER_ON_PTR.load(Ordering::Acquire); diff --git a/crates/perry-runtime/src/object/native_call_method.rs b/crates/perry-runtime/src/object/native_call_method.rs index 9159468b67..949d2be28d 100644 --- a/crates/perry-runtime/src/object/native_call_method.rs +++ b/crates/perry-runtime/src/object/native_call_method.rs @@ -2009,7 +2009,16 @@ pub unsafe extern "C" fn js_native_call_method( let top16 = bits >> 48; if top16 == 0x7FFD { let id = (bits & 0x0000_FFFF_FFFF_FFFF) as i64; - if crate::timer::is_known_timer_id(id) { + // Timer ids and `perry-ffi` registry handles share the pointer-tagged + // small-integer band and both count from 1, so a bare id can be + // ambiguous (e.g. an HTTP/2 server handle 1 vs a `setTimeout` id 1 + // alive at the same time). A live registered handle is the + // authoritative interpretation — it owns a real Rust object and its + // method surface (`close`/`ref`/`unref`/…) — so yield to the handle + // dispatch below rather than swallow `server.close()` as + // `clearTimeout`. A genuine timer whose id does not also name a live + // handle still resolves here. + if crate::timer::is_known_timer_id(id) && !super::class_handles::ffi_handle_exists(id) { match method_name { "ref" => { crate::timer::js_timer_ref(id);