From 4848568f3ddf16672f0a0349589b1027e5c9d7ed Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Fri, 17 Oct 2025 17:00:37 +0800
Subject: [PATCH 01/13] Add pool in-place reset mode

---
 crates/aardvark-core/src/engine.rs            |  49 +++++---
 crates/aardvark-core/src/pool.rs              |  23 +++-
 .../aardvark-core/src/runtime/javascript.rs   |   7 ++
 crates/aardvark-core/src/runtime/mod.rs       |  23 ++++
 crates/aardvark-core/src/runtime/python.rs    |   9 ++
 .../tests/runtime_pool_and_outcome.rs         | 108 +++++++++++++++++-
 docs/api/rust-host.md                         |  15 ++-
 docs/architecture/runtime-lifecycle.md        |   2 +
 8 files changed, 215 insertions(+), 21 deletions(-)
diff --git a/crates/aardvark-core/src/engine.rs b/crates/aardvark-core/src/engine.rs
index 543e685..8806c08 100644
--- a/crates/aardvark-core/src/engine.rs
+++ b/crates/aardvark-core/src/engine.rs
@@ -321,19 +321,7 @@ impl JsRuntime {
     /// Creates a new isolate with an empty context and basic polyfills.
     pub fn new() -> Result<Self> {
         init_v8();
-        let context_state = Rc::new(RuntimeContext {
-            assets: AssetStore::new(),
-            modules: RefCell::new(HashMap::new()),
-            module_by_hash: RefCell::new(HashMap::new()),
-            module_namespaces: RefCell::new(HashMap::new()),
-            pyodide_instance: RefCell::new(None),
-            stdout_log: RefCell::new(String::new()),
-            stderr_log: RefCell::new(String::new()),
-            network_policy: RwLock::new(NetworkPolicy::default()),
-            network_contacts: RwLock::new(Vec::new()),
-            network_denied: RwLock::new(Vec::new()),
-            filesystem_violations: RwLock::new(Vec::new()),
-        });
+        let context_state = Rc::new(RuntimeContext::new());
         let create_params =
             v8::CreateParams::default().array_buffer_allocator(v8::new_default_allocator());
         let mut isolate = v8::Isolate::new(create_params);
@@ -353,6 +341,25 @@ impl JsRuntime {
         Ok(runtime)
     }
 
+    /// Resets in place: fresh `RuntimeContext` and V8 context on the existing isolate.
+    pub fn reset(&mut self) -> Result<()> {
+        // Swap in fresh per-context state (module caches, logs, network policy defaults).
+        let new_state = Rc::new(RuntimeContext::new());
+        self.context_state = new_state.clone();
+        self.isolate.set_slot(new_state);
+
+        let global = {
+            v8::scope!(let scope, &mut self.isolate);
+            let context = v8::Context::new(scope, v8::ContextOptions::default());
+            v8::Global::new(scope, context)
+        };
+        // Replacing the handle unroots the old context; only then can the GC hint reclaim it.
+        self.context = global;
+        self.isolate.low_memory_notification();
+
+        self.install_polyfills()
+    }
+
     /// Configures the network allowlist for subsequent native fetches.
     pub fn set_network_policy(&self, allow: &[String], https_only: bool) {
         self.context_state.set_network_policy(allow, https_only);
@@ -2240,6 +2247,22 @@ fn normalize_specifier(spec: &str) -> String {
 }
 
 impl RuntimeContext {
+    fn new() -> Self {
+        Self {
+            assets: AssetStore::new(),
+            modules: RefCell::new(HashMap::new()),
+            module_by_hash: RefCell::new(HashMap::new()),
+            module_namespaces: RefCell::new(HashMap::new()),
+            pyodide_instance: RefCell::new(None),
+            stdout_log: RefCell::new(String::new()),
+            stderr_log: RefCell::new(String::new()),
+            network_policy: RwLock::new(NetworkPolicy::default()),
+            network_contacts: RwLock::new(Vec::new()),
+            network_denied: RwLock::new(Vec::new()),
+            filesystem_violations: RwLock::new(Vec::new()),
+        }
+    }
+
     fn clear_console(&self) {
         self.stdout_log.borrow_mut().clear();
         self.stderr_log.borrow_mut().clear();
diff --git a/crates/aardvark-core/src/pool.rs b/crates/aardvark-core/src/pool.rs
index 07ca43a..76a454b 100644
--- a/crates/aardvark-core/src/pool.rs
+++ b/crates/aardvark-core/src/pool.rs
@@ -17,6 +17,8 @@ pub struct PoolConfig {
     pub max_runtimes: usize,
     /// Baseline configuration applied to every newly-created runtime.
     pub runtime_config: PyRuntimeConfig,
+    /// How the pool resets runtimes when they are returned.
+    pub reset_mode: PoolResetMode,
 }
 
 impl PoolConfig {
@@ -25,6 +27,7 @@ impl PoolConfig {
         Self {
             max_runtimes,
             runtime_config,
+            reset_mode: PoolResetMode::RecreateEngine,
         }
     }
 }
@@ -34,10 +37,20 @@ impl Default for PoolConfig {
         Self {
             max_runtimes: 4,
             runtime_config: PyRuntimeConfig::default(),
+            reset_mode: PoolResetMode::RecreateEngine,
         }
     }
 }
 
+/// Reset strategy to use when returning runtimes to the pool.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum PoolResetMode {
+    /// Drop the language engine and recreate it from scratch (`reset_to_snapshot`).
+    RecreateEngine,
+    /// Keep the existing isolate and rebuild its context in place (`reset_in_place`).
+    InPlace,
+}
+
 /// Runtime pool managing reusable PyRuntime instances.
 pub struct PyRuntimePool {
     inner: Arc<PoolInner>,
@@ -172,10 +185,15 @@ impl Drop for PooledRuntime {
                 let span = info_span!(
                     target: "aardvark::runtime",
                     "runtime.reset",
-                    runtime_id = runtime_id.as_str()
+                    runtime_id = runtime_id.as_str(),
+                    mode = ?self.inner.config.reset_mode
                 );
                 let _guard = span.enter();
-                if let Err(err) = managed.runtime.reset_to_snapshot() {
+                let reset_result = match self.inner.config.reset_mode {
+                    PoolResetMode::RecreateEngine => managed.runtime.reset_to_snapshot(),
+                    PoolResetMode::InPlace => managed.runtime.reset_in_place(),
+                };
+                if let Err(err) = reset_result {
                     warn!(
                         target: "aardvark::runtime",
                         runtime_id = runtime_id.as_str(),
@@ -191,6 +209,7 @@ impl Drop for PooledRuntime {
                 info!(
                     target: "aardvark::runtime",
                     runtime_id = runtime_id.as_str(),
+                    mode = ?self.inner.config.reset_mode,
                     "reset complete"
                 );
             }
diff --git a/crates/aardvark-core/src/runtime/javascript.rs b/crates/aardvark-core/src/runtime/javascript.rs
index eeb1e24..88dd9f2 100644
--- a/crates/aardvark-core/src/runtime/javascript.rs
+++ b/crates/aardvark-core/src/runtime/javascript.rs
@@ -56,4 +56,11 @@ impl LanguageEngine for JavaScriptEngine {
         }
         Ok(())
     }
+
+    fn reset_in_place(&mut self, _config: &PyRuntimeConfig) -> Result<()> {
+        self.js.reset()?;
+        self.js
+            .insert_text_asset("js_runtime_bootstrap.js", assets::js_runtime_bootstrap_js());
+        self.js.ensure_module("js_runtime_bootstrap.js")
+    }
 }
diff --git a/crates/aardvark-core/src/runtime/mod.rs b/crates/aardvark-core/src/runtime/mod.rs
index cb6ce0c..0d3b4c0 100644
--- a/crates/aardvark-core/src/runtime/mod.rs
+++ b/crates/aardvark-core/src/runtime/mod.rs
@@ -51,6 +51,7 @@ trait LanguageEngine {
     fn prepare_environment(&mut self, config: &PyRuntimeConfig) -> Result<()>;
     fn load_manifest_packages(&mut self, manifest: &BundleManifest) -> Result<()>;
     fn mount_bundle(&mut self, bundle: &Bundle) -> Result<()>;
+    fn reset_in_place(&mut self, config: &PyRuntimeConfig) -> Result<()>;
     fn set_warm_state(&mut self, _state: Option<WarmState>) {}
 }
 
@@ -573,6 +574,28 @@ impl AardvarkRuntime {
         Ok(())
     }
 
+    /// Resets the runtime by rebuilding the language engine in place without dropping the isolate.
+    pub fn reset_in_place(&mut self) -> Result<()> {
+        let span = info_span!(
+            target: "aardvark::runtime",
+            "runtime.reset_in_place",
+            runtime_id = self.runtime_id_str()
+        );
+        let _guard = span.enter();
+        let language = self
+            .engine
+            .as_ref()
+            .map(|engine| engine.language())
+            .unwrap_or(self.config.default_language);
+        if let Some(engine) = self.engine.as_mut() {
+            engine.reset_in_place(&self.config)?;
+        } else {
+            self.engine = Some(create_engine(language, &self.config)?);
+        }
+        self.warm_restored = false;
+        Ok(())
+    }
+
     fn ensure_engine(&mut self, language: RuntimeLanguage) -> Result<()> {
         if self.engine.as_ref().map(|engine| engine.language()) == Some(language) {
             return Ok(());
diff --git a/crates/aardvark-core/src/runtime/python.rs b/crates/aardvark-core/src/runtime/python.rs
index 0613367..4472c03 100644
--- a/crates/aardvark-core/src/runtime/python.rs
+++ b/crates/aardvark-core/src/runtime/python.rs
@@ -127,6 +127,15 @@ impl LanguageEngine for PythonEngine {
         self.js.mount_bundle(bundle, "/app")
     }
 
+    fn reset_in_place(&mut self, config: &PyRuntimeConfig) -> Result<()> {
+        self.js.reset()?;
+        self.snapshot_bytes = load_snapshot_bytes(config)?;
+        self.warm_state = config.warm_state.clone();
+        self.register_core_assets();
+        self.inject_version_globals(config)?;
+        Ok(())
+    }
+
     fn set_warm_state(&mut self, state: Option<WarmState>) {
         self.warm_state = state;
         self.snapshot_bytes = self.warm_state.as_ref().map(|s| s.snapshot());
diff --git a/crates/aardvark-core/tests/runtime_pool_and_outcome.rs b/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
index 9ae51dc..3a15a29 100644
--- a/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
+++ b/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
@@ -2,7 +2,7 @@ use aardvark_core::{
     config::{PyRuntimeConfig, ResetPolicy},
     invocation::{FieldDescriptor, InvocationDescriptor, InvocationLimits},
     outcome::{FailureKind, OutcomeStatus, ResultPayload},
-    pool::PoolConfig,
+    pool::{PoolConfig, PoolResetMode},
     strategy::{
         JsonInvocationStrategy, RawCtxBindingBuilder, RawCtxInput, RawCtxInvocationStrategy,
         RawCtxMetadata, RawCtxPublishBuilder, RawCtxTableColumnBuilder, RawCtxTableSpecBuilder,
@@ -19,6 +19,8 @@ use zip::CompressionMethod;
 #[test]
 fn runtime_pool_and_outcome_behaviour() -> Result<()> {
     verify_pooled_runtime_manual_reset()?;
+    verify_pooled_runtime_in_place_reset()?;
+    verify_runtime_reset_in_place()?;
     verify_after_invocation_reset_policy()?;
     verify_python_exception_outcome()?;
     verify_timeout_failure()?;
@@ -104,6 +106,110 @@ def main():
     Ok(())
 }
 
+fn verify_pooled_runtime_in_place_reset() -> Result<()> {
+    let runtime_config = PyRuntimeConfig {
+        reset_policy: ResetPolicy::Manual,
+        ..PyRuntimeConfig::default()
+    };
+    let pool = PyRuntimePool::new(PoolConfig {
+        max_runtimes: 1,
+        runtime_config,
+        reset_mode: PoolResetMode::InPlace,
+    })?;
+
+    let pointer_before;
+    {
+        let mut handle = pool.checkout()?;
+        let runtime = handle.runtime();
+        pointer_before = runtime.js_runtime() as *mut _ as usize;
+        let outcome = run_main(
+            runtime,
+            r#"
+import builtins
+
+def main():
+    if hasattr(builtins, "__pool_marker"):
+        return "stale"
+    builtins.__pool_marker = "present"
+    return "fresh"
+"#,
+        )?;
+        assert!(outcome.is_success(), "expected success outcome");
+        assert_eq!(payload_text(&outcome), "'fresh'");
+    }
+
+    {
+        let mut handle = pool.checkout()?;
+        let runtime = handle.runtime();
+        let pointer_after = runtime.js_runtime() as *mut _ as usize;
+        assert_eq!(pointer_before, pointer_after, "engine should stay in place");
+        let outcome = run_main(
+            runtime,
+            r#"
+import builtins
+
+def main():
+    if hasattr(builtins, "__pool_marker"):
+        return "stale"
+    builtins.__pool_marker = "present"
+    return "fresh"
+"#,
+        )?;
+        assert!(outcome.is_success(), "expected success outcome");
+        assert_eq!(payload_text(&outcome), "'fresh'");
+    }
+
+    Ok(())
+}
+
+fn verify_runtime_reset_in_place() -> Result<()> {
+    let mut runtime = PyRuntime::new(PyRuntimeConfig::default())?;
+
+    let before_ptr = {
+        let js = runtime.js_runtime();
+        js as *mut _ as usize
+    };
+
+    let initial = run_main(
+        &mut runtime,
+        r#"
+import builtins
+
+def main():
+    if hasattr(builtins, "__reset_marker"):
+        return "stale"
+    builtins.__reset_marker = "present"
+    return "fresh"
+"#,
+    )?;
+    assert!(initial.is_success(), "expected success outcome");
+    assert_eq!(payload_text(&initial), "'fresh'");
+
+    runtime.reset_in_place()?;
+
+    let after_ptr = {
+        let js = runtime.js_runtime();
+        js as *mut _ as usize
+    };
+    assert_eq!(before_ptr, after_ptr, "engine should be reused in-place");
+
+    let second = run_main(
+        &mut runtime,
+        r#"
+import builtins
+
+def main():
+    if hasattr(builtins, "__reset_marker"):
+        return "stale"
+    builtins.__reset_marker = "present"
+    return "fresh"
+"#,
+    )?;
+    assert!(second.is_success(), "expected success outcome");
+    assert_eq!(payload_text(&second), "'fresh'");
+    Ok(())
+}
+
 fn verify_shared_buffer_payload() -> Result<()> {
     let mut runtime = PyRuntime::new(PyRuntimeConfig::default())?;
     let outcome = run_main(
diff --git a/docs/api/rust-host.md b/docs/api/rust-host.md
index f84d7c2..f729531 100644
--- a/docs/api/rust-host.md
+++ b/docs/api/rust-host.md
@@ -104,13 +104,12 @@ All payload types are supported: text, JSON, binary, and shared buffers. Use pat
 ## Using the runtime pool
 
 ```rust
-use aardvark_core::{PoolConfig, PyRuntimePool, PyRuntimeConfig};
+use aardvark_core::{PoolConfig, PoolResetMode, PyRuntimePool, PyRuntimeConfig};
 
 fn pool_example() -> anyhow::Result<()> {
-    let pool = PyRuntimePool::new(PoolConfig {
-        max_runtimes: 8,
-        runtime_config: PyRuntimeConfig::default(),
-    })?;
+    let mut config = PoolConfig::new(8, PyRuntimeConfig::default());
+    config.reset_mode = PoolResetMode::InPlace; // reuse isolates between checkouts
+    let pool = PyRuntimePool::new(config)?;
     let mut handle = pool.checkout()?;
     let bundle = Bundle::from_zip_bytes(include_bytes!("../../hello_bundle.zip"))?;
     let session = handle.runtime().prepare_session_with_manifest(bundle)?.0;
@@ -127,6 +126,12 @@ Returned runtimes are marked dirty and scrubbed the next time the pool needs add
 
 > **Pool Limitation:** resets still run on the thread that performs the next checkout. If the warm snapshot takes ~800 ms to hydrate, the first borrower after a drop still pays that cost.
 
+## Resetting a runtime explicitly
+
+- `reset_to_snapshot()` recreates the language engine from scratch. This is the slow but safest option when you want to reclaim every resource.
+- `reset_in_place()` reuses the existing isolate, wipes the context, and replays the bootstrap assets so the next invocation starts from the warm snapshot without a full teardown.
+- `PoolResetMode::InPlace` lets the pool call `reset_in_place()` automatically when a handle is dropped; use it together with `ResetPolicy::Manual`.
+
 ## Warm Snapshots for Faster Cold Starts
 
 If you want Cloudflare-style deploy-time hydration, capture a warm snapshot once and reuse it:
diff --git a/docs/architecture/runtime-lifecycle.md b/docs/architecture/runtime-lifecycle.md
index d971c0b..6a02f9c 100644
--- a/docs/architecture/runtime-lifecycle.md
+++ b/docs/architecture/runtime-lifecycle.md
@@ -87,6 +87,8 @@ Regardless of success or failure the runtime collects:
 
 - If `ResetPolicy::AfterInvocation` is configured, the runtime automatically rolls back to the warm snapshot before returning from `run_session`.
 - If `ResetPolicy::Manual` is used (default in the pool), the `Drop` implementation on `PooledRuntime` triggers `reset_to_snapshot` when the handle returns to the pool. Failures drop the runtime from the pool and reduce capacity until a fresh runtime is created.
+- Hosts that want to avoid tearing down the engine can call `PyRuntime::reset_in_place()`. This keeps the underlying isolate alive, wipes the context, and replays the bootstrap assets before the next invocation.
+- `PoolResetMode::InPlace` applies the same in-place reset when a pooled handle drops, trading strict engine recreation for lower latency on reuse.
 
 ## Failure Modes and Recovery
 

From 4136caa9d2144c3f96b4fd960838219fe47f3196 Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Fri, 17 Oct 2025 17:35:36 +0800
Subject: [PATCH 02/13] Capture reset telemetry for pooled runtimes

---
 crates/aardvark-core/src/host.rs              | 25 ++++++-
 crates/aardvark-core/src/outcome.rs           | 17 +++++
 crates/aardvark-core/src/runtime/mod.rs       | 75 ++++++++++++++++++-
 .../tests/runtime_pool_and_outcome.rs         | 48 +++++++++---
 crates/aardvark-core/tests/telemetry.rs       |  1 +
 docs/api/rust-host.md                         |  1 +
 docs/architecture/runtime-lifecycle.md        |  1 +
 7 files changed, 155 insertions(+), 13 deletions(-)

diff --git a/crates/aardvark-core/src/host.rs b/crates/aardvark-core/src/host.rs
index 14cc099..5d4f9c7 100644
--- a/crates/aardvark-core/src/host.rs
+++ b/crates/aardvark-core/src/host.rs
@@ -1,6 +1,9 @@
 //! Host-facing helpers for consuming sandbox diagnostics.
 
-use crate::outcome::{Diagnostics, FilesystemViolation, NetworkDeniedHost, NetworkHostContact};
+use crate::outcome::{
+    Diagnostics, FilesystemViolation, NetworkDeniedHost, NetworkHostContact, ResetMode,
+    ResetSummary,
+};
 
 /// Aggregated telemetry derived from [`Diagnostics`] for host integrations.
 #[derive(Clone, Debug, Default)]
@@ -8,6 +11,7 @@ pub struct SandboxTelemetry {
     pub cpu_ms_used: Option<u64>,
     pub filesystem: FilesystemTelemetry,
     pub network: NetworkTelemetry,
+    pub reset: Option<ResetTelemetry>,
 }
 
 /// Filesystem usage and violation details.
@@ -24,6 +28,14 @@ pub struct NetworkTelemetry {
     pub blocked: Vec<NetworkDeniedHost>,
 }
 
+/// Reset data captured prior to invocation.
+#[derive(Clone, Debug)]
+pub struct ResetTelemetry {
+    pub mode: ResetMode,
+    pub duration_ms: u64,
+    pub engine_generation: u64,
+}
+
 impl From<&Diagnostics> for SandboxTelemetry {
     fn from(value: &Diagnostics) -> Self {
         Self {
@@ -36,6 +48,17 @@ impl From<&Diagnostics> for SandboxTelemetry {
                 allowed: value.network_hosts_contacted.clone(),
                 blocked: value.network_hosts_blocked.clone(),
             },
+            reset: value.reset.as_ref().map(ResetTelemetry::from),
+        }
+    }
+}
+
+impl From<&ResetSummary> for ResetTelemetry {
+    fn from(summary: &ResetSummary) -> Self {
+        Self {
+            mode: summary.mode.clone(),
+            duration_ms: summary.duration_ms,
+            engine_generation: summary.engine_generation,
         }
     }
 }
diff --git a/crates/aardvark-core/src/outcome.rs b/crates/aardvark-core/src/outcome.rs
index df748b2..8934be2 100644
--- a/crates/aardvark-core/src/outcome.rs
+++ b/crates/aardvark-core/src/outcome.rs
@@ -64,6 +64,8 @@ pub struct Diagnostics {
     pub network_hosts_blocked: Vec<NetworkDeniedHost>,
     #[serde(default, skip_serializing_if = "Vec::is_empty")]
     pub filesystem_violations: Vec<FilesystemViolation>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub reset: Option<ResetSummary>,
 }
 
 impl Diagnostics {
@@ -73,6 +75,21 @@ impl Diagnostics {
     }
 }
 
+/// Summary of the reset that prepared the runtime for this invocation.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ResetSummary {
+    pub mode: ResetMode,
+    pub duration_ms: u64,
+    pub engine_generation: u64,
+}
+
+/// Reset mechanism used before the invocation.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub enum ResetMode {
+    RecreateEngine,
+    InPlace,
+}
+
 /// Structured status of the execution.
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub enum OutcomeStatus {
diff --git a/crates/aardvark-core/src/runtime/mod.rs b/crates/aardvark-core/src/runtime/mod.rs
index 0d3b4c0..b1077e8 100644
--- a/crates/aardvark-core/src/runtime/mod.rs
+++ b/crates/aardvark-core/src/runtime/mod.rs
@@ -11,7 +11,7 @@ use crate::error::{PyRunnerError, Result};
 use crate::invocation::{InvocationDescriptor, InvocationLimits};
 use crate::outcome::{
     Diagnostics, ExecutionOutcome, FailureKind, FilesystemViolation, NetworkDeniedHost,
-    NetworkHostContact, ResultPayload,
+    NetworkHostContact, ResetMode, ResetSummary, ResultPayload,
 };
 use crate::runtime_language::RuntimeLanguage;
 use crate::session::PySession;
@@ -25,7 +25,7 @@ use std::sync::{
     atomic::{AtomicBool, Ordering},
     mpsc, Arc,
 };
-use std::time::Duration;
+use std::time::{Duration, Instant};
 use tracing::{info, info_span, warn};
 use v8::{self, PinScope};
 
@@ -43,6 +43,8 @@ pub struct AardvarkRuntime {
     engine: Option<Box<dyn LanguageEngine>>,
     runtime_id: Option<String>,
     warm_restored: bool,
+    engine_generation: u64,
+    pending_reset_summary: Option<ResetSummary>,
 }
 
 trait LanguageEngine {
@@ -103,6 +105,7 @@ struct CollectedDiagnostics {
     network_hosts_contacted: Vec<NetworkHostContact>,
     network_hosts_blocked: Vec<NetworkDeniedHost>,
     filesystem_violations: Vec<FilesystemViolation>,
+    reset_summary: Option<crate::outcome::ResetSummary>,
 }
 
 impl AardvarkRuntime {
@@ -114,6 +117,8 @@ impl AardvarkRuntime {
             engine: Some(engine),
             runtime_id: None,
             warm_restored: false,
+            engine_generation: 1,
+            pending_reset_summary: None,
         })
     }
 
@@ -390,6 +395,7 @@ impl AardvarkRuntime {
             network_hosts_contacted,
             network_hosts_blocked,
             filesystem_violations,
+            reset_summary: self.pending_reset_summary.take(),
         };
 
         Self::emit_diagnostics_events(&collected, self.runtime_id_str(), descriptor.entrypoint());
@@ -550,6 +556,7 @@ impl AardvarkRuntime {
             runtime_id = self.runtime_id_str()
         );
         let _guard = span.enter();
+        let start = Instant::now();
         if let Some(token) = env::var_os("AARDVARK_TEST_FORCE_RESET_FAILURE") {
             env::remove_var("AARDVARK_TEST_FORCE_RESET_FAILURE");
             let label = token
@@ -570,6 +577,21 @@ impl AardvarkRuntime {
             drop(old);
         }
         self.engine = Some(create_engine(language, &self.config)?);
+        self.engine_generation = self.engine_generation.saturating_add(1);
+        let summary = ResetSummary {
+            mode: ResetMode::RecreateEngine,
+            duration_ms: start.elapsed().as_millis().min(u128::from(u64::MAX)) as u64,
+            engine_generation: self.engine_generation,
+        };
+        info!(
+            target: "aardvark::runtime",
+            runtime_id = self.runtime_id_str(),
+            reset.mode = ?summary.mode,
+            reset.duration_ms = summary.duration_ms,
+            reset.engine_generation = summary.engine_generation,
+            "reset recorded"
+        );
+        self.pending_reset_summary = Some(summary);
         self.warm_restored = false;
         Ok(())
     }
@@ -582,6 +604,7 @@ impl AardvarkRuntime {
             runtime_id = self.runtime_id_str()
         );
         let _guard = span.enter();
+        let start = Instant::now();
         let language = self
             .engine
             .as_ref()
@@ -589,8 +612,39 @@ impl AardvarkRuntime {
             .unwrap_or(self.config.default_language);
         if let Some(engine) = self.engine.as_mut() {
             engine.reset_in_place(&self.config)?;
+            let summary = ResetSummary {
+                mode: ResetMode::InPlace,
+                duration_ms: start.elapsed().as_millis().min(u128::from(u64::MAX)) as u64,
+                engine_generation: self.engine_generation,
+            };
+            info!(
+                target: "aardvark::runtime",
+                runtime_id = self.runtime_id_str(),
+                reset.mode = ?summary.mode,
+                reset.duration_ms = summary.duration_ms,
+                reset.engine_generation = summary.engine_generation,
+                "reset recorded"
+            );
+            self.pending_reset_summary = Some(summary);
         } else {
             self.engine = Some(create_engine(language, &self.config)?);
+            self.engine_generation = self.engine_generation.saturating_add(1);
+            let summary = ResetSummary {
+                mode: ResetMode::RecreateEngine,
+                duration_ms: start.elapsed().as_millis().min(u128::from(u64::MAX)) as u64,
+                engine_generation: self.engine_generation,
+            };
+            info!(
+                target: "aardvark::runtime",
+                runtime_id = self.runtime_id_str(),
+                reset.mode = ?summary.mode,
+                reset.duration_ms = summary.duration_ms,
+                reset.engine_generation = summary.engine_generation,
+                "reset recorded"
+            );
+            self.pending_reset_summary = Some(summary);
+            self.warm_restored = false;
+            return Ok(());
         }
         self.warm_restored = false;
         Ok(())
@@ -600,10 +654,26 @@ impl AardvarkRuntime {
         if self.engine.as_ref().map(|engine| engine.language()) == Some(language) {
             return Ok(());
         }
+        let start = Instant::now();
         if let Some(old) = self.engine.take() {
             drop(old);
         }
         self.engine = Some(create_engine(language, &self.config)?);
+        self.engine_generation = self.engine_generation.saturating_add(1);
+        let summary = ResetSummary {
+            mode: ResetMode::RecreateEngine,
+            duration_ms: start.elapsed().as_millis().min(u128::from(u64::MAX)) as u64,
+            engine_generation: self.engine_generation,
+        };
+        info!(
+            target: "aardvark::runtime",
+            runtime_id = self.runtime_id_str(),
+            reset.mode = ?summary.mode,
+            reset.duration_ms = summary.duration_ms,
+            reset.engine_generation = summary.engine_generation,
+            "reset recorded"
+        );
+        self.pending_reset_summary = Some(summary);
         self.warm_restored = false;
         Ok(())
     }
@@ -819,6 +889,7 @@ impl AardvarkRuntime {
         diagnostics.network_hosts_contacted = collected.network_hosts_contacted.clone();
         diagnostics.network_hosts_blocked = collected.network_hosts_blocked.clone();
         diagnostics.filesystem_violations = collected.filesystem_violations.clone();
+        diagnostics.reset = collected.reset_summary.clone();
         diagnostics
     }
 
diff --git a/crates/aardvark-core/tests/runtime_pool_and_outcome.rs b/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
index 3a15a29..b500a4c 100644
--- a/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
+++ b/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
@@ -1,7 +1,7 @@
 use aardvark_core::{
     config::{PyRuntimeConfig, ResetPolicy},
     invocation::{FieldDescriptor, InvocationDescriptor, InvocationLimits},
-    outcome::{FailureKind, OutcomeStatus, ResultPayload},
+    outcome::{FailureKind, OutcomeStatus, ResetMode, ResultPayload},
     pool::{PoolConfig, PoolResetMode},
     strategy::{
         JsonInvocationStrategy, RawCtxBindingBuilder, RawCtxInput, RawCtxInvocationStrategy,
@@ -96,6 +96,13 @@ def main():
         )?;
         assert!(outcome.is_success(), "expected success outcome");
         assert_eq!(payload_text(&outcome), "'fresh'");
+        let reset = outcome
+            .diagnostics
+            .reset
+            .as_ref()
+            .expect("expected reset summary for pooled runtime");
+        assert!(matches!(reset.mode, ResetMode::RecreateEngine));
+        assert!(reset.engine_generation >= 2);
         runtime_id
     };
 
@@ -117,14 +124,17 @@ fn verify_pooled_runtime_in_place_reset() -> Result<()> {
         reset_mode: PoolResetMode::InPlace,
     })?;
 
-    let pointer_before;
-    {
+    let pointer_before = {
         let mut handle = pool.checkout()?;
-        let runtime = handle.runtime();
-        pointer_before = runtime.js_runtime() as *mut _ as usize;
-        let outcome = run_main(
-            runtime,
-            r#"
+        let ptr = {
+            let runtime = handle.runtime();
+            runtime.js_runtime() as *mut _ as usize
+        };
+        let outcome = {
+            let runtime = handle.runtime();
+            run_main(
+                runtime,
+                r#"
 import builtins
 
 def main():
@@ -133,10 +143,13 @@ def main():
     builtins.__pool_marker = "present"
     return "fresh"
 "#,
-        )?;
+            )?
+        };
         assert!(outcome.is_success(), "expected success outcome");
         assert_eq!(payload_text(&outcome), "'fresh'");
-    }
+        drop(handle);
+        ptr
+    };
 
     {
         let mut handle = pool.checkout()?;
@@ -157,6 +170,14 @@ def main():
         )?;
         assert!(outcome.is_success(), "expected success outcome");
         assert_eq!(payload_text(&outcome), "'fresh'");
+        let reset = outcome
+            .diagnostics
+            .reset
+            .as_ref()
+            .expect("expected in-place reset summary after pool reuse");
+        assert!(matches!(reset.mode, ResetMode::InPlace));
+        assert!(reset.engine_generation >= 1);
+        drop(handle);
     }
 
     Ok(())
@@ -207,6 +228,13 @@ def main():
     )?;
     assert!(second.is_success(), "expected success outcome");
     assert_eq!(payload_text(&second), "'fresh'");
+    let reset = second
+        .diagnostics
+        .reset
+        .as_ref()
+        .expect("expected reset telemetry");
+    assert!(matches!(reset.mode, ResetMode::InPlace));
+    assert!(reset.engine_generation >= 1);
     Ok(())
 }
 
diff --git a/crates/aardvark-core/tests/telemetry.rs b/crates/aardvark-core/tests/telemetry.rs
index 1ae71ae..7b251ff 100644
--- a/crates/aardvark-core/tests/telemetry.rs
+++ b/crates/aardvark-core/tests/telemetry.rs
@@ -27,6 +27,7 @@ fn diagnostics_to_telemetry_maps_fields() {
             path: Some("/session/tmp.txt".into()),
             message: "quota exceeded".into(),
         }],
+        reset: None,
     };
 
     let telemetry: SandboxTelemetry = diagnostics.to_telemetry();
diff --git a/docs/api/rust-host.md b/docs/api/rust-host.md
index f729531..d6197b0 100644
--- a/docs/api/rust-host.md
+++ b/docs/api/rust-host.md
@@ -131,6 +131,7 @@ Returned runtimes are marked dirty and scrubbed the next time the pool needs add
 - `reset_to_snapshot()` recreates the language engine from scratch. This is the slow but safest option when you want to reclaim every resource.
 - `reset_in_place()` reuses the existing isolate, wipes the context, and replays the bootstrap assets so the next invocation starts from the warm snapshot without a full teardown.
 - `PoolResetMode::InPlace` lets the pool call `reset_in_place()` automatically when a handle is dropped; use it together with `ResetPolicy::Manual`.
+- After a reset runs, the next invocation’s diagnostics include `reset.mode`, `reset.duration_ms`, and `reset.engine_generation` so hosts can export per-checkout latency metrics.
 
 ## Warm Snapshots for Faster Cold Starts
 
diff --git a/docs/architecture/runtime-lifecycle.md b/docs/architecture/runtime-lifecycle.md
index 6a02f9c..9e100e0 100644
--- a/docs/architecture/runtime-lifecycle.md
+++ b/docs/architecture/runtime-lifecycle.md
@@ -89,6 +89,7 @@ Regardless of success or failure the runtime collects:
 - If `ResetPolicy::Manual` is used (default in the pool), the `Drop` implementation on `PooledRuntime` triggers `reset_to_snapshot` when the handle returns to the pool. Failures drop the runtime from the pool and reduce capacity until a fresh runtime is created.
 - Hosts that want to avoid tearing down the engine can call `PyRuntime::reset_in_place()`. This keeps the underlying isolate alive, wipes the context, and replays the bootstrap assets before the next invocation.
 - `PoolResetMode::InPlace` applies the same in-place reset when a pooled handle drops, trading strict engine recreation for lower latency on reuse.
+- Each reset (manual or in-place) records `mode`, `duration_ms`, and `engine_generation` so the next invocation’s diagnostics expose how the runtime was scrubbed.
 
 ## Failure Modes and Recovery
 

From 67abb15343281e4135fa34a7f7e5970ea485211d Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Fri, 17 Oct 2025 19:01:15 +0800
Subject: [PATCH 03/13] Bake overlays into warm snapshots to skip rehydrate

---
 crates/aardvark-core/src/config.rs            | 23 +++++++++++++++
 crates/aardvark-core/src/runtime/mod.rs       |  2 +-
 crates/aardvark-core/src/runtime/python.rs    | 22 +++++++++++++--
 .../tests/runtime_pool_and_outcome.rs         | 28 +++++++++++++++++++
 docs/api/rust-host.md                         |  2 ++
 docs/architecture/runtime-lifecycle.md        |  1 +
 6 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/crates/aardvark-core/src/config.rs b/crates/aardvark-core/src/config.rs
index 292ff1e..9518a25 100644
--- a/crates/aardvark-core/src/config.rs
+++ b/crates/aardvark-core/src/config.rs
@@ -101,6 +101,7 @@ impl fmt::Debug for HostHooks {
 pub struct WarmState {
     snapshot: Arc<[u8]>,
     overlay: Arc<OverlayExport>,
+    overlay_preloaded: bool,
 }
 
 impl WarmState {
@@ -109,9 +110,25 @@ impl WarmState {
         Self {
             snapshot,
             overlay: Arc::new(overlay),
+            overlay_preloaded: false,
         }
     }
 
+    /// Constructs a warm state and flags its overlay as already baked into the snapshot image.
+    pub fn with_overlay_preloaded(snapshot: Arc<[u8]>, overlay: OverlayExport) -> Self {
+        Self {
+            snapshot,
+            overlay: Arc::new(overlay),
+            overlay_preloaded: true,
+        }
+    }
+
+    /// Returns a new warm state flagged as overlay-preloaded.
+    pub fn into_overlay_preloaded(mut self) -> Self {
+        self.overlay_preloaded = true;
+        self
+    }
+
     /// Returns the snapshot bytes.
     pub fn snapshot(&self) -> Arc<[u8]> {
         self.snapshot.clone()
@@ -121,6 +138,11 @@ impl WarmState {
     pub fn overlay(&self) -> Arc<OverlayExport> {
         self.overlay.clone()
     }
+
+    /// Indicates whether the overlay contents were baked into the snapshot.
+    pub fn overlay_preloaded(&self) -> bool {
+        self.overlay_preloaded
+    }
 }
 
 impl fmt::Debug for WarmState {
@@ -128,6 +150,7 @@ impl fmt::Debug for WarmState {
         f.debug_struct("WarmState")
             .field("snapshot_len", &self.snapshot.len())
             .field("overlay_blobs", &self.overlay.blobs.len())
+            .field("overlay_preloaded", &self.overlay_preloaded)
             .finish()
     }
 }
diff --git a/crates/aardvark-core/src/runtime/mod.rs b/crates/aardvark-core/src/runtime/mod.rs
index b1077e8..1581b97 100644
--- a/crates/aardvark-core/src/runtime/mod.rs
+++ b/crates/aardvark-core/src/runtime/mod.rs
@@ -257,7 +257,7 @@ impl AardvarkRuntime {
             Arc::<[u8]>::from(bytes.into_boxed_slice())
         };
         let overlay = self.engine_mut().js_mut().export_overlay()?;
-        let state = WarmState::new(snapshot_bytes, overlay);
+        let state = WarmState::with_overlay_preloaded(snapshot_bytes, overlay);
         self.config.warm_state = Some(state.clone());
         self.engine_mut().set_warm_state(Some(state.clone()));
         self.config.snapshot.store_cached_bytes(state.snapshot());
diff --git a/crates/aardvark-core/src/runtime/python.rs b/crates/aardvark-core/src/runtime/python.rs
index 4472c03..52a561a 100644
--- a/crates/aardvark-core/src/runtime/python.rs
+++ b/crates/aardvark-core/src/runtime/python.rs
@@ -8,6 +8,7 @@ use crate::engine::{JsRuntime, PyodideLoadOptions};
 use crate::error::{PyRunnerError, Result};
 use crate::package_metadata;
 use crate::runtime_language::RuntimeLanguage;
+use std::env;
 use std::fs;
 use std::path::Path;
 use std::sync::Arc;
@@ -103,9 +104,24 @@ impl LanguageEngine for PythonEngine {
         self.js.load_pyodide(load_opts)?;
         self.snapshot_bytes = None;
         if let Some(state) = self.warm_state.as_ref() {
-            let overlay = state.overlay();
-            self.js.import_overlay(&overlay.metadata, &overlay.blobs)?;
-            self.js.prepare_dynlibs()?;
+            if state.overlay_preloaded() {
+                // Overlay already baked into the snapshot; no additional work required.
+            } else {
+                if let Some(token) = env::var_os("AARDVARK_TEST_FORCE_OVERLAY_IMPORT_FAILURE") {
+                    env::remove_var("AARDVARK_TEST_FORCE_OVERLAY_IMPORT_FAILURE");
+                    let label = token
+                        .to_str()
+                        .filter(|value| !value.is_empty())
+                        .map(|value| format!(" forced by {value}"))
+                        .unwrap_or_default();
+                    return Err(PyRunnerError::Init(format!(
+                        "forced overlay import failure{label}"
+                    )));
+                }
+                let overlay = state.overlay();
+                self.js.import_overlay(&overlay.metadata, &overlay.blobs)?;
+                self.js.prepare_dynlibs()?;
+            }
         }
         Ok(())
     }
diff --git a/crates/aardvark-core/tests/runtime_pool_and_outcome.rs b/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
index b500a4c..cf00901 100644
--- a/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
+++ b/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
@@ -12,6 +12,7 @@ use aardvark_core::{
 use bytes::Bytes;
 use serde_json::json;
 use std::env;
+use std::path::PathBuf;
 use std::io::Write;
 use zip::write::FileOptions;
 use zip::CompressionMethod;
@@ -21,6 +22,7 @@ fn runtime_pool_and_outcome_behaviour() -> Result<()> {
     verify_pooled_runtime_manual_reset()?;
     verify_pooled_runtime_in_place_reset()?;
     verify_runtime_reset_in_place()?;
+    verify_warm_state_skips_overlay_import()?;
     verify_after_invocation_reset_policy()?;
     verify_python_exception_outcome()?;
     verify_timeout_failure()?;
@@ -238,6 +240,32 @@ def main():
     Ok(())
 }
 
+fn verify_warm_state_skips_overlay_import() -> Result<()> {
+    let mut config = PyRuntimeConfig::default();
+    config.snapshot.save_to = Some(PathBuf::from("target/warm-state.snapshot"));
+    let mut runtime = PyRuntime::new(config)?;
+    let outcome = run_main(&mut runtime, SIMPLE_SUCCESS)?;
+    assert!(outcome.is_success(), "expected success outcome");
+
+    let warm_state = runtime.capture_warm_state()?;
+    assert!(
+        warm_state.overlay_preloaded(),
+        "warm state should mark overlay as preloaded"
+    );
+
+    let mut config = PyRuntimeConfig::default();
+    config.warm_state = Some(warm_state);
+    let mut runtime = PyRuntime::new(config)?;
+    env::set_var("AARDVARK_TEST_FORCE_OVERLAY_IMPORT_FAILURE", "1");
+    let outcome = run_main(&mut runtime, SIMPLE_SUCCESS)?;
+    env::remove_var("AARDVARK_TEST_FORCE_OVERLAY_IMPORT_FAILURE");
+    assert!(
+        outcome.is_success(),
+        "expected success outcome with preloaded overlay"
+    );
+    Ok(())
+}
+
 fn verify_shared_buffer_payload() -> Result<()> {
     let mut runtime = PyRuntime::new(PyRuntimeConfig::default())?;
     let outcome = run_main(
diff --git a/docs/api/rust-host.md b/docs/api/rust-host.md
index d6197b0..4ff31d2 100644
--- a/docs/api/rust-host.md
+++ b/docs/api/rust-host.md
@@ -158,6 +158,8 @@ fn host_with_warm_state(warm: WarmState) -> anyhow::Result<PyRuntime> {
 
 The saved `WarmState` bundles a Pyodide memory snapshot with its overlay. Runtimes constructed with it skip package installation and restore the filesystem/DLLs immediately. Call `config.snapshot.clear_cache()` or set `config.warm_state = None` if you regenerate the warm state at runtime.
 
+Warm states captured via `capture_warm_state()` mark the overlay as preloaded, so `reset_in_place()` skips the heavy overlay import. If you assemble a warm state manually, use `WarmState::with_overlay_preloaded` (or `WarmState::into_overlay_preloaded`) only when the snapshot image already contains the overlay contents: the runtime trusts the flag and skips `import_overlay`, so for a snapshot without the overlay baked in the overlay is never imported.
+
 ### Warm Snapshot Hooks
 
 Hooks let you run custom logic right before a snapshot is captured and immediately after a warm snapshot is applied:
diff --git a/docs/architecture/runtime-lifecycle.md b/docs/architecture/runtime-lifecycle.md
index 9e100e0..ccbb4d1 100644
--- a/docs/architecture/runtime-lifecycle.md
+++ b/docs/architecture/runtime-lifecycle.md
@@ -90,6 +90,7 @@ Regardless of success or failure the runtime collects:
 - Hosts that want to avoid tearing down the engine can call `PyRuntime::reset_in_place()`. This keeps the underlying isolate alive, wipes the context, and replays the bootstrap assets before the next invocation.
 - `PoolResetMode::InPlace` applies the same in-place reset when a pooled handle drops, trading strict engine recreation for lower latency on reuse.
 - Each reset (manual or in-place) records `mode`, `duration_ms`, and `engine_generation` so the next invocation’s diagnostics expose how the runtime was scrubbed.
+- Warm states captured inside the runtime mark their overlays as preloaded, allowing in-place resets to skip the expensive overlay import entirely.
 
 ## Failure Modes and Recovery
 

From 17f6806804ab09465f8c7141c3c75b74c8d4c69f Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Fri, 17 Oct 2025 19:32:50 +0800
Subject: [PATCH 04/13] Add bench_echo example for runtime timing

---
 crates/aardvark-core/examples/bench_echo.rs   | 157 ++++++++++++++++++
 .../tests/runtime_pool_and_outcome.rs         |   2 +-
 docs/dev/runtime-internals.md                 |   6 +
 3 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 crates/aardvark-core/examples/bench_echo.rs

diff --git a/crates/aardvark-core/examples/bench_echo.rs b/crates/aardvark-core/examples/bench_echo.rs
new file mode 100644
index 0000000..c92e9a5
--- /dev/null
+++ b/crates/aardvark-core/examples/bench_echo.rs
@@ -0,0 +1,157 @@
+use aardvark_core::{outcome::ResultPayload, Bundle, PyRuntime, PyRuntimeConfig};
+use std::io::{Cursor, Write};
+use std::path::PathBuf;
+use std::time::{Duration, Instant};
+use zip::write::FileOptions;
+use zip::ZipWriter;
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let mut args = std::env::args().skip(1);
+    let iterations: usize = args
+        .next()
+        .map(|value| {
+            value
+                .parse()
+                .expect("iterations must be a positive integer")
+        })
+        .unwrap_or(100);
+    let payload_len: usize = args
+        .next()
+        .map(|value| {
+            value
+                .parse()
+                .expect("payload length must be a positive integer")
+        })
+        .unwrap_or(1024);
+
+    println!(
+        "bench_echo: iterations={} payload_len={} bytes",
+        iterations, payload_len
+    );
+
+    let mut config = PyRuntimeConfig::default();
+    config.snapshot.save_to = Some(PathBuf::from("target/bench_echo.snapshot"));
+    let mut runtime = PyRuntime::new(config)?;
+    let bundle = build_echo_bundle(payload_len)?;
+
+    let warm_session = runtime.prepare_session(bundle.clone(), "main:main")?;
+    let warm_outcome = runtime.run_session(&warm_session)?;
+    assert!(
+        warm_outcome.is_success(),
+        "warmup run failed: {:?}",
+        warm_outcome.status
+    );
+    runtime.capture_warm_state()?;
+    runtime.reset_in_place()?;
+
+    let mut phases = PhaseStats::default();
+
+    for _ in 0..iterations {
+        runtime.reset_in_place()?;
+
+        let prepare_start = Instant::now();
+        let session = runtime.prepare_session(bundle.clone(), "main:main")?;
+        let prepare = prepare_start.elapsed();
+
+        let run_start = Instant::now();
+        let outcome = runtime.run_session(&session)?;
+        let run = run_start.elapsed();
+
+        if let Some(ResultPayload::Text(_value)) = outcome.payload() {
+            // Payload contents are not inspected; the benchmark only measures timing.
+        }
+
+        phases.record(prepare, run);
+    }
+
+    println!(
+        "phases: prepare=avg {:.2} ms (min {:.2}, max {:.2}) · run=avg {:.2} ms (min {:.2}, max {:.2}) · total=avg {:.2} ms (min {:.2}, max {:.2})",
+        phases.prepare.avg_ms(),
+        phases.prepare.min_ms(),
+        phases.prepare.max_ms(),
+        phases.run.avg_ms(),
+        phases.run.min_ms(),
+        phases.run.max_ms(),
+        phases.total.avg_ms(),
+        phases.total.min_ms(),
+        phases.total.max_ms()
+    );
+
+    Ok(())
+}
+
+fn build_echo_bundle(payload_len: usize) -> Result<Bundle, Box<dyn std::error::Error>> {
+    let source = format!(
+        r#"PAYLOAD = "x" * {payload_len}
+
+def main():
+    return PAYLOAD
+"#,
+        payload_len = payload_len
+    );
+
+    let cursor = Cursor::new(Vec::new());
+    let mut writer = ZipWriter::new(cursor);
+    let options = FileOptions::default();
+    writer.start_file("main.py", options)?;
+    writer.write_all(source.as_bytes())?;
+    let cursor = writer.finish()?;
+    let bytes = cursor.into_inner();
+    Ok(Bundle::from_zip_bytes(bytes)?)
+}
+
+#[derive(Default)]
+struct PhaseStats {
+    prepare: Stat,
+    run: Stat,
+    total: Stat,
+}
+
+impl PhaseStats {
+    fn record(&mut self, prepare: Duration, run: Duration) {
+        self.prepare.push(prepare);
+        self.run.push(run);
+        self.total.push(prepare + run);
+    }
+}
+
+#[derive(Default)]
+struct Stat {
+    count: usize,
+    sum: Duration,
+    min: Duration,
+    max: Duration,
+}
+
+impl Stat {
+    fn push(&mut self, value: Duration) {
+        if self.count == 0 {
+            self.min = value;
+            self.max = value;
+        } else {
+            if value < self.min {
+                self.min = value;
+            }
+            if value > self.max {
+                self.max = value;
+            }
+        }
+        self.count += 1;
+        self.sum += value;
+    }
+
+    fn avg_ms(&self) -> f64 {
+        if self.count == 0 {
+            return 0.0;
+        }
+        (self.sum.as_secs_f64() * 1000.0) / self.count as f64
+    }
+
+    fn min_ms(&self) -> f64 {
+        self.min.as_secs_f64() * 1000.0
+    }
+
+    fn max_ms(&self) -> f64 {
+        self.max.as_secs_f64() * 1000.0
+    }
+}
diff --git a/crates/aardvark-core/tests/runtime_pool_and_outcome.rs b/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
index cf00901..9842a4b 100644
--- a/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
+++ b/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
@@ -12,8 +12,8 @@ use aardvark_core::{
 use bytes::Bytes;
 use serde_json::json;
 use std::env;
-use std::path::PathBuf;
 use std::io::Write;
+use std::path::PathBuf;
 use zip::write::FileOptions;
 use zip::CompressionMethod;
 
diff --git a/docs/dev/runtime-internals.md b/docs/dev/runtime-internals.md
index 6ed0bd7..1600989 100644
--- a/docs/dev/runtime-internals.md
+++ b/docs/dev/runtime-internals.md
@@ -86,3 +86,9 @@ information about the moving parts you are likely to touch.
 5. Add integration tests to prevent regressions.
 
 Following this order keeps host APIs and in-process behaviour aligned.
+
+## Benchmarking Basics
+
+- `cargo run -p aardvark-core --example bench_echo -- [iterations] [payload_len]` exercises a tiny Python echo handler and prints per-phase timings (`prepare`, `run`, `total`).
+- The harness captures a warm snapshot up-front, so in-place resets hit the fast path (overlay already baked into the snapshot).
+- Adjust `payload_len` to explore how return sizes influence the execution phase; the `prepare` measurement stays dominated by warm-restore.

From 985e26ad9cdf0c93ba67df105d787e29ab09f424 Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Fri, 17 Oct 2025 19:36:52 +0800
Subject: [PATCH 05/13] Guard entropy patch restore for warm snapshots

---
 .../aardvark-core/src/py/entropy/entropy_import_context.py  | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/crates/aardvark-core/src/py/entropy/entropy_import_context.py b/crates/aardvark-core/src/py/entropy/entropy_import_context.py
index 33fe849..7d15980 100644
--- a/crates/aardvark-core/src/py/entropy/entropy_import_context.py
+++ b/crates/aardvark-core/src/py/entropy/entropy_import_context.py
@@ -252,7 +252,11 @@ def tempfile_context(module):
 
 @register_before_first_request("tempfile")
 def tempfile_restore_random_name_sequence(tempfile):
-    tempfile._RandomNameSequence = tempfile._orig_RandomNameSequence
+    orig = getattr(tempfile, "_orig_RandomNameSequence", None)
+    if orig is None:
+        # Warm snapshots may already include the restored sequence.
+        return
+    tempfile._RandomNameSequence = orig
     del tempfile._orig_RandomNameSequence
 
 

From b9a2ce7cc75bbd62e7fda48d97d452e1961028c0 Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Sun, 19 Oct 2025 21:01:14 +0800
Subject: [PATCH 06/13] Refine warm state test config

---
 crates/aardvark-core/tests/runtime_pool_and_outcome.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/aardvark-core/tests/runtime_pool_and_outcome.rs b/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
index 9842a4b..b62fa69 100644
--- a/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
+++ b/crates/aardvark-core/tests/runtime_pool_and_outcome.rs
@@ -240,6 +240,7 @@ def main():
     Ok(())
 }
 
+#[allow(clippy::field_reassign_with_default)]
 fn verify_warm_state_skips_overlay_import() -> Result<()> {
     let mut config = PyRuntimeConfig::default();
     config.snapshot.save_to = Some(PathBuf::from("target/warm-state.snapshot"));

From f0c42d1a4ca4f855f768ba53082074f36d78f32a Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Sun, 19 Oct 2025 21:08:03 +0800
Subject: [PATCH 07/13] Refresh docs and rustdoc for reset telemetry

---
 README.md                          | 18 ++++++++++++++----
 crates/aardvark-core/src/config.rs |  8 ++++++--
 docs/api/rust-host.md              | 12 +++++++++++-
 docs/architecture/overview.md      |  6 ++++--
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 3051983..1472e72 100644
--- a/README.md
+++ b/README.md
@@ -10,10 +10,10 @@ Embedded multi-language runtime for executing sandboxed bundles inside V8, with
 
 ## Why Aardvark?
 
-- **Snapshot-friendly runtimes** – Reuse warm isolates across requests, preload packages, and capture snapshots to keep cold-starts in check.
+- **Snapshot-friendly runtimes** – Reuse warm isolates across requests, bake overlays into warm snapshots, and keep cold starts predictable.
 - **Deterministic sandboxing** – Enforce per-invocation budgets for wall time, CPU, heap, filesystem writes, and outbound network hosts.
 - **Self-describing bundles** – Ship code, manifest, and dependency hints together as a ZIP; hosts can honour or override the manifest contract at runtime.
-- **First-class telemetry** – Every invocation emits structured diagnostics (stdout/stderr, exceptions, resource usage, policy violations) that hosts can feed into their own observability stack.
+- **First-class telemetry** – Every invocation emits structured diagnostics (stdout/stderr, exceptions, resource usage, policy violations, reset timings) that hosts can feed into their own observability stack.
 - **Runtime pooling** – Amortise startup cost by recycling isolates with predictable reset semantics.
 - **Dual-language engine (preview)** – Run JavaScript bundles alongside Python handlers using the same network/filesystem sandboxing. JavaScript support is read-only for now and expects bring-your-own modules.
 
@@ -126,7 +126,17 @@ fn build_warm_state(bytes: &[u8]) -> anyhow::Result<(PyRuntimeConfig, Bundle)> {
 }
 ```
 
-Any new runtime (or pool) constructed with that `PyRuntimeConfig` skips the heavy Pyodide bootstrap and restores directly from the warm snapshot.
+Any new runtime (or pool) constructed with that `PyRuntimeConfig` skips the heavy Pyodide bootstrap and restores directly from the warm snapshot. Snapshots captured inside the runtime automatically mark their overlays as preloaded so in-place resets avoid re-importing site-packages.
+
+## Benchmarking the runtime
+
+For a quick sanity check of `prepare` versus `run` timings, use the built-in bench example:
+
+```
+cargo run -p aardvark-core --example bench_echo -- 100 1024
+```
+
+Arguments are `[iterations] [payload_len]` (both optional). The harness warms the runtime, captures a warm snapshot, and prints avg/min/max milliseconds for `prepare`, `run`, and `total`. The per-iteration `reset_in_place()` call runs before the timers start, so reset latency is not included in `total`. Use it to correlate host-side measurements with the core runtime.
 
 `docs/api/rust-host.md` expands on pooling, invocation strategies, and telemetry export. For JavaScript bundles, pass `language = "javascript"` in the manifest or descriptor and ship a self-contained bundle produced by your JS build tool.
 
@@ -153,7 +163,7 @@ The core library is published as `aardvark-core`. Before cutting any experimenta
 - Network sandboxing is allowlist-based per session; there is no per-request override yet.
 - Filesystem quota enforcement only covers the `/session` tree.
 - Streaming outputs and incremental logs are not available; handlers must return a single payload.
-- Warm snapshots are tied to the Pyodide build and manifest used when you captured them; changing either requires baking a new snapshot.
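+- Warm snapshots are tied to the Pyodide build and manifest used when you captured them; changing either requires baking a new snapshot. When you assemble warm states manually and the snapshot already contains the overlay, flag them with `WarmState::into_overlay_preloaded()` so resets avoid redundant imports; do not set the flag otherwise, because the overlay import is then skipped entirely.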
 - Runtime pool resets still execute synchronously on the thread that next checks out a runtime; there is no background reset worker yet.
 - API stability is not guaranteed; expect breaking changes while the runtime matures.
 
diff --git a/crates/aardvark-core/src/config.rs b/crates/aardvark-core/src/config.rs
index 9518a25..eedc694 100644
--- a/crates/aardvark-core/src/config.rs
+++ b/crates/aardvark-core/src/config.rs
@@ -123,7 +123,10 @@ impl WarmState {
         }
     }
 
-    /// Returns a new warm state flagged as overlay-preloaded.
+    /// Flags the warm state as already containing the overlay contents inside the snapshot.
+    ///
+    /// Hosts that assemble a warm state manually can call this to skip the overlay import
+    /// step during `prepare_environment`.
     pub fn into_overlay_preloaded(mut self) -> Self {
         self.overlay_preloaded = true;
         self
@@ -139,7 +142,8 @@ impl WarmState {
         self.overlay.clone()
     }
 
-    /// Indicates whether the overlay contents were baked into the snapshot.
+    /// Indicates whether the overlay contents were baked into the snapshot, allowing
+    /// the runtime to skip `import_overlay` when restoring the warm state.
     pub fn overlay_preloaded(&self) -> bool {
         self.overlay_preloaded
     }
diff --git a/docs/api/rust-host.md b/docs/api/rust-host.md
index 4ff31d2..5bc850b 100644
--- a/docs/api/rust-host.md
+++ b/docs/api/rust-host.md
@@ -245,7 +245,17 @@ fn record(outcome: &ExecutionOutcome) {
 }
 ```
 
-`SandboxTelemetry` implements `Clone` so you can send it to background workers without keeping the original outcome alive.
+`SandboxTelemetry` implements `Clone` so you can send it to background workers without keeping the original outcome alive. It mirrors `Diagnostics::reset`, exposing the reset mode, duration, and engine generation so you can correlate pool behaviour with host metrics. Shared buffers arrive as zero-copy handles; prefer `SharedBufferHandle::as_slice()` to keep them zero-copy unless you truly need an owned copy.
+
+## Quick benchmark harness
+
+To compare host-side timings with the core runtime, run the example bench:
+
+```
+cargo run -p aardvark-core --example bench_echo -- 100 1024
+```
+
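+Arguments are `[iterations] [payload_len]`. The harness warms the runtime, captures a warm snapshot, and prints avg/min/max milliseconds for `prepare`, `run`, and `total`. It drives a single `PyRuntime` and calls `reset_in_place()` before each iteration (no pool is involved), so the numbers isolate the core runtime from host and pool overhead.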
 
 ## Known gaps
 
diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md
index 739c261..8d6ddfd 100644
--- a/docs/architecture/overview.md
+++ b/docs/architecture/overview.md
@@ -8,6 +8,7 @@ This document introduces Aardvark’s execution model from the host’s point of
 - Allow hosts to preload dependencies (via manifests, overlays, and snapshots) so cold-start cost stays predictable.
 - Enforce resource limits inside the same process: CPU, wall time, heap, filesystem writes, and outbound network access.
 - Keep the host-facing API small enough for Rust, but expose structured diagnostics so other host languages can wrap it later.
+- Surface reset timings and sandbox telemetry so pooling strategies remain observable without tracing every call.
 
 ## Layers at a Glance
 
@@ -39,7 +40,7 @@ graph TD
 4. **Watchdogs** – Wall-clock and CPU watchdogs arm before calling into the guest. Heap usage is checked both before and after execution.
 5. **Sandbox enforcement** – The JS layer enforces network allowlists (HTTPS by default), filesystem mode/quota, and host capability gates for native bridges. Violations are raised back to Rust.
 6. **Outcome synthesis** – Captured stdout/stderr, console messages, payloads, sandbox telemetry, and policy violations are combined into `ExecutionOutcome`.
-7. **Reset** – Depending on `ResetPolicy`, runtimes either reset automatically to the baseline snapshot or rely on the pool handle drop path to do so.
+7. **Reset** – `ResetPolicy` decides when a runtime is reset; the reset itself either rebuilds the engine (`reset_to_snapshot`) or scrubs it in place (`reset_in_place`, which pools select via `PoolResetMode::InPlace`). Warm states captured inside the runtime bake the overlay into the snapshot, so in-place resets reuse site-packages without rehydrating tarballs.
 
 The same flow is used whether the runtime comes from a pool or is standalone. Pooling only changes lifecycle management around steps 2 and 7.
 
@@ -72,12 +73,13 @@ sequenceDiagram
 - **Descriptor-first contract** – Manifests are optional at runtime. Hosts can provide `InvocationDescriptor`s directly when they need to override limits or use fully dynamic pipelines. Manifests exist to make bundles self-describing for less opinionated hosts.
 - **“Everything is a bundle”** – Packages, manifests, and entrypoints travel together inside a single ZIP. This keeps the host API simple and avoids filesystem mutation outside the sandbox when code is deployed.
 - **Telemetry as a first-class product** – Diagnostics always attach CPU, filesystem, and network telemetry even when the invocation fails. Hosts can surface policy violations without parsing logs.
+- **Reset visibility** – After a reset, the next invocation’s diagnostics record how the runtime was reset (recreate vs in-place), how long it took, and which engine generation served the handler, making pool behaviour observable without diving into logs.
 
 ## Current Limitations
 
 - Only Linux/macOS targets are exercised. Windows builds are untested and expected to fail.
 - Shared buffer handles present zero-copy views backed by the runtime; the host may still materialize owned copies when required.
 - JavaScript bundles must be fully self-contained. Ship pre-bundled modules (e.g., via esbuild/webpack) because the runtime does not resolve npm packages or fetch external scripts.
-- Snapshot overlays assume Pyodide 0.28.2. Future Pyodide upgrades will require regenerating overlay metadata and schema version bumping.
+- Snapshot overlays assume Pyodide 0.28.2. Future Pyodide upgrades will require regenerating overlay metadata and bumping the schema version. Hosts that build warm states out of band should flag them as overlay-preloaded only when the snapshot already contains the overlay; the flag makes the runtime skip the overlay import.
 - Network sandboxing is allowlist-based per session. There is no per-request override yet, and DNS leakage is not mitigated beyond host matching.
 - Filesystem quota enforcement only tracks writes within the virtual session directory. If code escapes to other WASM-visible mounts it will currently fail closed but without detailed accounting.

From 91b48b83c1e5cc4e474ec1e226327c53cac5e50e Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Sun, 19 Oct 2025 21:58:22 +0800
Subject: [PATCH 08/13] Document threading and pooling model

---
 docs/dev/runtime-internals.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/docs/dev/runtime-internals.md b/docs/dev/runtime-internals.md
index 1600989..f4f4862 100644
--- a/docs/dev/runtime-internals.md
+++ b/docs/dev/runtime-internals.md
@@ -92,3 +92,28 @@ Following this order keeps host APIs and in-process behaviour aligned.
 - `cargo run -p aardvark-core --example bench_echo -- [iterations] [payload_len]` exercises a tiny Python echo handler and prints per-phase timings (`prepare`, `run`, `total`).
 - The harness captures a warm snapshot up-front, so in-place resets hit the fast path (overlay already baked into the snapshot).
 - Adjust `payload_len` to explore how return sizes influence the execution phase; the `prepare` measurement stays dominated by warm-restore.
+
+## Threading & Pooling Model
+
+- **Single-threaded core.** `JsRuntime` owns a V8 isolate; every call into the engine must happen on the same OS thread that created it. The runtime is not `Send`/`Sync`, and `v8::Locker` guards the isolate.
+- **Host-driven parallelism.** To run handlers concurrently, hosts create or pool multiple runtimes—one per worker thread (or process). Each checkout stays on the borrowing thread until it is dropped.
+- **Pooling vs. manual reuse.** If you only run sequential invocations, holding a `PyRuntime` and calling `reset_in_place()` yourself is equivalent to pooling. Pools add value when you need lifecycle isolation (drop tainted runtimes) or multiple threads sharing a limited set of isolates.
+- **No implicit async.** Aardvark does not spawn worker threads or background reset tasks; everything is synchronous. If you wrap it in async code, use `spawn_blocking` or your own thread pool.
+
+```mermaid
+graph LR
+  T1[Host Thread 1] -->|checkout| Pool
+  T2[Host Thread 2] -->|checkout| Pool
+  Pool -->|runtime A| RuntimeA
+  Pool -->|runtime B| RuntimeB
+  RuntimeA --> T1
+  RuntimeB --> T2
+  T1 -->|drop| Pool
+  T2 -->|drop| Pool
+```
+
+1. Host threads check out runtimes when they need to execute a bundle. If none are available, they block.
+2. The runtime is used entirely on that thread (`prepare_session`, `run_session`). Because it is not `Send`, safe Rust will not let you move it to another thread; forcing the move with `unsafe` is undefined behaviour.
+3. Dropping the handle returns it to the pool, which performs the configured reset (in-place or full rebuild) before making it available again.
+
+🚫 **Do not** move `PyRuntime` or `PooledRuntime` values across threads. Need cross-thread execution? Spawn a dedicated worker thread per runtime and communicate via channels in your host layer.

From 81c466d50effc8859a7b667f580e57bb207cb3e6 Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Sun, 19 Oct 2025 22:05:14 +0800
Subject: [PATCH 09/13] Remove triage docs

---
 docs/dev/README.md |  2 --
 docs/dev/triage.md | 62 ----------------------------------------------
 2 files changed, 64 deletions(-)
 delete mode 100644 docs/dev/triage.md

diff --git a/docs/dev/README.md b/docs/dev/README.md
index 37a198f..ad5d402 100644
--- a/docs/dev/README.md
+++ b/docs/dev/README.md
@@ -11,8 +11,6 @@ organised so you can jump straight to the task at hand:
   bridge, Pyodide boot sequence, and sandbox shims.
 - [`release.md`](release.md) – tagging, changelog hygiene, and crates.io publish
   checklist.
-- [`triage.md`](triage.md) – how we track issues, prioritise bugs, and decide
-  what lands next.
 
 These documents mirror the public architecture notes but focus on *how* to make
 changes safely, not just on what the runtime does.
diff --git a/docs/dev/triage.md b/docs/dev/triage.md
deleted file mode 100644
index 59b8a92..0000000
--- a/docs/dev/triage.md
+++ /dev/null
@@ -1,62 +0,0 @@
-# Issue Triage & Planning
-
-This guide explains how we ingest bug reports, feature requests, and technical
-investigations.
-
-## Intake
-
-- New issues land in the "Triage" column of the project board.
-- Each issue must have at least:
-  - Reproduction steps or failing test case (for bugs)
-  - Clear problem statement or user story (for features)
-  - Owner (person responsible for next action)
-
-## Classification
-
-1. **Severity**
-   - `S1` – production outage or data risk
-   - `S2` – broken core functionality or hard failure in supported workflow
-   - `S3` – degraded experience or missing guardrails
-   - `S4` – polish, tooling, docs-only issues
-
-2. **Area**
-   - Runtime (Rust, watchdogs, pooling)
-   - JS sandbox (network/filesystem/capabilities)
-   - Pyodide packaging & snapshots
-   - Host API / diagnostics
-   - Tooling & docs
-
-3. **Type**
-   - Bug
-   - Enhancement
-   - Investigation
-
-## Grooming Checklist
-
-- Ensure each issue has acceptance criteria.
-- Link related docs (`docs/api`, `docs/architecture`, relevant `docs/dev`
-  articles).
-- For bugs, capture whether the issue reproduces on current `master`.
-- For feature requests, note which sandbox guarantees or telemetry changes are
-  required.
-
-## Planning Cadence
-
-- Weekly triage meeting: review new issues, assign owners, and set severity.
-- Bi-weekly planning: slot `S1/S2` items into the current milestone and
-  negotiate capacity for enhancements.
-- Maintain a "Future Ideas" list when scope is unclear.
-
-## Definition of Done
-
-- Code merged with tests (unit, integration, or smoke as appropriate).
-- Docs updated: user-facing (`docs/api`/`docs/architecture`) and developer
-  references (`docs/dev`).
-- Telemetry and diagnostics audited for regressions.
-- Release notes entry drafted if the change is user-visible.
-
-## Technical Debt Tracking
-
-- Use labels like `tech-debt`, `refactor`, `cleanup`.
-- Debt items should explain the risk (maintenance cost, performance, correctness).
-- Revisit debt backlog monthly to prevent runaway build-up.

From 67881859b8fc525deacb47aee9671dd38ada86c2 Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Sun, 19 Oct 2025 22:27:04 +0800
Subject: [PATCH 10/13] Add perf harness and documentation

---
 .mise.toml                             |   1 +
 Cargo.lock                             | 230 ++++++++++++--
 Cargo.toml                             |   1 +
 README.md                              |   1 +
 docs/perf/overview.md                  |  94 ++++++
 perf/fixtures/run_host.py              |  58 ++++
 perf/fixtures/scenarios/__init__.py    |   9 +
 perf/fixtures/scenarios/echo.py        |   2 +
 perf/fixtures/scenarios/numpy_case.py  |  10 +
 perf/fixtures/scenarios/pandas_case.py |  13 +
 perf/runner/Cargo.toml                 |  20 ++
 perf/runner/src/main.rs                | 410 +++++++++++++++++++++++++
 perf/scripts/render_markdown.py        |  47 +++
 13 files changed, 863 insertions(+), 33 deletions(-)
 create mode 100644 docs/perf/overview.md
 create mode 100644 perf/fixtures/run_host.py
 create mode 100644 perf/fixtures/scenarios/__init__.py
 create mode 100644 perf/fixtures/scenarios/echo.py
 create mode 100644 perf/fixtures/scenarios/numpy_case.py
 create mode 100644 perf/fixtures/scenarios/pandas_case.py
 create mode 100644 perf/runner/Cargo.toml
 create mode 100644 perf/runner/src/main.rs
 create mode 100755 perf/scripts/render_markdown.py

diff --git a/.mise.toml b/.mise.toml
index 7f16c36..5cde2cb 100644
--- a/.mise.toml
+++ b/.mise.toml
@@ -5,4 +5,5 @@ rust = "1.90"
 zig = "0.15"
 node = "22"
 wasmtime = "37"
+python = "3.12"
 #tinygo = "0.39"
diff --git a/Cargo.lock b/Cargo.lock
index 3ca7451..79ceea4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -10,7 +10,7 @@ dependencies = [
  "anyhow",
  "bzip2",
  "chrono",
- "clap",
+ "clap 4.5.48",
  "reqwest",
  "serde",
  "serde_json",
@@ -48,6 +48,19 @@ dependencies = [
  "zip",
 ]
 
+[[package]]
+name = "aardvark-perf"
+version = "0.1.0"
+dependencies = [
+ "aardvark-core",
+ "anyhow",
+ "libc",
+ "serde",
+ "serde_json",
+ "structopt",
+ "zip",
+]
+
 [[package]]
 name = "addr2line"
 version = "0.25.1"
@@ -81,6 +94,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "ansi_term"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "anstream"
 version = "0.6.21"
@@ -159,6 +181,17 @@ version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
 
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "autocfg"
 version = "1.5.0"
@@ -192,7 +225,7 @@ version = "0.72.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
 dependencies = [
- "bitflags",
+ "bitflags 2.9.4",
  "cexpr",
  "clang-sys",
  "itertools",
@@ -203,9 +236,15 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn",
+ "syn 2.0.106",
 ]
 
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
 [[package]]
 name = "bitflags"
 version = "2.9.4"
@@ -348,6 +387,21 @@ dependencies = [
  "libloading",
 ]
 
+[[package]]
+name = "clap"
+version = "2.34.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
+dependencies = [
+ "ansi_term",
+ "atty",
+ "bitflags 1.3.2",
+ "strsim 0.8.0",
+ "textwrap",
+ "unicode-width",
+ "vec_map",
+]
+
 [[package]]
 name = "clap"
 version = "4.5.48"
@@ -367,7 +421,7 @@ dependencies = [
  "anstream",
  "anstyle",
  "clap_lex",
- "strsim",
+ "strsim 0.11.1",
 ]
 
 [[package]]
@@ -376,10 +430,10 @@ version = "4.5.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c"
 dependencies = [
- "heck",
+ "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -478,7 +532,7 @@ dependencies = [
  "diplomat_core",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -498,7 +552,7 @@ dependencies = [
  "serde",
  "smallvec",
  "strck",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -509,7 +563,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -655,7 +709,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -746,12 +800,30 @@ dependencies = [
  "crc32fast",
 ]
 
+[[package]]
+name = "heck"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
+dependencies = [
+ "unicode-segmentation",
+]
+
 [[package]]
 name = "heck"
 version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "hex"
 version = "0.4.3"
@@ -1061,7 +1133,7 @@ version = "0.7.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b"
 dependencies = [
- "bitflags",
+ "bitflags 2.9.4",
  "cfg-if",
  "libc",
 ]
@@ -1162,7 +1234,7 @@ version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb"
 dependencies = [
- "bitflags",
+ "bitflags 2.9.4",
  "libc",
  "redox_syscall",
 ]
@@ -1403,7 +1475,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
 dependencies = [
  "proc-macro2",
- "syn",
+ "syn 2.0.106",
+]
+
+[[package]]
+name = "proc-macro-error"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
+dependencies = [
+ "proc-macro-error-attr",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error-attr"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "version_check",
 ]
 
 [[package]]
@@ -1520,7 +1616,7 @@ version = "0.5.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
 dependencies = [
- "bitflags",
+ "bitflags 2.9.4",
 ]
 
 [[package]]
@@ -1624,7 +1720,7 @@ version = "0.38.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
 dependencies = [
- "bitflags",
+ "bitflags 2.9.4",
  "errno",
  "libc",
  "linux-raw-sys 0.4.15",
@@ -1637,7 +1733,7 @@ version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
 dependencies = [
- "bitflags",
+ "bitflags 2.9.4",
  "errno",
  "libc",
  "linux-raw-sys 0.11.0",
@@ -1740,7 +1836,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -1837,18 +1933,59 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "strsim"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
+
 [[package]]
 name = "strsim"
 version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
+[[package]]
+name = "structopt"
+version = "0.3.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10"
+dependencies = [
+ "clap 2.34.0",
+ "lazy_static",
+ "structopt-derive",
+]
+
+[[package]]
+name = "structopt-derive"
+version = "0.4.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0"
+dependencies = [
+ "heck 0.3.3",
+ "proc-macro-error",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "subtle"
 version = "2.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
 
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.106"
@@ -1877,7 +2014,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -1929,6 +2066,15 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
 
+[[package]]
+name = "textwrap"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
+dependencies = [
+ "unicode-width",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.69"
@@ -1955,7 +2101,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -1966,7 +2112,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -2040,7 +2186,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -2074,7 +2220,7 @@ version = "0.6.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
 dependencies = [
- "bitflags",
+ "bitflags 2.9.4",
  "bytes",
  "futures-util",
  "http",
@@ -2117,7 +2263,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -2186,6 +2332,18 @@ version = "1.0.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
 
+[[package]]
+name = "unicode-segmentation"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
+
+[[package]]
+name = "unicode-width"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
+
 [[package]]
 name = "untrusted"
 version = "0.9.0"
@@ -2239,7 +2397,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8827809a2884fb68530d678a8ef15b1ed1344bbf844879194d68c140c6f844f9"
 dependencies = [
  "bindgen",
- "bitflags",
+ "bitflags 2.9.4",
  "fslock",
  "gzip-header",
  "home",
@@ -2255,6 +2413,12 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
 
+[[package]]
+name = "vec_map"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
+
 [[package]]
 name = "version_check"
 version = "0.9.5"
@@ -2326,7 +2490,7 @@ dependencies = [
  "log",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
  "wasm-bindgen-shared",
 ]
 
@@ -2361,7 +2525,7 @@ checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -2468,7 +2632,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -2479,7 +2643,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -2705,7 +2869,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "camino",
- "clap",
+ "clap 4.5.48",
 ]
 
 [[package]]
@@ -2728,7 +2892,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
  "synstructure",
 ]
 
@@ -2749,7 +2913,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
@@ -2769,7 +2933,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
  "synstructure",
 ]
 
@@ -2809,7 +2973,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.106",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index d6ed4b1..f954178 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,6 +4,7 @@ members = [
   "crates/aardvark-cli",
   "integration-tests",
   "xtask",
+  "perf/runner",
 ]
 resolver = "2"
 
diff --git a/README.md b/README.md
index 1472e72..76bf997 100644
--- a/README.md
+++ b/README.md
@@ -145,6 +145,7 @@ Arguments are `[iterations] [payload_len]` (both optional). The harness warms th
 - Architecture guidance lives under `docs/architecture/`. Start with `overview.md` for a top-down explanation, then branch into resource-limits, sandbox internals, and telemetry.
 - API reference under `docs/api/` covers the manifest schema, host integration, handler contracts, and diagnostics handling with examples.
 - Developer onboarding material is available in `docs/dev/` for contributors extending the project.
+- Performance notes and benchmark workflow live in `docs/perf/overview.md`.
 
 ## Publishing Notes
 
diff --git a/docs/perf/overview.md b/docs/perf/overview.md
new file mode 100644
index 0000000..76180ab
--- /dev/null
+++ b/docs/perf/overview.md
@@ -0,0 +1,94 @@
+# Performance Benchmarks
+
+This suite measures a few representative workloads across the Aardvark runtime
+and a native CPython interpreter:
+
+- **Echo** – returns a fixed 1 KB string.
+- **NumPy** – multiplies two 200×200 matrices using `numpy`.
+- **Pandas** – aggregates a 50 000‑row DataFrame.
+
+Each workload is executed through Aardvark (warm snapshot, in-place resets) and
+through the host Python interpreter. The harness records average/min/max
+wall-clock latency per invocation and the peak RSS reported by the OS.
+
+## Requirements
+
+- `uv` for ephemeral Python environments: <https://docs.astral.sh/uv/>
+- `mise` (or any toolchain manager capable of installing Python 3.12) – the
+  commands below use it so that the instructions are reproducible.
+- Pyodide assets already downloaded (see [Host Integration – Preparing Pyodide
+  assets](../api/rust-host.md#preparing-pyodide-assets)).
+
+Ensure a matching CPython is available (Pyodide 0.28.2 targets Python 3.12):
+
+```sh
+mise install python@3.12
+mise exec python@3.12 -- python --version
+```
+
+## Running the Benchmarks
+
+From the repository root:
+
+```sh
+cargo run -p aardvark-perf -- all --iterations 25 \
+  --json target/perf/results.json \
+  --csv target/perf/results.csv
+```
+
+Sample console output:
+
+```
+| Scenario | Mode | Avg ms | Min ms | Max ms | RSS (KiB) |
+|----------|------|--------|--------|--------|-----------|
+| echo     | aardvark | 128.55 | 123.12 | 135.44 | 215000 |
+| echo     | host-python | 1.42 | 1.30 | 1.71 | 10234 |
+...
+```
+
+The JSON/CSV files contain the same data for further analysis.
+
+### Single Scenario
+
+To benchmark one combination:
+
+```sh
+cargo run -p aardvark-perf -- scenario \
+  --scenario pandas \
+  --mode aardvark \
+  --iterations 50
+```
+
+## Host Python Runner
+
+The harness shells out to:
+
+```sh
+uv run --python 3.12 --with numpy --with pandas \
+  python perf/fixtures/run_host.py --scenario pandas --iterations 25
+```
+
+`uv` ensures the requested packages are available without modifying the user’s
+environment.
+
+## Generating Markdown Tables
+
+A helper script converts the JSON output into a Markdown table for reports:
+
+```sh
+python perf/scripts/render_markdown.py target/perf/results.json > target/perf/results.md
+```
+
+The script reads the JSON emitted by `aardvark-perf` and prints a table grouped
+by scenario.
+
+## Extending the Suite
+
+- Add new Python workloads under `perf/fixtures/scenarios/` and register them in
+  `SCENARIOS` (defined in `perf/fixtures/scenarios/__init__.py`).
+- Add the matching `Scenario` variant in `perf/runner/src/main.rs` and extend
+  `scenario_source` and `scenario_packages` (the manifest is built from them).
+- For more granular metrics (per-phase timings, CPU, warm snapshot size), extend
+  the `BenchResult` struct and add the necessary instrumentation in
+  `bench_aardvark`. Keep a note in the internal diary when introducing new
+  metrics so we can track follow-up work.
diff --git a/perf/fixtures/run_host.py b/perf/fixtures/run_host.py
new file mode 100644
index 0000000..d4e2f49
--- /dev/null
+++ b/perf/fixtures/run_host.py
@@ -0,0 +1,58 @@
+import argparse
+import json
+import resource
+import sys
+import time
+
+from scenarios import SCENARIOS
+
+
+def timing_stats(samples):
+    if not samples:
+        return {"avg_ms": 0.0, "min_ms": 0.0, "max_ms": 0.0}
+    avg = sum(samples) / len(samples)
+    return {
+        "avg_ms": avg * 1000.0,
+        "min_ms": min(samples) * 1000.0,
+        "max_ms": max(samples) * 1000.0,
+    }
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--scenario", required=True)
+    parser.add_argument("--iterations", type=int, default=10)
+    args = parser.parse_args()
+
+    scenario = args.scenario.lower()
+    try:
+        handler = SCENARIOS[scenario]
+    except KeyError as exc:
+        raise SystemExit(f"unknown scenario: {scenario}") from exc
+
+    samples = []
+    for _ in range(args.iterations):
+        start = time.perf_counter()
+        result = handler()
+        _ = result  # ensure work executes; result ignored
+        samples.append(time.perf_counter() - start)
+
+    usage = resource.getrusage(resource.RUSAGE_SELF)
+    rss = usage.ru_maxrss
+    if sys.platform == "darwin":
+        rss_kib = rss // 1024
+    else:
+        rss_kib = rss
+
+    payload = {
+        "scenario": scenario,
+        "iterations": args.iterations,
+        "total": timing_stats(samples),
+        "rss_kib": int(rss_kib),
+        "python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
+    }
+    json.dump(payload, fp=sys.stdout)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/perf/fixtures/scenarios/__init__.py b/perf/fixtures/scenarios/__init__.py
new file mode 100644
index 0000000..e9e1f91
--- /dev/null
+++ b/perf/fixtures/scenarios/__init__.py
@@ -0,0 +1,9 @@
+from . import echo
+from . import numpy_case
+from . import pandas_case
+
+SCENARIOS = {
+    "echo": echo.main,
+    "numpy": numpy_case.main,
+    "pandas": pandas_case.main,
+}
diff --git a/perf/fixtures/scenarios/echo.py b/perf/fixtures/scenarios/echo.py
new file mode 100644
index 0000000..948d14e
--- /dev/null
+++ b/perf/fixtures/scenarios/echo.py
@@ -0,0 +1,2 @@
+def main():  # echo scenario: no third-party imports, returns a fixed 1000-character string
+    return "x" * 1000
diff --git a/perf/fixtures/scenarios/numpy_case.py b/perf/fixtures/scenarios/numpy_case.py
new file mode 100644
index 0000000..c8f5596
--- /dev/null
+++ b/perf/fixtures/scenarios/numpy_case.py
@@ -0,0 +1,10 @@
+import numpy as np
+
+
+def main():  # numpy scenario: build two random matrices and multiply them
+    rng = np.random.default_rng(42)  # fixed seed: identical matrices on every call
+    a = rng.random((200, 200), dtype=np.float64)
+    b = rng.random((200, 200), dtype=np.float64)
+    c = a @ b  # 200x200 matrix product
+    # Return a representative scalar to keep payload small
+    return float(c[0, 0])
diff --git a/perf/fixtures/scenarios/pandas_case.py b/perf/fixtures/scenarios/pandas_case.py
new file mode 100644
index 0000000..389dcc4
--- /dev/null
+++ b/perf/fixtures/scenarios/pandas_case.py
@@ -0,0 +1,13 @@
+import numpy as np
+import pandas as pd
+
+
+def main():  # pandas scenario: group a random 50 000-row frame and average per category
+    rng = np.random.default_rng(123)  # fixed seed: identical frame on every call
+    rows = 50_000
+    categories = rng.integers(0, 20, size=rows)  # upper bound is exclusive: keys 0..19
+    values = rng.normal(loc=0.0, scale=1.0, size=rows)
+    df = pd.DataFrame({"category": categories, "value": values})
+    summary = df.groupby("category").agg(value_mean=("value", "mean"))  # mean per category
+    # Convert to a plain mapping to keep payload JSON-friendly
+    return {int(idx): float(val) for idx, val in summary["value_mean"].items()}
diff --git a/perf/runner/Cargo.toml b/perf/runner/Cargo.toml
new file mode 100644
index 0000000..3120798
--- /dev/null
+++ b/perf/runner/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "aardvark-perf"
+edition = "2021"
+version = "0.1.0"
+license = "Apache-2.0"
+authors = ["Aardvark Labs <dev@aardvark.example>"]
+description = "Performance harness for Aardvark runtime"
+
+[dependencies]
+aardvark-core = { path = "../../crates/aardvark-core" }
+anyhow = "1.0"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+structopt = { version = "0.3", features = ["color"] }
+
+[dependencies.zip]
+workspace = true
+
+[target.'cfg(unix)'.dependencies]
+libc = "0.2"
diff --git a/perf/runner/src/main.rs b/perf/runner/src/main.rs
new file mode 100644
index 0000000..2fa5649
--- /dev/null
+++ b/perf/runner/src/main.rs
@@ -0,0 +1,410 @@
+use std::fs::File;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+use std::time::Duration;
+
+use aardvark_core::{Bundle, PyRuntime, PyRuntimeConfig};
+use anyhow::{anyhow, Context, Result};
+use serde::Serialize;
+use structopt::StructOpt;
+
+#[derive(StructOpt, Debug)]
+#[structopt(
+    name = "aardvark-perf",
+    about = "Performance harness for Aardvark runtime"
+)]
+enum Cli {
+    /// Run benchmarks for all scenarios (Aardvark + host Python) and emit reports
+    All {
+        #[structopt(long, default_value = "10")]
+        iterations: usize, // timed iterations per scenario and mode
+        #[structopt(long)]
+        json: Option<PathBuf>, // when set, the results are also written here as JSON
+        #[structopt(long)]
+        csv: Option<PathBuf>, // when set, the results are also written here as CSV
+    },
+    /// Run a single scenario/mode combination
+    Scenario {
+        #[structopt(long, possible_values = Scenario::VARIANTS, case_insensitive = true)]
+        scenario: Scenario,
+        #[structopt(long, possible_values = Mode::VARIANTS, case_insensitive = true)]
+        mode: Mode,
+        #[structopt(long, default_value = "10")]
+        iterations: usize, // timed iterations for the selected combination
+    },
+}
+
+#[derive(Copy, Clone, Debug, Serialize)]
+enum Scenario { // benchmark workloads; sources live in perf/fixtures/scenarios/
+    Echo, // echo.py
+    Numpy, // numpy_case.py
+    Pandas, // pandas_case.py
+}
+
+#[derive(Copy, Clone, Debug, Serialize)]
+enum Mode { // which execution path is being measured
+    Aardvark, // PyRuntime driven in-process by bench_aardvark
+    HostPython, // native CPython launched through `uv run` by bench_host
+}
+
+#[derive(Serialize)]
+struct BenchResult { // one report row: a scenario measured in one mode
+    scenario: Scenario,
+    mode: Mode,
+    iterations: usize,
+    total: TimingStats, // per-iteration latency (prepare + run in Aardvark mode)
+    prepare: Option<TimingStats>, // None for host-Python results
+    run: Option<TimingStats>, // None for host-Python results
+    rss_kib: Option<u64>, // peak resident set size in KiB
+}
+
+#[derive(Serialize, serde::Deserialize, Default, Clone)]
+struct TimingStats { // latency summary in milliseconds; Default is all zeros
+    avg_ms: f64,
+    min_ms: f64,
+    max_ms: f64,
+}
+
+/// Entry point: parses the CLI and runs either the whole suite or one scenario/mode pair.
+fn main() -> Result<()> {
+    match Cli::from_args() {
+        Cli::All {
+            iterations,
+            json,
+            csv,
+        } => {
+            let mut report = Vec::new();
+            for scenario in [Scenario::Echo, Scenario::Numpy, Scenario::Pandas] {
+                report.push(bench_aardvark(scenario, iterations)?);
+                report.push(bench_host(scenario, iterations)?);
+            }
+            if let Some(json_path) = json.as_deref() {
+                write_json(json_path, &report)?;
+            }
+            if let Some(csv_path) = csv.as_deref() {
+                write_csv(csv_path, &report)?;
+            }
+            print_summary(&report);
+        }
+        Cli::Scenario {
+            scenario,
+            mode,
+            iterations,
+        } => {
+            let single = match mode {
+                Mode::HostPython => bench_host(scenario, iterations)?,
+                Mode::Aardvark => bench_aardvark(scenario, iterations)?,
+            };
+            println!("{}", serde_json::to_string_pretty(&single)?);
+        }
+    }
+    Ok(())
+}
+
+/// Benchmarks `scenario` inside an Aardvark runtime, timing prepare and run per iteration.
+/// Returns an error instead of panicking when the warm-up or a measured run fails.
+fn bench_aardvark(scenario: Scenario, iterations: usize) -> Result<BenchResult> {
+    let python_source = scenario_source(scenario);
+    let manifest = scenario_manifest(scenario);
+    let bundle = build_bundle(python_source, manifest.as_bytes())?;
+
+    let snapshot_path = bundle_snapshot_path(scenario)?;
+    let mut config = PyRuntimeConfig::default();
+    if snapshot_path.exists() {
+        config.snapshot.load_from = Some(snapshot_path.clone());
+    }
+    config.snapshot.save_to = Some(snapshot_path);
+    let mut runtime = PyRuntime::new(config)?;
+
+    // Warm-up run to install packages and capture warm state.
+    let (session, _) = runtime.prepare_session_with_manifest(bundle.clone())?;
+    let warmup = runtime.run_session(&session)?;
+    anyhow::ensure!(warmup.is_success(), "warm-up failed: {:?}", warmup.status);
+    runtime.capture_warm_state()?;
+    runtime.reset_in_place()?;
+
+    let mut prepare = Vec::with_capacity(iterations);
+    let mut run = Vec::with_capacity(iterations);
+    let mut total = Vec::with_capacity(iterations);
+    for _ in 0..iterations {
+        runtime.reset_in_place()?; // runs before the timer starts, so it is not measured
+        let prep_start = std::time::Instant::now();
+        let (session, _) = runtime.prepare_session_with_manifest(bundle.clone())?;
+        let prep_elapsed = prep_start.elapsed();
+
+        let run_start = std::time::Instant::now();
+        let outcome = runtime.run_session(&session)?;
+        let run_elapsed = run_start.elapsed(); // stop the clock before validating
+        anyhow::ensure!(outcome.is_success(), "handler failed: {:?}", outcome.status);
+        prepare.push(prep_elapsed);
+        run.push(run_elapsed);
+        total.push(prep_elapsed + run_elapsed);
+    }
+
+    Ok(BenchResult {
+        scenario,
+        mode: Mode::Aardvark,
+        iterations,
+        total: timing_stats(&total),
+        prepare: Some(timing_stats(&prepare)),
+        run: Some(timing_stats(&run)),
+        rss_kib: max_rss_kib(),
+    })
+}
+
+/// Runs `scenario` under the host CPython through `uv run` and returns the timings that
+/// `run_host.py` prints as JSON. Only the total latency and peak RSS are available here.
+fn bench_host(scenario: Scenario, iterations: usize) -> Result<BenchResult> {
+    // The script path is relative, so the harness has to be started from the
+    // repository root.
+    let mut uv = Command::new("uv");
+    uv.arg("run")
+        .arg(format!("--python={}", host_python_version()))
+        .args(scenario_packages(scenario).iter().map(|pkg| format!("--with={pkg}")))
+        .arg("python")
+        .arg(Path::new("perf/fixtures/run_host.py"))
+        .arg("--scenario")
+        .arg(scenario.name())
+        .arg("--iterations")
+        .arg(iterations.to_string());
+
+    let output = uv
+        .output()
+        .with_context(|| "failed to run host python benchmark")?;
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        return Err(anyhow!("host benchmark failed: {}", stderr));
+    }
+
+    let parsed: HostResult = serde_json::from_slice(&output.stdout)
+        .with_context(|| "failed to parse host benchmark output")?;
+    Ok(BenchResult {
+        scenario,
+        mode: Mode::HostPython,
+        iterations,
+        total: parsed.total,
+        prepare: None,
+        run: None,
+        rss_kib: Some(parsed.rss_kib),
+    })
+}
+
+fn scenario_source(scenario: Scenario) -> &'static str {
+    match scenario { // include_str! embeds each fixture's text in the binary at compile time
+        Scenario::Echo => include_str!("../../fixtures/scenarios/echo.py"),
+        Scenario::Numpy => include_str!("../../fixtures/scenarios/numpy_case.py"),
+        Scenario::Pandas => include_str!("../../fixtures/scenarios/pandas_case.py"),
+    }
+}
+
+/// Renders the bundle manifest for `scenario` as JSON text (entrypoint `main:main`).
+fn scenario_manifest(scenario: Scenario) -> String {
+    serde_json::json!({
+        "schemaVersion": "1.0",
+        "entrypoint": "main:main",
+        "packages": scenario_packages(scenario),
+    })
+    .to_string()
+}
+
+fn scenario_packages(scenario: Scenario) -> &'static [&'static str] {
+    match scenario {
+        Scenario::Echo => &[],
+        Scenario::Numpy => &["numpy"],
+        Scenario::Pandas => &["numpy", "pandas"],
+    }
+}
+
+fn build_bundle(source: &str, manifest: &[u8]) -> Result<Bundle> {
+    use zip::write::FileOptions;
+    use zip::CompressionMethod;
+
+    let mut buffer = Vec::new();
+    {
+        let mut writer = zip::ZipWriter::new(std::io::Cursor::new(&mut buffer));
+        let options = FileOptions::default().compression_method(CompressionMethod::Deflated);
+        writer.start_file("main.py", options)?;
+        writer.write_all(source.as_bytes())?;
+        writer.start_file("aardvark.manifest.json", options)?;
+        writer.write_all(manifest)?;
+        writer.finish()?;
+    }
+    Ok(Bundle::from_zip_bytes(buffer)?)
+}
+
+/// Reduces `samples` to their average, minimum and maximum, in milliseconds.
+///
+/// An empty slice yields `TimingStats::default()`.
+fn timing_stats(samples: &[Duration]) -> TimingStats {
+    if samples.is_empty() {
+        return TimingStats::default();
+    }
+    let secs = samples.iter().map(Duration::as_secs_f64);
+    let (min, max, sum) = secs.fold(
+        (f64::INFINITY, f64::NEG_INFINITY, 0.0),
+        |(lo, hi, acc), s| (lo.min(s), hi.max(s), acc + s),
+    );
+    let avg = sum / samples.len() as f64;
+    // Everything above is in seconds; the report is in milliseconds.
+    TimingStats {
+        avg_ms: avg * 1000.0,
+        min_ms: min * 1000.0,
+        max_ms: max * 1000.0,
+    }
+}
+
+fn write_json(path: &Path, results: &[BenchResult]) -> Result<()> {
+    let mut file =
+        File::create(path).with_context(|| format!("failed to write {}", path.display()))?;
+    file.write_all(serde_json::to_string_pretty(results)?.as_bytes())?;
+    Ok(())
+}
+
+/// Writes `results` to `path` as CSV: a header row, then one row per result.
+///
+/// Absent measurements (`rss_kib`, `prepare`, `run`) become empty fields so they
+/// cannot be mistaken for a measured zero.
+fn write_csv(path: &Path, results: &[BenchResult]) -> Result<()> {
+    let mut file =
+        File::create(path).with_context(|| format!("failed to write {}", path.display()))?;
+    writeln!(
+        file,
+        "scenario,mode,iterations,avg_ms,min_ms,max_ms,rss_kib,prepare_avg_ms,run_avg_ms"
+    )?;
+    // Average of an optional phase, or "" when that phase was not timed.
+    let phase_avg = |stats: &Option<TimingStats>| match stats {
+        Some(s) => format!("{:.2}", s.avg_ms),
+        None => String::new(),
+    };
+    for result in results {
+        let rss_kib = result.rss_kib.map(|v| v.to_string()).unwrap_or_default();
+        writeln!(
+            file,
+            "{},{},{},{:.2},{:.2},{:.2},{},{},{}",
+            result.scenario.name(),
+            result.mode.name(),
+            result.iterations,
+            result.total.avg_ms,
+            result.total.min_ms,
+            result.total.max_ms,
+            rss_kib,
+            phase_avg(&result.prepare),
+            phase_avg(&result.run),
+        )?;
+    }
+    Ok(())
+}
+
+fn print_summary(results: &[BenchResult]) {
+    println!("| Scenario | Mode | Avg ms | Min ms | Max ms | RSS (KiB) |");
+    println!("|----------|------|--------|--------|--------|-----------|");
+    for r in results {
+        println!(
+            "| {} | {} | {:.2} | {:.2} | {:.2} | {} |",
+            r.scenario.name(),
+            r.mode.name(),
+            r.total.avg_ms,
+            r.total.min_ms,
+            r.total.max_ms,
+            r.rss_kib.unwrap_or_default()
+        );
+    }
+}
+
+fn bundle_snapshot_path(scenario: Scenario) -> Result<PathBuf> {
+    let dir = PathBuf::from("target/perf_snapshots");
+    std::fs::create_dir_all(&dir)?;
+    let mut path = dir;
+    path.push(format!("{}.bin", scenario.name()));
+    Ok(path)
+}
+
+fn host_python_version() -> &'static str {
+    "3.12"
+}
+
+#[derive(Serialize, serde::Deserialize)]
+struct HostResult {
+    total: TimingStats,
+    rss_kib: u64,
+}
+
+trait ScenarioExt {
+    fn name(&self) -> &'static str;
+}
+
+impl ScenarioExt for Scenario {
+    fn name(&self) -> &'static str {
+        match self {
+            Scenario::Echo => "echo",
+            Scenario::Numpy => "numpy",
+            Scenario::Pandas => "pandas",
+        }
+    }
+}
+
+impl Scenario {
+    const VARIANTS: &'static [&'static str] = &["echo", "numpy", "pandas"];
+}
+
+impl std::str::FromStr for Scenario {
+    type Err = String;
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        match s.to_ascii_lowercase().as_str() {
+            "echo" => Ok(Scenario::Echo),
+            "numpy" => Ok(Scenario::Numpy),
+            "pandas" => Ok(Scenario::Pandas),
+            other => Err(format!("unknown scenario '{other}'")),
+        }
+    }
+}
+
+impl Mode {
+    fn name(&self) -> &'static str {
+        match self {
+            Mode::Aardvark => "aardvark",
+            Mode::HostPython => "host-python",
+        }
+    }
+}
+
+impl Mode {
+    const VARIANTS: &'static [&'static str] = &["aardvark", "host-python"];
+}
+
+impl std::str::FromStr for Mode {
+    type Err = String;
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        match s.to_ascii_lowercase().as_str() {
+            "aardvark" => Ok(Mode::Aardvark),
+            "host-python" | "host" | "python" => Ok(Mode::HostPython),
+            other => Err(format!("unknown mode '{other}'")),
+        }
+    }
+}
+
+/// Returns this process's peak resident set size in KiB, or `None` if
+/// `getrusage` fails or reports a negative value.
+#[cfg(unix)]
+fn max_rss_kib() -> Option<u64> {
+    // SAFETY: `rusage` is a plain C struct for which all-zero bytes are valid,
+    // and `getrusage` only writes into the struct we pass it.
+    let usage = unsafe {
+        let mut usage: libc::rusage = std::mem::zeroed();
+        if libc::getrusage(libc::RUSAGE_SELF, &mut usage) != 0 {
+            return None;
+        }
+        usage
+    };
+    // macOS reports `ru_maxrss` in bytes; other Unix targets are taken as KiB.
+    let divisor = if cfg!(target_os = "macos") { 1024 } else { 1 };
+    u64::try_from(usage.ru_maxrss).ok().map(|rss| rss / divisor)
+}
+
+#[cfg(not(unix))]
+fn max_rss_kib() -> Option<u64> {
+    None
+}
diff --git a/perf/scripts/render_markdown.py b/perf/scripts/render_markdown.py
new file mode 100755
index 0000000..b81b6db
--- /dev/null
+++ b/perf/scripts/render_markdown.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+"""Render benchmark JSON output as Markdown tables."""
+import argparse
+import json
+from collections import defaultdict
+
+
+def load(path):
+    """Return the parsed JSON document stored at ``path`` (read as UTF-8)."""
+    with open(path, "r", encoding="utf-8") as fh:
+        return json.load(fh)
+
+
+def render(results):
+    """Return one Markdown table per scenario, built from ``results``.
+
+    ``results`` is an iterable of mappings with ``scenario``, ``mode`` and
+    ``total`` (``avg_ms``/``min_ms``/``max_ms``) keys and an optional
+    ``rss_kib``. Scenarios, and the modes within each, are sorted by name.
+    """
+    grouped = defaultdict(list)
+    for entry in results:
+        grouped[entry["scenario"]].append(entry)
+
+    lines = []
+    for scenario in sorted(grouped):
+        lines.append(f"### {scenario.capitalize()}")
+        lines.append("| Mode | Avg ms | Min ms | Max ms | RSS (KiB) |")
+        lines.append("|------|--------|--------|--------|-----------|")
+        for item in sorted(grouped[scenario], key=lambda e: e["mode"]):
+            # A missing or JSON-null RSS renders as an empty cell, not "None".
+            rss = item.get("rss_kib")
+            lines.append(
+                "| {mode} | {avg:.2f} | {min:.2f} | {max:.2f} | {rss} |".format(
+                    mode=item["mode"],
+                    avg=item["total"]["avg_ms"],
+                    min=item["total"]["min_ms"],
+                    max=item["total"]["max_ms"],
+                    rss="" if rss is None else rss,
+                )
+            )
+        lines.append("")
+    return "\n".join(lines)
+
+
+def main() -> None:
+    """Parse the command line and print the rendered tables to stdout."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("results_json", help="Path to JSON produced by aardvark-perf")
+    args = parser.parse_args()
+
+    results = load(args.results_json)
+    print(render(results))
+
+
+if __name__ == "__main__":
+    main()

From fcfcc81d4ac3b01f5d4593585110e47b92728494 Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Sun, 19 Oct 2025 22:36:55 +0800
Subject: [PATCH 11/13] Require uv in perf host runner with friendly error

---
 Cargo.lock              | 15 ++++++++++++++-
 perf/runner/Cargo.toml  |  1 +
 perf/runner/src/main.rs |  7 +++++--
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 79ceea4..59ffa86 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -58,6 +58,7 @@ dependencies = [
  "serde",
  "serde_json",
  "structopt",
+ "which 4.4.2",
  "zip",
 ]
 
@@ -2404,7 +2405,7 @@ dependencies = [
  "miniz_oxide",
  "paste",
  "ry_temporal_capi",
- "which",
+ "which 6.0.3",
 ]
 
 [[package]]
@@ -2577,6 +2578,18 @@ dependencies = [
  "rustls-pki-types",
 ]
 
+[[package]]
+name = "which"
+version = "4.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
+dependencies = [
+ "either",
+ "home",
+ "once_cell",
+ "rustix 0.38.44",
+]
+
 [[package]]
 name = "which"
 version = "6.0.3"
diff --git a/perf/runner/Cargo.toml b/perf/runner/Cargo.toml
index 3120798..8ee3092 100644
--- a/perf/runner/Cargo.toml
+++ b/perf/runner/Cargo.toml
@@ -12,6 +12,7 @@ anyhow = "1.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 structopt = { version = "0.3", features = ["color"] }
+which = "4.4"
 
 [dependencies.zip]
 workspace = true
diff --git a/perf/runner/src/main.rs b/perf/runner/src/main.rs
index 2fa5649..8b49239 100644
--- a/perf/runner/src/main.rs
+++ b/perf/runner/src/main.rs
@@ -8,6 +8,7 @@ use aardvark_core::{Bundle, PyRuntime, PyRuntimeConfig};
 use anyhow::{anyhow, Context, Result};
 use serde::Serialize;
 use structopt::StructOpt;
+use which::which;
 
 #[derive(StructOpt, Debug)]
 #[structopt(
@@ -154,8 +155,10 @@ fn bench_aardvark(scenario: Scenario, iterations: usize) -> Result<BenchResult>
 }
 
 fn bench_host(scenario: Scenario, iterations: usize) -> Result<BenchResult> {
-    let script = Path::new("perf/fixtures/run_host.py");
-    let mut cmd = Command::new("uv");
+    let script = Path::new(env!("CARGO_MANIFEST_DIR")).join("../fixtures/run_host.py");
+    let uv = which("uv")
+        .context("`uv` command not found on PATH. Install from https://docs.astral.sh/uv/ or ensure it is available before running the perf suite.")?;
+    let mut cmd = Command::new(uv);
     cmd.arg("run");
     cmd.arg(format!("--python={}", host_python_version()));
     for pkg in scenario_packages(scenario) {

From f257bf08ad30d14d64d7e248c1637c0c4017b245 Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Sun, 19 Oct 2025 22:47:12 +0800
Subject: [PATCH 12/13] Add Makefile helpers for perf suite

---
 Makefile              | 28 ++++++++++++++++++++++++++++
 README.md             |  1 +
 docs/perf/overview.md | 12 ++++++++----
 3 files changed, 37 insertions(+), 4 deletions(-)
 create mode 100644 Makefile

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..cd33a96
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,28 @@
+.PHONY: pyodide-full perf-all perf-md setup-python
+
+PYODIDE_VERSION ?= 0.28.2
+PYODIDE_VARIANT ?= full
+PYODIDE_DIR ?= $(PWD)/.aardvark/pyodide/$(PYODIDE_VERSION)/$(PYODIDE_VARIANT)
+ITERATIONS ?= 25
+PERF_JSON ?= target/perf/results.json
+PERF_CSV ?= target/perf/results.csv
+PERF_MD ?= target/perf/results.md
+
+setup-python:
+	@echo "Installing Python $(PYODIDE_VERSION) toolchain via mise" 
+	@mise install python@$(PYODIDE_VERSION)
+
+pyodide-full:
+	@echo "Fetching Pyodide $(PYODIDE_VERSION) ($(PYODIDE_VARIANT)) into $(PYODIDE_DIR)"
+	@cargo run -p aardvark-cli --bin cargo-aardvark -- fetch-pyodide \
+		--version $(PYODIDE_VERSION) --variant $(PYODIDE_VARIANT)
+
+perf-all: ## Run the `aardvark-perf all` suite and write the JSON and CSV reports.
+	@mkdir -p $(dir $(PERF_JSON)) $(dir $(PERF_CSV))
+	AARDVARK_PYODIDE_PACKAGE_DIR="$(PYODIDE_DIR)" cargo run -p aardvark-perf -- \
+		all --iterations $(ITERATIONS) --json $(PERF_JSON) --csv $(PERF_CSV)
+
+perf-md: perf-all ## Render the JSON report written by perf-all as Markdown.
+	@mkdir -p $(dir $(PERF_MD))
+	python3 perf/scripts/render_markdown.py $(PERF_JSON) > $(PERF_MD)
+	@echo "Markdown written to $(PERF_MD)"
diff --git a/README.md b/README.md
index 76bf997..0be8098 100644
--- a/README.md
+++ b/README.md
@@ -146,6 +146,7 @@ Arguments are `[iterations] [payload_len]` (both optional). The harness warms th
 - API reference under `docs/api/` covers the manifest schema, host integration, handler contracts, and diagnostics handling with examples.
 - Developer onboarding material is available in `docs/dev/` for contributors extending the project.
 - Performance notes and benchmark workflow live in `docs/perf/overview.md`.
+- The included `Makefile` has helpers (`make pyodide-full`, `make perf-all`) to fetch Pyodide assets and run the perf suite.
 
 ## Publishing Notes
 
diff --git a/docs/perf/overview.md b/docs/perf/overview.md
index 76180ab..42af7c7 100644
--- a/docs/perf/overview.md
+++ b/docs/perf/overview.md
@@ -31,9 +31,7 @@ mise exec python@3.12 -- python --version
 From the repository root:
 
 ```sh
-cargo run -p aardvark-perf -- all --iterations 25 \
-  --json target/perf/results.json \
-  --csv target/perf/results.csv
+make perf-all ITERATIONS=25
 ```
 
 Sample console output:
@@ -46,7 +44,7 @@ Sample console output:
 ...
 ```
 
-The JSON/CSV files contain the same data for further analysis.
+The JSON/CSV files contain the same data for further analysis and live under `target/perf/`.
 
 ### Single Scenario
 
@@ -79,6 +77,12 @@ A helper script converts the JSON output into a Markdown table for reports:
 python perf/scripts/render_markdown.py target/perf/results.json > target/perf/results.md
 ```
 
+Or, if you prefer the Makefile wrapper:
+
+```sh
+make perf-md
+```
+
 The script reads the JSON emitted by `aardvark-perf` and prints a table grouped
 by scenario.
 

From f4498810aad4264e4d1ad4e0bdb098403b1ff34a Mon Sep 17 00:00:00 2001
From: Marcin Operacz <marcin.operacz@gmail.com>
Date: Sun, 19 Oct 2025 22:50:53 +0800
Subject: [PATCH 13/13] Default perf harness to curated pyodide cache

---
 .gitignore            |  1 +
 Makefile              | 15 ++++++++-------
 README.md             |  2 +-
 docs/perf/overview.md |  4 ++++
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/.gitignore b/.gitignore
index b17f7a7..f9dcecb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ example/*rd/
 
 runner-feature-requests/
 internal_docs/
+.aardvark/
diff --git a/Makefile b/Makefile
index cd33a96..a80a9cb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,19 +1,20 @@
-.PHONY: pyodide-full perf-all perf-md setup-python
+.PHONY: pyodide-fetch perf-all perf-md setup-python
 
-PYODIDE_VERSION ?= 0.28.2
-PYODIDE_VARIANT ?= full
-PYODIDE_DIR ?= $(PWD)/.aardvark/pyodide/$(PYODIDE_VERSION)/$(PYODIDE_VARIANT)
+# Default to the curated Pyodide cache shipped with the repo (cp313 build); CURDIR follows `make -C`, unlike the inherited PWD.
+PYODIDE_DIR ?= $(CURDIR)/tmp/pyodide
 ITERATIONS ?= 25
 PERF_JSON ?= target/perf/results.json
 PERF_CSV ?= target/perf/results.csv
 PERF_MD ?= target/perf/results.md
+PYODIDE_VERSION ?= 0.28.2
+PYODIDE_VARIANT ?= full
 
 setup-python:
-	@echo "Installing Python $(PYODIDE_VERSION) toolchain via mise" 
-	@mise install python@$(PYODIDE_VERSION)
+	@echo "Installing Python 3.12 toolchain via mise"
+	@mise install python@3.12
 
-pyodide-full:
-	@echo "Fetching Pyodide $(PYODIDE_VERSION) ($(PYODIDE_VARIANT)) into $(PYODIDE_DIR)"
+pyodide-fetch:
+	@echo "Fetching upstream Pyodide $(PYODIDE_VERSION) ($(PYODIDE_VARIANT)) into .aardvark/pyodide"
 	@cargo run -p aardvark-cli --bin cargo-aardvark -- fetch-pyodide \
 		--version $(PYODIDE_VERSION) --variant $(PYODIDE_VARIANT)
 
diff --git a/README.md b/README.md
index 0be8098..9c63ecd 100644
--- a/README.md
+++ b/README.md
@@ -146,7 +146,7 @@ Arguments are `[iterations] [payload_len]` (both optional). The harness warms th
 - API reference under `docs/api/` covers the manifest schema, host integration, handler contracts, and diagnostics handling with examples.
 - Developer onboarding material is available in `docs/dev/` for contributors extending the project.
 - Performance notes and benchmark workflow live in `docs/perf/overview.md`.
-- The included `Makefile` has helpers (`make pyodide-full`, `make perf-all`) to fetch Pyodide assets and run the perf suite.
+- The included `Makefile` has helpers (`make perf-all`, `make perf-md`). By default it uses the curated Pyodide cache under `tmp/pyodide`; use `make pyodide-fetch` if you need the upstream release.
 
 ## Publishing Notes
 
diff --git a/docs/perf/overview.md b/docs/perf/overview.md
index 42af7c7..0d0f10c 100644
--- a/docs/perf/overview.md
+++ b/docs/perf/overview.md
@@ -28,6 +28,8 @@ mise exec python@3.12 -- python --version
 
 ## Running the Benchmarks
 
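+By default the harness expects the curated Pyodide cache checked into `tmp/pyodide` (built against CPython 3.13 to match the Aardvark snapshot). If you maintain your own cache, pass `PYODIDE_DIR=/path/to/cache` to `make` (the Makefile sets `AARDVARK_PYODIDE_PACKAGE_DIR` from it, overriding any exported value), or set `AARDVARK_PYODIDE_PACKAGE_DIR` yourself when invoking `cargo run` directly.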
+
 From the repository root:
 
 ```sh
@@ -46,6 +48,8 @@ Sample console output:
 
 The JSON/CSV files contain the same data for further analysis and live under `target/perf/`.
 
+> **Note:** `make pyodide-fetch` pulls the upstream Pyodide release (CPython 3.11). It does **not** match our curated snapshot and is provided only if you need the stock build for other experiments.
+
 ### Single Scenario
 
 To benchmark one combination: