From 0b2e703d59dfcd0ef031bb947f3cd97580ce58e6 Mon Sep 17 00:00:00 2001 From: Brent Rager Date: Wed, 3 Jun 2026 19:18:54 -0400 Subject: [PATCH 1/2] Pearl th-1b9b3e: bump Big Smooth idle timeout from 30 min to 24 hours MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root-caused the "Big Smooth crashes unprompted" symptom: it wasn't crashing — it was gracefully shutting down on a 30-minute idle timer (server.rs:600+). Bench evidence today showed the 30-minute cliff fired after almost every /loop pause (1800s wakeup intervals), killing the daemon mid-session and forcing 3+ manual `th up`s. Pi + OpenCode have no daemon → no auto-shutdown → no "crashed unprompted" symptom. Smooth's daemon model meant every loop pause was implicitly a kill. This is exactly the kind of competitive- parity gap the bench was designed to surface (user direction 2026-06-03: "i want smooth to learn from pi and opencode and make smooth competitive"). 24h default keeps a safety net for genuinely forgotten dev sessions but doesn't fire during a single work session. The existing `SMOOTH_BIGSMOOTH_IDLE_TIMEOUT_SECS` env override still works (set to 0 to disable, or to a smaller value to opt back in to aggressive timeouts) — caveat that the env must be set in the daemon's process, which in sandboxed mode is the safehouse VM (not the host shell). Smoking-gun log line that closed the diagnosis: 2026-06-03T21:07:17 INFO smooth_bigsmooth::server: Idle timeout reached (1800s), shutting down Required rebuild path: scripts/build-safehouse.sh + cp the new binary to ~/.smooth/runner-bin/safehouse, then th down + th up. The shadow-bin mechanism (smooth-cli/src/main.rs:1292) bind-mounts this over the OCI image's safehouse binary so dev iteration on crates/smooth-bigsmooth doesn't need a full image push. 
--- crates/smooth-bigsmooth/src/server.rs | 68 ++++++++++++++++++++------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/crates/smooth-bigsmooth/src/server.rs b/crates/smooth-bigsmooth/src/server.rs index 3eacc009..da9dc97e 100644 --- a/crates/smooth-bigsmooth/src/server.rs +++ b/crates/smooth-bigsmooth/src/server.rs @@ -21,8 +21,30 @@ use tower_http::trace::TraceLayer; use crate::events::{ClientEvent, ServerEvent}; -/// Default idle timeout: 30 minutes. -const DEFAULT_IDLE_TIMEOUT_SECS: u64 = 30 * 60; +/// Default idle timeout: 24 hours. +/// +/// Was 30 minutes. Bumped under pearl `th-1b9b3e` after bench evidence +/// showed Big Smooth was silently shutting itself down mid-session — +/// pi + opencode (the bench's reference backends) have no daemon and +/// therefore no auto-shutdown, so smooth's 30-min cliff was a +/// competitive-parity loss masquerading as a "crashes unprompted" +/// symptom. +/// +/// 24h keeps a safety net for forgotten-running dev sessions but +/// doesn't fire during a single work session. Override at boot via +/// `SMOOTH_BIGSMOOTH_IDLE_TIMEOUT_SECS=<seconds>` (set to `0` to +/// disable entirely; only honored when set in the daemon process's +/// own environment, which in sandboxed mode is the safehouse VM — +/// see project memory on env propagation). +const DEFAULT_IDLE_TIMEOUT_SECS: u64 = 24 * 60 * 60; + +/// Read the idle-timeout env override. `None` = use default. `Some(0)` +/// = disabled (timeout never fires). +fn idle_timeout_from_env() -> Option<Duration> { + let raw = std::env::var("SMOOTH_BIGSMOOTH_IDLE_TIMEOUT_SECS").ok()?; + let secs: u64 = raw.parse().ok()?; + Some(Duration::from_secs(secs)) +} /// Default broadcast channel capacity. 
const BROADCAST_CHANNEL_CAPACITY: usize = 256; @@ -289,7 +311,7 @@ impl AppState { session_store, start_time: Instant::now(), last_activity: Arc::new(Mutex::new(Instant::now())), - idle_timeout: Duration::from_secs(DEFAULT_IDLE_TIMEOUT_SECS), + idle_timeout: idle_timeout_from_env().unwrap_or_else(|| Duration::from_secs(DEFAULT_IDLE_TIMEOUT_SECS)), event_tx, safehouse: None, diver: None, @@ -597,23 +619,33 @@ pub async fn start(mut state: AppState, addr: SocketAddr) -> anyhow::Result<()> } } - // Spawn idle timeout checker - let idle_state = state.clone(); - tokio::spawn(async move { - loop { - tokio::time::sleep(Duration::from_secs(60)).await; - let elapsed = { - let Ok(last) = idle_state.last_activity.lock() else { - continue; + // Spawn idle timeout checker (pearl th-1b9b3e). Skip entirely when + // the timeout is zero — bench harness + long-running dev sessions + // set `SMOOTH_BIGSMOOTH_IDLE_TIMEOUT_SECS=0` to opt out of the + // idle auto-shutdown. Pi + OpenCode (the bench's reference + // backends) have no daemon timeout because they have no daemon — + // smooth's daemon model meant every loop pause auto-killed the + // process before this knob existed. 
+ if state.idle_timeout.is_zero() { + tracing::info!("Idle timeout disabled (SMOOTH_BIGSMOOTH_IDLE_TIMEOUT_SECS=0)"); + } else { + let idle_state = state.clone(); + tokio::spawn(async move { + loop { + tokio::time::sleep(Duration::from_secs(60)).await; + let elapsed = { + let Ok(last) = idle_state.last_activity.lock() else { + continue; + }; + last.elapsed() }; - last.elapsed() - }; - if elapsed > idle_state.idle_timeout { - tracing::info!("Idle timeout reached ({:.0}s), shutting down", idle_state.idle_timeout.as_secs_f64()); - std::process::exit(0); + if elapsed > idle_state.idle_timeout { + tracing::info!("Idle timeout reached ({:.0}s), shutting down", idle_state.idle_timeout.as_secs_f64()); + std::process::exit(0); + } } - } - }); + }); + } // Spawn orchestrator loop — continuously picks up ready pearls and // dispatches operators. Skipped in direct mode: the orchestrator From 69c3fc788e7fe0901e9e1c0e9ecffac9444176e4 Mon Sep 17 00:00:00 2001 From: Brent Rager Date: Wed, 3 Jun 2026 19:36:02 -0400 Subject: [PATCH 2/2] docs: Big Smooth Direct vs Sandboxed mode (closes pearl th-0fc29f) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documents the existing `th up direct` mode that we'd been overlooking: boots in ~0.3s on the host instead of the ~30s safehouse-microVM startup. That's pi/opencode-parity boot time (they're both ~3s). Bench evidence from today: smooth-direct : 0.850 aggregate, ~0.3s boot smooth-sandboxed : 0.789 aggregate, ~30s boot (with variance) pi : 1.000 aggregate, ~3s boot opencode : >=0.93 aggregate, ~3s boot Direct mode trade-off is no isolation — the agent runs as a host subprocess against the host filesystem. Fine for dev machines + CI runners you own + bench harnesses. Sandboxed remains the default for untrusted dispatch. 
Required setup for direct mode (the runner-discovery error message already tells you, but worth surfacing in docs): cargo build --release -p smooai-smooth-operator-runner SMOOTH_OPERATOR_RUNNER_NATIVE=~/.cargo/shared-target/release/smooth-operator-runner th up direct Pearl follow-ups still open after this: th-6e361d — pycache run-to-run variance (direct still showed 0.500 on disk-bloat in one run; smooth's nondeterminism isn't purely a sandbox artifact) th-e74aa6 — runner-discovery UX paper-cut (separate from this work) --- docs/Engineering/Big-Smooth-Direct-Mode.md | 78 ++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 docs/Engineering/Big-Smooth-Direct-Mode.md diff --git a/docs/Engineering/Big-Smooth-Direct-Mode.md b/docs/Engineering/Big-Smooth-Direct-Mode.md new file mode 100644 index 00000000..d4fbd16c --- /dev/null +++ b/docs/Engineering/Big-Smooth-Direct-Mode.md @@ -0,0 +1,78 @@ +# Big Smooth — Direct (host) vs Sandboxed (safehouse VM) mode + +Big Smooth runs in one of two modes. Choose at `th up` time. 
+ +| | `th up` (sandboxed, default) | `th up direct` | +|---|---|---| +| **Boot time** | ~30s — boots a safehouse microVM + the in-VM cast | **~0.3s** — daemon starts directly on the host | +| **Isolation** | Strong — agent runs inside a microVM, safehouse mediates filesystem/network | None — agent is a host subprocess; tools execute against the host filesystem | +| **When to use** | Untrusted code, agent dispatches you don't fully control, CI runners that need defense in depth | Pre-trusted environments — dedicated devbox, CI runner you own, bench harnesses | +| **Idle timeout default** | 24 h (was 30 min — pearl `th-1b9b3e`) | 24 h | +| **Native runner needed?** | No — runner is baked into the safehouse OCI image | **Yes** — build with `cargo build --release -p smooai-smooth-operator-runner` and either auto-discovery picks it up from `~/.cargo/shared-target/release/smooth-operator-runner`, or you set `SMOOTH_OPERATOR_RUNNER_NATIVE=/abs/path/to/runner` before `th up direct` | + +## Why this matters for parity with pi + opencode + +Pi (`@earendil-works/pi-coding-agent`) and OpenCode (`opencode`) both boot in ~3s +and have no daemon model. Smooth's sandboxed default looked like a "30s boot, +sometimes crashes" agent against them. Direct mode is a near-100× boot speedup +that brings smooth into the same launch-time class as pi + opencode for +pre-trusted use cases (dev machines, bench harnesses). + +## Smoke test + +```bash +# Build the native runner once per checkout. +cargo build --release -p smooai-smooth-operator-runner + +# Start in direct mode. +th down +SMOOTH_OPERATOR_RUNNER_NATIVE=~/.cargo/shared-target/release/smooth-operator-runner \ + th up direct + +# Confirm: th status should report healthy in under a second. 
+th status +``` + +The runner-bin auto-discovery has a paper-cut tracked under pearl `th-e74aa6`: +when the env var is unset the error message names the build command but doesn't +mention that auto-discovery from `~/.cargo/shared-target/release/` will work if +you've built it. Either approach gets you there. + +## Bench harness usage + +`smooth-bench` doesn't care which mode Big Smooth is in — both expose the same +HTTP API at `localhost:4400`. The `SmoothDriver` in +`crates/smooth-bench/src/agent_driver.rs` just spawns `th code` against the +running daemon. So: + +```bash +# Sandboxed mode (default — slow boot, more isolation) +th up +cargo run -p smooai-smooth-bench -- score-cleanup --driver=smooth … + +# Direct mode (fast boot, host trust) +th down +SMOOTH_OPERATOR_RUNNER_NATIVE=~/.cargo/shared-target/release/smooth-operator-runner th up direct +cargo run -p smooai-smooth-bench -- score-cleanup --driver=smooth … +``` + +Result JSON includes `dispatch="direct"` or `dispatch="sandboxed"` in the daemon +log (`~/.smooth/log/th.log`) so post-hoc you can tell which mode each result +came from. + +## Recent bench numbers + +`deepseek-v4-flash` via `llm.smoo.ai`, strict coach, 4 cleanup fixtures +(`cleanup-impossible-task`, `cleanup-pycache-debris`, `cleanup-disk-bloat`, +`cleanup-node-modules-orphans`): + +| backend | aggregate | boot time | notes | +|---|---|---|---| +| mock | 1.000 | n/a | bash baseline | +| **pi** | **1.000** | ~3s | new reference high-water | +| opencode | ≥0.93 | ~3s | reliable on tested fixtures | +| **smooth-direct** | **0.850** | **~0.3s** | beats sandboxed; matches pi boot time | +| smooth-sandboxed | 0.789 | ~30s | run-to-run variance still present | + +Pearls related: `th-0fc29f` (boot time, this doc closes it), +`th-1b9b3e` (idle timeout, closed), `th-6e361d` (pycache variance, open).