diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 9ace5c404..35cf4ef49 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -428,13 +428,15 @@ jobs: # MSB_TEST_ISOLATE_HOME=1 turns on per-test ~/.microsandbox isolation # (see crates/test-utils), so nextest can run tests in parallel without # sqlite/image-cache contention. + # Keep real microVM/network concurrency bounded even on large self-hosted + # runners; num-cpus fan-out can overwhelm guest package installs. - name: Run integration tests env: MSB_TEST_ISOLATE_HOME: "1" run: | export PATH="$HOME/.microsandbox/bin:$PATH" export LD_LIBRARY_PATH="${{ github.workspace }}/build:$HOME/.microsandbox/lib" - cargo nextest run -p microsandbox --tests --run-ignored=only + cargo nextest run -p microsandbox --tests --run-ignored=only --test-threads 4 # --------------------------------------------------------------------------- # Node.js SDK smoke tests (requires KVM) diff --git a/Cargo.lock b/Cargo.lock index 8389392ce..a83a7a15a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -85,7 +85,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -96,7 +96,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1238,7 +1238,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1381,7 +1381,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1501,6 +1501,15 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "fsevent-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" +dependencies = [ + "libc", +] + [[package]] name = "futures" version = "0.3.32" @@ -2197,6 +2206,26 @@ dependencies = [ "tokio", ] +[[package]] +name = "inotify" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd5b3eaf1a28b758ac0faa5a4254e8ab2705605496f1b1f3fbbc3988ad73d199" +dependencies = [ + "bitflags 2.11.0", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = [ + "libc", +] + [[package]] name = "inout" version = "0.1.4" @@ -2341,6 +2370,26 @@ dependencies = [ "zeroize", ] +[[package]] +name = "kqueue" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac30106d7dce88daf4a3fcb4879ea939476d5074a9b7ddd0fb97fa4bed5596a" +dependencies = [ + "kqueue-sys", + "libc", +] + +[[package]] +name = "kqueue-sys" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07293a4e297ac234359b510362495713f75ea345d5307140414f20c69ffeb087" +dependencies = [ + "bitflags 2.11.0", + "libc", +] + [[package]] name = "kvm-bindings" version = "0.14.0" @@ -2579,6 +2628,7 @@ version = "0.4.6" dependencies = [ "astral-tokio-tar", "async-compression", + "base64 0.22.1", "bytes", "chrono", "ciborium", @@ -2602,6 +2652,7 @@ dependencies = [ "microsandbox-runtime", "microsandbox-utils", "nix 0.30.1", + "notify", "rand 0.10.1", "reqwest", "scopeguard", @@ -2624,10 +2675,13 @@ name = "microsandbox-cli" version = "0.4.6" dependencies = [ "anyhow", + "base64 0.22.1", + "bytes", "chrono", "clap", "console", "dirs", + "futures", "indicatif", "ipnetwork", "libc", @@ -3231,13 +3285,40 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "notify" +version = "8.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d3d07927151ff8575b7087f245456e549fea62edf0ec4e565a5ee50c8402bc3" +dependencies = [ + "bitflags 2.11.0", + "fsevent-sys", + "inotify", + "kqueue", + "libc", + "log", + "mio", + "notify-types", + "walkdir", + "windows-sys 0.60.2", +] + +[[package]] +name = "notify-types" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42b8cfee0e339a0337359f3c88165702ac6e600dc01c0cc9579a92d62b08477a" +dependencies = [ + "bitflags 2.11.0", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4185,7 +4266,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4253,7 +4334,7 @@ dependencies = [ "security-framework 3.7.0", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4768,7 +4849,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5122,7 +5203,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5524,7 +5605,7 @@ checksum = "f2f6fb2847f6742cd76af783a2a2c49e9375d0a111c7bef6f71cd9e738c72d6e" dependencies = [ "memoffset", "tempfile", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5984,7 +6065,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index b6960d45f..39bd4773c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -76,6 +76,7 @@ indicatif = "0.18" ipnetwork = { version = "0.21.0", features = ["serde"] } libc = "0.2" nix = "0.30" +notify = "8" oci-client = "0.16" oci-spec = "0.9.0" rand = "0.10.1" diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 929080183..435b60d5a 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -29,10 +29,13 @@ net = [ [dependencies] anyhow.workspace = true +base64.workspace = true +bytes.workspace = true chrono.workspace = true clap.workspace = true console.workspace = true dirs.workspace = true +futures.workspace = true indicatif.workspace = true ipnetwork = { workspace = true, optional = true } libc.workspace = true diff --git a/crates/cli/lib/commands/logs.rs b/crates/cli/lib/commands/logs.rs index 1202c38fe..5193958ff 100644 --- a/crates/cli/lib/commands/logs.rs +++ b/crates/cli/lib/commands/logs.rs @@ -1,24 +1,36 @@ //! `msb logs` command — read the captured output of a sandbox. //! -//! Reads `/logs/exec.log` (the JSON Lines file produced by +//! Backed by `microsandbox::logs::read_logs` for the historical +//! snapshot and `microsandbox::logs::log_stream` for `--follow`. Both +//! read `/logs/exec.log` (the JSON Lines file produced by //! the runtime's relay tap, see `crates/runtime/lib/exec_log.rs`), -//! decodes each entry, and renders it to the terminal per -//! `design/runtime/sandbox-logs.md` D5. +//! plus `runtime.log` and `kernel.log` when `--source system` is in +//! scope. This command decodes each entry and renders it to the +//! terminal per `design/runtime/sandbox-logs.md` D5. //! //! Supports filtering by source (stdout/stderr/system), time window, -//! tail count, regex search, follow mode (polling), and JSON-Lines -//! passthrough. ANSI escape sequences are passed through to TTYs and -//! stripped on pipes by default (matching `ls`/`grep` convention). +//! tail count, regex search, follow mode (filesystem-watch driven), +//! and JSON-Lines passthrough. ANSI escape sequences are passed +//! through to TTYs and stripped on pipes by default (matching +//! `ls`/`grep` convention). use std::io::{IsTerminal, Write}; use std::path::Path; +use std::pin::pin; use std::time::Duration; use anyhow::{Context, anyhow}; -use chrono::{DateTime, Utc}; +use base64::Engine as _; +use chrono::{DateTime, SecondsFormat, Utc}; use clap::{Args, ValueEnum}; use console::style; -use microsandbox_utils::log_text::{base64_decode, split_leading_timestamp, strip_ansi}; +use futures::StreamExt; +use microsandbox::MicrosandboxError; +use microsandbox::logs::{ + self, LogEntry as EngineLogEntry, LogOptions, LogSource, LogStreamOptions, LogStreamStart, +}; +use microsandbox_runtime::boot_error::BootError; +use microsandbox_utils::log_text::{base64_decode, strip_ansi}; use regex::Regex; use serde::Deserialize; @@ -161,7 +173,7 @@ struct LogEntry { /// Execute the `msb logs` command. pub async fn run(args: LogsArgs) -> anyhow::Result<()> { - let log_dir = microsandbox::sandbox::logs::log_dir_for(&args.name); + let log_dir = logs::log_dir_for(&args.name); if !log_dir.exists() { return Err(anyhow!( "no logs directory for sandbox {:?} (sandbox not found?)", @@ -169,7 +181,8 @@ pub async fn run(args: LogsArgs) -> anyhow::Result<()> { )); } - let sources = resolve_sources(&args.source); + let mask = resolve_sources(&args.source); + let engine_sources = mask.to_engine_sources(); let since = parse_time_arg(args.since.as_deref())?; let until = parse_time_arg(args.until.as_deref())?; let grep_re = match args.grep.as_deref() { @@ -187,25 +200,63 @@ pub async fn run(args: LogsArgs) -> anyhow::Result<()> { // boot-error.json sits next to exec.log in the same log_dir). render_boot_error_if_present(&log_dir, &args.name, args.json)?; - // Initial dump (history). - let mut entries = read_all_entries(&log_dir, sources)?; - apply_filters(&mut entries, since, until, grep_re.as_ref(), args.tail); - render_entries(&entries, &args, color_policy)?; - - // Optional follow mode — poll the file for new entries. - if args.follow { - let last_t = entries.last().map(|e| e.t.clone()); - follow_loop( - &log_dir, - sources, - &args, - color_policy, - last_t, - grep_re.as_ref(), - ) - .await?; + // Snapshot: drain the chronologically-sorted history. `read_logs` + // applies tail / since / until / source filtering; --grep is + // applied locally so it can match against the decoded body string. + let snapshot_opts = LogOptions { + tail: args.tail, + since, + until, + sources: engine_sources.clone(), + }; + let snapshot = logs::read_logs_snapshot(&args.name, &snapshot_opts) + .await + .context("reading logs")?; + for entry in &snapshot.entries { + let cli_entry = engine_entry_to_cli(entry); + if grep_matches(grep_re.as_ref(), &cli_entry.d) { + render_entry(&cli_entry, &args, color_policy)?; + } + } + + if !args.follow { + return Ok(()); } + // Follow: resume from the exact snapshot end cursor so entries + // written between the snapshot drain and stream startup are not + // skipped. + let stream_opts = LogStreamOptions { + sources: engine_sources, + start: LogStreamStart::From(snapshot.cursor), + until, + follow: true, + }; + let mut stream = pin!( + logs::log_stream(&args.name, &stream_opts) + .await + .context("starting log stream")? + ); + while let Some(item) = stream.next().await { + match item { + Ok(entry) => { + let cli_entry = engine_entry_to_cli(&entry); + if grep_matches(grep_re.as_ref(), &cli_entry.d) { + render_entry(&cli_entry, &args, color_policy)?; + } + } + Err(MicrosandboxError::MissedRotation { + dropped_from_offset, + }) => { + eprintln!( + "log follower fell behind: missed rotation at offset {dropped_from_offset}. \ + restart `msb logs -f` to resume." + ); + return Ok(()); + } + Err(e) => return Err(anyhow!(e)), + } + } Ok(()) } @@ -252,8 +303,23 @@ struct SourceMask { } impl SourceMask { - fn includes_exec_sources(&self) -> bool { - self.stdout || self.stderr || self.output + /// Translate the mask into the engine's flat source list. Order + /// is fixed (stdout, stderr, output, system) for stable behavior. + fn to_engine_sources(self) -> Vec { + let mut out = Vec::with_capacity(4); + if self.stdout { + out.push(LogSource::Stdout); + } + if self.stderr { + out.push(LogSource::Stderr); + } + if self.output { + out.push(LogSource::Output); + } + if self.system { + out.push(LogSource::System); + } + out } } @@ -262,7 +328,7 @@ impl SourceMask { //-------------------------------------------------------------------------------------------------- fn render_boot_error_if_present(log_dir: &Path, name: &str, json_mode: bool) -> anyhow::Result<()> { - let boot_err = match microsandbox_runtime::boot_error::BootError::read(log_dir) { + let boot_err = match BootError::read(log_dir) { Ok(Some(b)) => b, Ok(None) => return Ok(()), Err(_) => return Ok(()), @@ -288,167 +354,45 @@ fn render_boot_error_if_present(log_dir: &Path, name: &str, json_mode: bool) -> } //-------------------------------------------------------------------------------------------------- -// Functions: Helpers — reading +// Functions: Helpers — engine bridge //-------------------------------------------------------------------------------------------------- -/// Read all entries from `exec.log` (and its rotated siblings) plus -/// optional system sources, ordered chronologically. -fn read_all_entries(log_dir: &Path, sources: SourceMask) -> anyhow::Result> { - let mut entries: Vec = Vec::new(); - - if sources.includes_exec_sources() { - // Rotated files first (.3 → .2 → .1 → current) so output is - // chronologically ordered. - for suffix in [".3", ".2", ".1", ""].iter() { - let path = if suffix.is_empty() { - log_dir.join("exec.log") - } else { - log_dir.join(format!("exec.log{suffix}")) - }; - if !path.exists() { - continue; - } - append_jsonl_entries(&path, &mut entries, sources)?; - } - } - - if sources.system { - // Cross-merge runtime.log and kernel.log as `s: "system"`. - // Both are unstructured text; we synthesize timestamps from - // file mtimes (kernel.log) or per-line tracing prefixes - // (runtime.log). - append_text_log_as_system(&log_dir.join("runtime.log"), &mut entries); - append_text_log_as_system(&log_dir.join("kernel.log"), &mut entries); - - // Stable sort by parsed timestamp. We parse rather than - // string-compare because runtime.log uses microsecond-precision - // RFC 3339 (`.615119Z`) while exec.log uses millisecond - // (`.969Z`) — lexical compare across mixed precisions gives - // the wrong order. - entries.sort_by_key(|e| parse_entry_time(&e.t).unwrap_or(DateTime::::MIN_UTC)); - } - - Ok(entries) -} - -fn append_jsonl_entries( - path: &Path, - out: &mut Vec, - sources: SourceMask, -) -> anyhow::Result<()> { - let bytes = std::fs::read(path).with_context(|| format!("reading {}", path.display()))?; - let text = String::from_utf8_lossy(&bytes); - for line in text.lines() { - if line.is_empty() { - continue; - } - match serde_json::from_str::(line) { - Ok(entry) => { - if entry_passes_source_mask(&entry, sources) { - out.push(entry); - } - } - Err(_) => { - // Skip malformed lines — never let one bad entry - // poison the whole file. - } - } - } - Ok(()) -} - -fn entry_passes_source_mask(entry: &LogEntry, mask: SourceMask) -> bool { - match entry.s.as_str() { - "stdout" => mask.stdout, - "stderr" => mask.stderr, - "output" => mask.output, - "system" => mask.system, - _ => true, // Unknown source: include defensively. - } -} - -/// Read a plain-text log file (runtime.log / kernel.log) and append -/// each line as a synthetic `s: "system"` entry. -fn append_text_log_as_system(path: &Path, out: &mut Vec) { - if !path.exists() { - return; - } - let bytes = match std::fs::read(path) { - Ok(b) => b, - Err(_) => return, +/// Convert an engine entry into the CLI's local representation. The +/// engine has already decoded base64 bodies; we keep the `e: "b64"` +/// channel alive for `--json` output by re-encoding raw bytes that +/// aren't valid UTF-8, so downstream consumers can round-trip them. +fn engine_entry_to_cli(entry: &EngineLogEntry) -> LogEntry { + let s = match entry.source { + LogSource::Stdout => "stdout", + LogSource::Stderr => "stderr", + LogSource::Output => "output", + LogSource::System => "system", + }; + let (d, e) = match std::str::from_utf8(&entry.data) { + Ok(text) => (text.to_string(), None), + Err(_) => ( + base64::engine::general_purpose::STANDARD.encode(&entry.data), + Some("b64".to_string()), + ), }; - let text = String::from_utf8_lossy(&bytes); - let mtime_iso = file_mtime_rfc3339(path).unwrap_or_else(now_rfc3339); - for line in text.lines() { - if line.is_empty() { - continue; - } - // tracing-formatted lines start with ANSI color escapes - // around the timestamp (`\x1b[2m2026-…Z\x1b[0m INFO …`). - // Strip ANSI first so the leading-timestamp parser sees a - // bare RFC 3339 token. Fall back to file mtime if that - // still fails (e.g. unstructured kernel.log). - let stripped = strip_ansi(line); - let (t, body) = match split_leading_timestamp(&stripped) { - Some((t, body)) => (t.to_string(), body.to_string()), - None => (mtime_iso.clone(), stripped.clone()), - }; - out.push(LogEntry { - t, - s: "system".into(), - d: format!("{}\n", body), - id: None, - e: None, - }); + LogEntry { + t: entry.timestamp.to_rfc3339_opts(SecondsFormat::Millis, true), + s: s.to_string(), + d, + id: entry.session_id, + e, } } -fn file_mtime_rfc3339(path: &Path) -> Option { - let meta = std::fs::metadata(path).ok()?; - let modified = meta.modified().ok()?; - let dt: DateTime = modified.into(); - Some(dt.to_rfc3339_opts(chrono::SecondsFormat::Millis, true)) -} - -fn now_rfc3339() -> String { - Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true) +fn grep_matches(re: Option<&Regex>, body: &str) -> bool { + re.is_none_or(|r| r.is_match(body)) } //-------------------------------------------------------------------------------------------------- -// Functions: Helpers — filters +// Functions: Helpers — time parsing //-------------------------------------------------------------------------------------------------- -fn apply_filters( - entries: &mut Vec, - since: Option>, - until: Option>, - grep: Option<&Regex>, - tail: Option, -) { - if let Some(s) = since { - entries.retain(|e| match parse_entry_time(&e.t) { - Some(t) => t >= s, - None => true, - }); - } - if let Some(u) = until { - entries.retain(|e| match parse_entry_time(&e.t) { - Some(t) => t < u, - None => true, - }); - } - if let Some(re) = grep { - entries.retain(|e| re.is_match(&e.d)); - } - if let Some(n) = tail - && entries.len() > n - { - let drop = entries.len() - n; - entries.drain(0..drop); - } -} - fn parse_time_arg(input: Option<&str>) -> anyhow::Result>> { let Some(raw) = input else { return Ok(None); @@ -480,54 +424,31 @@ fn parse_duration(raw: &str) -> Option { Some(Duration::from_secs(secs)) } -fn parse_entry_time(t: &str) -> Option> { - DateTime::parse_from_rfc3339(t) - .ok() - .map(|d| d.with_timezone(&Utc)) -} - //-------------------------------------------------------------------------------------------------- // Functions: Helpers — rendering //-------------------------------------------------------------------------------------------------- -fn write_entry_json(entry: &LogEntry, out: &mut impl Write) -> anyhow::Result<()> { - // Re-emit verbatim as a single JSON Lines line. We serialize from - // our parsed struct so that any malformed fields are normalized. - let line = serde_json::to_string(&serde_json::json!({ - "t": entry.t, - "s": entry.s, - "d": entry.d, - "id": entry.id, - "e": entry.e, - }))?; - writeln!(out, "{line}")?; - Ok(()) -} - -fn render_entries(entries: &[LogEntry], args: &LogsArgs, color: ColorMode) -> anyhow::Result<()> { +fn render_entry(entry: &LogEntry, args: &LogsArgs, color: ColorMode) -> anyhow::Result<()> { if args.json { + // Re-emit verbatim as a single JSON Lines line. We serialize + // from our parsed struct so that any malformed fields are + // normalized. + let line = serde_json::to_string(&serde_json::json!({ + "t": entry.t, + "s": entry.s, + "d": entry.d, + "id": entry.id, + "e": entry.e, + }))?; let stdout = std::io::stdout(); let mut out = stdout.lock(); - for entry in entries { - write_entry_json(entry, &mut out)?; - } + writeln!(out, "{line}")?; return Ok(()); } - - for entry in entries { - render_one(entry, args, color)?; - } - Ok(()) + render_one(entry, args, color) } fn render_one(entry: &LogEntry, args: &LogsArgs, color: ColorMode) -> anyhow::Result<()> { - if args.json { - let stdout = std::io::stdout(); - let mut out = stdout.lock(); - write_entry_json(entry, &mut out)?; - return Ok(()); - } - // Resolve the body bytes (decode base64 if e == "b64"; else use d). let body = decode_body(entry); let body = apply_color_policy(&body, color); @@ -705,80 +626,6 @@ fn prefix_with_timestamp(t: &str, id_prefix: Option<&str>, body: &str) -> String out } -//-------------------------------------------------------------------------------------------------- -// Functions: Helpers — follow mode -//-------------------------------------------------------------------------------------------------- - -async fn follow_loop( - log_dir: &Path, - sources: SourceMask, - args: &LogsArgs, - color: ColorMode, - mut last_t: Option, - grep_re: Option<&Regex>, -) -> anyhow::Result<()> { - let path = log_dir.join("exec.log"); - let (mut last_size, mut last_inode) = match std::fs::metadata(&path) { - Ok(m) => (m.len(), inode_from_meta(&m)), - Err(_) => (0u64, 0u64), - }; - - loop { - tokio::time::sleep(Duration::from_millis(200)).await; - - let Ok(meta) = std::fs::metadata(&path) else { - // File missing — sandbox stopped or removed. Exit cleanly. - break; - }; - let inode = inode_from_meta(&meta); - let size = meta.len(); - - // Detect rotation (inode changed, or size shrank): re-read the - // whole file from the top. - let need_full_reread = inode != last_inode || size < last_size; - - if !need_full_reread && size == last_size { - continue; - } - - let mut new_entries: Vec = Vec::new(); - if sources.includes_exec_sources() { - append_jsonl_entries(&path, &mut new_entries, sources)?; - } - - // Filter to only entries newer than the last we rendered. - let cutoff = last_t.clone(); - new_entries.retain(|e| match cutoff.as_deref() { - Some(c) => e.t.as_str() > c, - None => true, - }); - - if let Some(re) = grep_re { - new_entries.retain(|e| re.is_match(&e.d)); - } - - for entry in &new_entries { - render_one(entry, args, color)?; - last_t = Some(entry.t.clone()); - } - - last_size = size; - last_inode = inode; - } - Ok(()) -} - -#[cfg(unix)] -fn inode_from_meta(meta: &std::fs::Metadata) -> u64 { - use std::os::unix::fs::MetadataExt; - meta.ino() -} - -#[cfg(not(unix))] -fn inode_from_meta(_meta: &std::fs::Metadata) -> u64 { - 0 -} - //-------------------------------------------------------------------------------------------------- // Tests //-------------------------------------------------------------------------------------------------- @@ -849,43 +696,62 @@ mod tests { } #[test] - fn apply_filters_tail_keeps_last_n() { - let mut entries: Vec = (0..5) - .map(|i| LogEntry { - t: format!("2026-04-30T00:00:0{i}.000Z"), - s: "stdout".into(), - d: format!("line {i}"), - id: Some(1), - e: None, - }) - .collect(); - apply_filters(&mut entries, None, None, None, Some(2)); - assert_eq!(entries.len(), 2); - assert_eq!(entries[0].d, "line 3"); - assert_eq!(entries[1].d, "line 4"); + fn grep_matches_returns_true_when_no_pattern() { + assert!(grep_matches(None, "anything")); } #[test] - fn apply_filters_grep() { - let mut entries: Vec = vec![ - LogEntry { - t: "1".into(), - s: "stdout".into(), - d: "ok".into(), - id: Some(1), - e: None, - }, - LogEntry { - t: "2".into(), - s: "stdout".into(), - d: "error: bad".into(), - id: Some(1), - e: None, - }, - ]; - let re = Regex::new("error").unwrap(); - apply_filters(&mut entries, None, None, Some(&re), None); - assert_eq!(entries.len(), 1); - assert_eq!(entries[0].d, "error: bad"); + fn grep_matches_filters_by_pattern() { + let re = Regex::new("err").unwrap(); + assert!(grep_matches(Some(&re), "error: bad")); + assert!(!grep_matches(Some(&re), "ok")); + } + + #[test] + fn source_mask_to_engine_sources_maps_each_flag() { + let mask = SourceMask { + stdout: true, + stderr: false, + output: true, + system: true, + }; + assert_eq!( + mask.to_engine_sources(), + vec![LogSource::Stdout, LogSource::Output, LogSource::System], + ); + } + + #[test] + fn engine_entry_to_cli_preserves_utf8_body() { + let entry = EngineLogEntry { + timestamp: DateTime::parse_from_rfc3339("2026-04-30T00:00:00.000Z") + .unwrap() + .with_timezone(&Utc), + source: LogSource::Stdout, + session_id: Some(7), + data: bytes::Bytes::from_static(b"hello world"), + cursor: microsandbox::logs::LogCursor::empty(), + }; + let cli = engine_entry_to_cli(&entry); + assert_eq!(cli.s, "stdout"); + assert_eq!(cli.id, Some(7)); + assert_eq!(cli.d, "hello world"); + assert!(cli.e.is_none()); + } + + #[test] + fn engine_entry_to_cli_base64s_non_utf8_body() { + let entry = EngineLogEntry { + timestamp: DateTime::parse_from_rfc3339("2026-04-30T00:00:00.000Z") + .unwrap() + .with_timezone(&Utc), + source: LogSource::Output, + session_id: None, + data: bytes::Bytes::from_static(&[0xff, 0xfe, 0xfd]), + cursor: microsandbox::logs::LogCursor::empty(), + }; + let cli = engine_entry_to_cli(&entry); + assert_eq!(cli.e.as_deref(), Some("b64")); + assert_eq!(cli.d, "//79"); } } diff --git a/crates/microsandbox/Cargo.toml b/crates/microsandbox/Cargo.toml index f10e172f4..2c8aaa2d6 100644 --- a/crates/microsandbox/Cargo.toml +++ b/crates/microsandbox/Cargo.toml @@ -28,6 +28,7 @@ net = ["dep:microsandbox-network", "microsandbox-runtime/net"] [dependencies] astral-tokio-tar.workspace = true async-compression = { workspace = true, features = ["tokio", "zstd", "gzip"] } +base64.workspace = true bytes.workspace = true chrono.workspace = true ciborium.workspace = true @@ -48,6 +49,7 @@ microsandbox-protocol = { version = "0.4.6", path = "../protocol" } microsandbox-runtime = { version = "0.4.6", path = "../runtime", default-features = false } microsandbox-utils = { version = "0.4.6", path = "../utils" } nix = { workspace = true, features = ["process", "signal"] } +notify.workspace = true rand.workspace = true reqwest.workspace = true scopeguard.workspace = true diff --git a/crates/microsandbox/lib/error.rs b/crates/microsandbox/lib/error.rs index 72ecdcf05..879c7b74f 100644 --- a/crates/microsandbox/lib/error.rs +++ b/crates/microsandbox/lib/error.rs @@ -154,6 +154,25 @@ pub enum MicrosandboxError { #[error("metrics disabled for sandbox: {0}")] MetricsDisabled(String), + /// A log stream fell behind enough that the file it was reading + /// rotated out of the on-disk retention window. The stream + /// yields this error and ends; restart from + /// `LogStreamStart::Beginning`, `LogStreamStart::Since(now)`, + /// or `LogStreamStart::From(c)` with the cursor of the last + /// entry that was successfully consumed. + #[error("log stream missed rotation (dropped from offset {dropped_from_offset})")] + MissedRotation { + /// Byte offset within the lost file at which streamed + /// entries stop. Useful for diagnostics. + dropped_from_offset: u64, + }, + + /// A cursor passed to `log_stream` via `LogStreamStart::From` + /// could not be located in the current rotation chain. + /// Yielded once at stream start, then the stream ends. + #[error("invalid log cursor: {0}")] + InvalidCursor(String), + /// A custom error message. #[error("{0}")] Custom(String), diff --git a/crates/microsandbox/lib/lib.rs b/crates/microsandbox/lib/lib.rs index acd70f015..1285780f4 100644 --- a/crates/microsandbox/lib/lib.rs +++ b/crates/microsandbox/lib/lib.rs @@ -14,6 +14,7 @@ pub mod config; #[allow(dead_code)] pub(crate) mod db; pub mod image; +pub mod logs; pub mod runtime; pub mod sandbox; pub mod setup; diff --git a/crates/microsandbox/lib/logs/cursor.rs b/crates/microsandbox/lib/logs/cursor.rs new file mode 100644 index 000000000..f4cb50989 --- /dev/null +++ b/crates/microsandbox/lib/logs/cursor.rs @@ -0,0 +1,223 @@ +//! The opaque per-source resume handle, [`LogCursor`], plus its +//! string encoding (`Display` + `FromStr`) and parse error type. +//! +//! The cursor is variable-length: it carries one [`FilePosition`] +//! per log file the stream is configured to read. Each position is +//! a `(generation, offset)` pair. The set of files (and thus the +//! cursor's length) is determined at stream-creation time by the +//! caller's requested sources and the `LogFileConfig` list passed +//! to [`super::log_stream`]. + +use base64::Engine; +use base64::engine::general_purpose::STANDARD as BASE64; + +//-------------------------------------------------------------------------------------------------- +// FilePosition +//-------------------------------------------------------------------------------------------------- + +/// Position of one log file within a [`LogCursor`]. `generation` +/// identifies the file's underlying inode (or platform-equivalent); +/// `offset` is the byte position just after the most-recent entry +/// observed in that file. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub(super) struct FilePosition { + pub(super) generation: u64, + pub(super) offset: u64, +} + +//-------------------------------------------------------------------------------------------------- +// LogCursor +//-------------------------------------------------------------------------------------------------- + +/// Opaque per-source resume handle returned on every +/// [`LogEntry`](super::LogEntry) and accepted by +/// [`LogStreamStart::From`](super::LogStreamStart::From). +/// +/// Carries one position per log file the stream is reading. The +/// order matches the file order configured at stream creation. +/// Treat the inner data as opaque; round-trip the value via +/// [`Display`](std::fmt::Display) (or +/// [`to_string`](std::string::ToString::to_string)) and +/// [`str::parse`] for persistence or transport. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] +pub struct LogCursor { + pub(super) positions: Vec, +} + +/// Bytes per [`FilePosition`] on the wire (`generation` u64 LE + +/// `offset` u64 LE). +const POSITION_BYTES: usize = 8 + 8; + +impl LogCursor { + /// Empty cursor — used as the initial state for a stream that + /// has not emitted anything yet, and as the cursor on entries + /// before the stream layer stamps them. + pub fn empty() -> Self { + Self::default() + } + + #[allow(dead_code)] // used by SDK bindings. + pub(crate) fn exec(&self) -> (u64, u64) { + self.positions + .first() + .map(|p| (p.generation, p.offset)) + .unwrap_or((0, 0)) + } + + #[allow(dead_code)] // used by SDK bindings. + pub(crate) fn runtime(&self) -> u64 { + self.positions.get(1).map(|p| p.offset).unwrap_or(0) + } + + #[allow(dead_code)] // used by SDK bindings. + pub(crate) fn kernel(&self) -> u64 { + self.positions.get(2).map(|p| p.offset).unwrap_or(0) + } +} + +impl std::fmt::Display for LogCursor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let count = self.positions.len(); + let mut buf = vec![0u8; 1 + count * POSITION_BYTES]; + buf[0] = count as u8; + for (i, pos) in self.positions.iter().enumerate() { + let off = 1 + i * POSITION_BYTES; + buf[off..off + 8].copy_from_slice(&pos.generation.to_le_bytes()); + buf[off + 8..off + 16].copy_from_slice(&pos.offset.to_le_bytes()); + } + f.write_str(&BASE64.encode(buf)) + } +} + +impl std::str::FromStr for LogCursor { + type Err = LogCursorParseError; + + fn from_str(s: &str) -> Result { + let bytes = BASE64 + .decode(s.as_bytes()) + .map_err(|_| LogCursorParseError::BadBase64)?; + if bytes.is_empty() { + return Err(LogCursorParseError::WrongLength(0)); + } + let count = bytes[0] as usize; + let expected = 1 + count * POSITION_BYTES; + if bytes.len() != expected { + return Err(LogCursorParseError::WrongLength(bytes.len())); + } + let mut positions = Vec::with_capacity(count); + for i in 0..count { + let off = 1 + i * POSITION_BYTES; + positions.push(FilePosition { + generation: u64::from_le_bytes(bytes[off..off + 8].try_into().unwrap()), + offset: u64::from_le_bytes(bytes[off + 8..off + 16].try_into().unwrap()), + }); + } + Ok(Self { positions }) + } +} + +//-------------------------------------------------------------------------------------------------- +// LogCursorParseError +//-------------------------------------------------------------------------------------------------- + +/// Error returned when parsing a [`LogCursor`] from a string. +#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)] +pub enum LogCursorParseError { + /// The string is not valid base64 in the standard alphabet. + #[error("cursor is not valid base64")] + BadBase64, + /// The decoded payload length doesn't match the count byte + /// (`1 + count * 16`). + #[error("cursor payload has unexpected length ({0} bytes)")] + WrongLength(usize), +} + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn round_trip_three_positions() { + let c = LogCursor { + positions: vec![ + FilePosition { + generation: 0xdead_beef_1234_5678, + offset: 42, + }, + FilePosition { + generation: 0, + offset: 100, + }, + FilePosition { + generation: 0, + offset: 200, + }, + ], + }; + let s = c.to_string(); + let parsed: LogCursor = s.parse().unwrap(); + assert_eq!(parsed, c); + } + + #[test] + fn round_trip_empty() { + let c = LogCursor::empty(); + let s = c.to_string(); + let parsed: LogCursor = s.parse().unwrap(); + assert_eq!(parsed, c); + } + + #[test] + fn round_trip_single_position() { + let c = LogCursor { + positions: vec![FilePosition { + generation: 42, + offset: 100, + }], + }; + let s = c.to_string(); + let parsed: LogCursor = s.parse().unwrap(); + assert_eq!(parsed, c); + } + + #[test] + fn parse_rejects_wrong_length() { + let mut payload = vec![0u8; 6]; + payload[0] = 2; + let s = BASE64.encode(&payload); + assert!(matches!( + s.parse::(), + Err(LogCursorParseError::WrongLength(6)) + )); + } + + #[test] + fn parse_rejects_garbage() { + assert!(matches!( + "not-base64!".parse::(), + Err(LogCursorParseError::BadBase64) + )); + } + + #[test] + fn sdk_accessors_handle_short_cursors() { + let c = LogCursor::empty(); + assert_eq!(c.exec(), (0, 0)); + assert_eq!(c.runtime(), 0); + assert_eq!(c.kernel(), 0); + + let c = LogCursor { + positions: vec![FilePosition { + generation: 5, + offset: 9, + }], + }; + assert_eq!(c.exec(), (5, 9)); + assert_eq!(c.runtime(), 0); + assert_eq!(c.kernel(), 0); + } +} diff --git a/crates/microsandbox/lib/logs/mod.rs b/crates/microsandbox/lib/logs/mod.rs new file mode 100644 index 000000000..3d96891cc --- /dev/null +++ b/crates/microsandbox/lib/logs/mod.rs @@ -0,0 +1,205 @@ +//! Rotation-aware multi-file log streaming. +//! +//! Two public entry points keyed by sandbox name: +//! +//! - [`read_logs`] returns a snapshot `Vec` filtered with +//! [`LogOptions`]. Reads everything currently on disk, sorts by +//! timestamp, and returns. +//! - [`log_stream`] returns a [`futures::Stream`] over the same +//! files, suitable for live-tailing or replaying a fixed range. +//! Uses filesystem change notifications (the `notify` crate) for +//! live updates with a fallback poll, and stamps each entry with +//! an opaque [`LogCursor`] for exact per-source resume. +//! +//! # Files read +//! +//! - `exec.log` + rotated siblings (`exec.log.1` ... `exec.log.4`): +//! JSON Lines, captured stdout / stderr / pty output written by +//! the runtime relay tap. Rotates at 10 MiB per file, retains up +//! to four historical files on disk (~40 MiB ceiling). +//! - `runtime.log`: plain text, runtime diagnostics. Only read when +//! `System` is in the requested sources. Does not rotate. +//! - `kernel.log`: plain text, guest kernel console. Only read when +//! `System` is in the requested sources. Does not rotate. +//! +//! Adding a new log file type is one entry in `LOG_FILES`. +//! +//! # Ordering contract +//! +//! - [`read_logs`] returns entries in strict chronological order +//! (it sorts by timestamp before returning). +//! - [`log_stream`] preserves chronological order **within each +//! source** but emits **across sources** in "as parsed" order — +//! a `runtime.log` entry timestamped slightly earlier than an +//! `exec.log` entry may be yielded after it if the `exec.log` +//! read landed first. Use [`read_logs`] if you need strict +//! global ordering. +//! +//! # Keeping up +//! +//! [`log_stream`] holds an open file descriptor on each file it is +//! reading. Because rotation is a `rename` (not a delete), the FD +//! remains valid across rotations: the stream can drain whatever +//! the producer wrote to the now-rotated file before transitioning +//! to the new active file. +//! +//! However, the producer caps disk retention at four rotated files +//! (~40 MiB). If a consumer falls behind enough that the inode it +//! was reading rotates past that retention window before the +//! stream catches up, the file is overwritten and lost. When that +//! happens, the stream yields +//! [`crate::MicrosandboxError::MissedRotation`] +//! and ends. Hard-fail by design — restart from +//! [`LogStreamStart::Beginning`], [`LogStreamStart::Since`] with +//! the current time, or [`LogStreamStart::From`] with the cursor +//! of the last entry successfully consumed. + +mod cursor; +mod parser; +mod stream; +mod types; + +pub use cursor::{LogCursor, LogCursorParseError}; +pub use stream::{LogStreamOptions, LogStreamStart}; +pub use types::{LogEntry, LogOptions, LogSource}; + +use std::path::PathBuf; + +use futures::Stream; + +use stream::{LogEngine, LogFileConfig, LogFileFormat}; + +use crate::{MicrosandboxError, MicrosandboxResult}; + +//-------------------------------------------------------------------------------------------------- +// LOG_FILES +//-------------------------------------------------------------------------------------------------- + +/// The set of log files microsandbox produces. Add a new file +/// type by adding an entry here — the [`LogEngine`] opens a +/// reader for any entry whose `produces` list intersects the +/// caller's requested sources. +const LOG_FILES: &[LogFileConfig] = &[ + LogFileConfig { + filename: "exec.log", + format: LogFileFormat::Jsonl, + max_rotation_index: 4, + produces: &[ + LogSource::Stdout, + LogSource::Stderr, + LogSource::Output, + LogSource::System, + ], + }, + LogFileConfig { + filename: "runtime.log", + format: LogFileFormat::Text, + max_rotation_index: 0, + produces: &[LogSource::System], + }, + LogFileConfig { + filename: "kernel.log", + format: LogFileFormat::Text, + max_rotation_index: 0, + produces: &[LogSource::System], + }, +]; + +//-------------------------------------------------------------------------------------------------- +// Public API +//-------------------------------------------------------------------------------------------------- + +/// Chronologically sorted log snapshot plus the cursor at the end +/// of the drained on-disk content. +#[derive(Debug, Clone)] +pub struct LogSnapshot { + /// Entries matching the requested [`LogOptions`]. + pub entries: Vec, + + /// Cursor positioned after all content consumed for the + /// snapshot, including entries later removed by `tail` / + /// `until` filtering. + pub cursor: LogCursor, +} + +/// Compute the on-disk log directory for a sandbox name. +pub fn log_dir_for(name: &str) -> PathBuf { + crate::config::config() + .sandboxes_dir() + .join(name) + .join("logs") +} + +/// Read all matching log entries for the named sandbox. +/// +/// Returns entries sorted by timestamp (strict chronological order +/// across all sources). Returns +/// [`MicrosandboxError::SandboxNotFound`] if the sandbox's log +/// directory doesn't exist. +/// +/// Implemented as a drain of [`log_stream`] with `follow: false`, +/// sorted post-collect; `until` and `tail` are applied +/// post-collect because the stream's per-source ordering doesn't +/// match snapshot's "filter after sort" contract. +pub async fn read_logs(name: &str, opts: &LogOptions) -> MicrosandboxResult> { + Ok(read_logs_snapshot(name, opts).await?.entries) +} + +/// Read all matching log entries and return the snapshot end cursor. +/// +/// This is useful when handing a bounded historical read to +/// [`log_stream`] with [`LogStreamStart::From`] without losing log +/// lines written between the snapshot drain and follow startup. +pub async fn read_logs_snapshot(name: &str, opts: &LogOptions) -> MicrosandboxResult { + let stream_opts = LogStreamOptions { + sources: opts.sources.clone(), + // Push `since` into the parser when possible so early + // entries are discarded at parse time rather than after. + start: opts.since.map(LogStreamStart::Since).unwrap_or_default(), + until: None, + follow: false, + }; + let log_dir = log_dir_for(name); + if !tokio::fs::try_exists(&log_dir).await.unwrap_or(false) { + return Err(MicrosandboxError::SandboxNotFound(name.to_string())); + } + let sources = LogSource::effective(&stream_opts.sources); + let engine = LogEngine::new( + log_dir, + LOG_FILES, + sources, + &stream_opts.start, + stream_opts.until, + stream_opts.follow, + ) + .await?; + let (mut entries, cursor) = engine.drain_sorted_snapshot().await?; + opts.apply_to(&mut entries); + Ok(LogSnapshot { entries, cursor }) +} + +/// Stream log entries for the named sandbox. +/// +/// Returns [`MicrosandboxError::SandboxNotFound`] if the sandbox's +/// log directory doesn't exist. Within each source, entries are +/// chronological; across sources, ordering is "as parsed." +pub async fn log_stream( + name: &str, + opts: &LogStreamOptions, +) -> MicrosandboxResult> + Send + 'static + use<>> { + let log_dir = log_dir_for(name); + if !tokio::fs::try_exists(&log_dir).await.unwrap_or(false) { + return Err(MicrosandboxError::SandboxNotFound(name.to_string())); + } + let sources = LogSource::effective(&opts.sources); + let engine = LogEngine::new( + log_dir, + LOG_FILES, + sources, + &opts.start, + opts.until, + opts.follow, + ) + .await?; + Ok(engine.into_stream()) +} diff --git a/crates/microsandbox/lib/logs/parser.rs b/crates/microsandbox/lib/logs/parser.rs new file mode 100644 index 000000000..dc2e3b429 --- /dev/null +++ b/crates/microsandbox/lib/logs/parser.rs @@ -0,0 +1,600 @@ +//! Reading one log file: the [`FileHandle`] wrapper around an open +//! file (tracking inode + offset for rotation-aware identity), and +//! the format-specific [`ParserKind`] dispatcher for JSON Lines +//! and plain text with RFC 3339 prefixes. +//! +//! [`ParserKind::parse_from`] takes a mutable [`FileHandle`], reads +//! up to one chunk, parses, and returns `(entry, position)` pairs. +//! Parsers don't stamp cursors — the stream layer composes the +//! cross-file [`LogCursor`] from the per-file positions returned +//! here. + +use std::path::Path; + +use bytes::Bytes; +use chrono::{DateTime, Utc}; +use microsandbox_utils::log_text::{base64_decode, split_leading_timestamp, strip_ansi}; +use serde::Deserialize; +use tokio::io::{AsyncReadExt, AsyncSeekExt}; + +use crate::{MicrosandboxError, MicrosandboxResult}; + +use super::cursor::{FilePosition, LogCursor}; +use super::types::{LogEntry, LogSource}; + +//-------------------------------------------------------------------------------------------------- +// Constants +//-------------------------------------------------------------------------------------------------- + +/// Per-tick read ceiling. Caps how many bytes one read iteration +/// pulls off disk so very large backfills don't materialize the +/// whole file into memory before yielding the first entry. +const MAX_READ_BYTES: usize = 1024 * 1024; + +//-------------------------------------------------------------------------------------------------- +// FileHandle +//-------------------------------------------------------------------------------------------------- + +/// Open file handle paired with its inode and current read offset. +/// The handle survives renames (rotation), so once opened we can +/// keep reading the same inode regardless of where its path moves. +pub(super) struct FileHandle { + pub(super) file: tokio::fs::File, + pub(super) inode: u64, + pub(super) offset: u64, +} + +impl FileHandle { + /// Asynchronously open a file as a [`FileHandle`]. Returns + /// `Ok(None)` if the file doesn't exist (treated as "producer + /// hasn't created it yet, retry later"); other I/O errors + /// propagate. + pub(super) async fn open(path: &Path) -> MicrosandboxResult> { + let file = match tokio::fs::File::open(path).await { + Ok(f) => f, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(e) => return Err(MicrosandboxError::Io(e)), + }; + let inode = Self::inode_of_file(&file).await?; + Ok(Some(Self { + file, + inode, + offset: 0, + })) + } + + /// Compute the file-identity discriminator (inode on Unix; a + /// hash of file metadata on other platforms) from a sync + /// `Metadata`. + #[cfg(unix)] + pub(super) fn generation_of_meta(meta: &std::fs::Metadata) -> u64 { + use std::os::unix::fs::MetadataExt; + meta.ino() + } + + #[cfg(not(unix))] + pub(super) fn generation_of_meta(meta: &std::fs::Metadata) -> u64 { + use std::hash::{Hash, Hasher}; + let mut h = std::collections::hash_map::DefaultHasher::new(); + if let Ok(created) = meta.created() + && let Ok(d) = created.duration_since(std::time::UNIX_EPOCH) + { + d.as_nanos().hash(&mut h); + } + meta.len().hash(&mut h); + h.finish() + } + + /// Convenience: stat a path and return its current generation, + /// or `None` if the path can't be stat'd. + pub(super) async fn generation_of_path(path: &Path) -> Option { + tokio::fs::metadata(path) + .await + .ok() + .map(|m| Self::generation_of_meta(&m)) + } + + /// Inode of an already-open tokio file handle. + pub(super) async fn inode_of_file(file: &tokio::fs::File) -> MicrosandboxResult { + let meta = file.metadata().await.map_err(MicrosandboxError::Io)?; + Ok(Self::generation_of_meta(&meta)) + } + + /// File modification time of a path, falling back to `Utc::now` + /// when the path can't be stat'd. Used by the text parser as + /// the timestamp for lines that don't carry a parseable + /// leading RFC 3339. + pub(super) async fn mtime_of_path(path: &Path) -> DateTime { + tokio::fs::metadata(path) + .await + .ok() + .and_then(|m| m.modified().ok()) + .map(|t| -> DateTime { t.into() }) + .unwrap_or_else(Utc::now) + } +} + +//-------------------------------------------------------------------------------------------------- +// RawEntry — the JSONL wire format +//-------------------------------------------------------------------------------------------------- + +#[derive(Debug, Deserialize)] +pub(super) struct RawEntry { + pub(super) t: String, + pub(super) s: String, + #[serde(default)] + pub(super) d: String, + #[serde(default)] + pub(super) id: Option, + #[serde(default)] + pub(super) e: Option, +} + +impl RawEntry { + /// Decode the body bytes, applying base64 decoding when the + /// entry is tagged with `e: "b64"` (raw-mode capture). + pub(super) fn decode_body(&self) -> Bytes { + if self.e.as_deref() == Some("b64") { + match base64_decode(&self.d) { + Some(bytes) => Bytes::from(bytes), + None => Bytes::from(self.d.clone().into_bytes()), + } + } else { + Bytes::from(self.d.clone().into_bytes()) + } + } +} + +//-------------------------------------------------------------------------------------------------- +// ParserKind +//-------------------------------------------------------------------------------------------------- + +pub(super) struct ParsedChunk { + pub(super) entries: Vec<(LogEntry, FilePosition)>, + pub(super) position: Option, +} + +/// How a reader parses bytes from its source file. +pub(super) enum ParserKind { + /// JSON Lines (one record per line). Filters by `sources`. + Jsonl { sources: Vec }, + /// Plain text, one entry per line, with optional leading + /// RFC 3339 timestamp; lines without a parseable timestamp + /// fall back to the file's `mtime`. Always emits entries + /// tagged as [`LogSource::System`]. + Text, +} + +impl ParserKind { + /// Read a chunk from `src` and parse it. Advances `src.offset` + /// by the bytes consumed. + pub(super) async fn parse_from( + &self, + src: &mut FileHandle, + primary_path: &Path, + since: Option>, + ) -> MicrosandboxResult { + let mut buf = vec![0u8; MAX_READ_BYTES]; + src.file + .seek(std::io::SeekFrom::Start(src.offset)) + .await + .map_err(MicrosandboxError::Io)?; + let read = src + .file + .read(&mut buf) + .await + .map_err(MicrosandboxError::Io)?; + if read == 0 { + return Ok(ParsedChunk { + entries: Vec::new(), + position: None, + }); + } + let mut entries = Vec::new(); + let consumed = match self { + Self::Jsonl { sources } => Self::parse_jsonl( + &buf[..read], + src.offset, + src.inode, + sources, + since, + &mut entries, + ), + Self::Text => { + let mtime = FileHandle::mtime_of_path(primary_path).await; + Self::parse_text( + &buf[..read], + src.offset, + src.inode, + mtime, + since, + &mut entries, + ) + } + }; + src.offset += consumed as u64; + let position = (consumed > 0).then_some(FilePosition { + generation: src.inode, + offset: src.offset, + }); + Ok(ParsedChunk { entries, position }) + } + + /// Parse JSON Lines bytes. Emits `(entry, position)` pairs; + /// each `position` carries the file's `generation` and the + /// byte offset just after the entry's terminating newline. + pub(super) fn parse_jsonl( + bytes: &[u8], + base_offset: u64, + generation: u64, + sources: &[LogSource], + since: Option>, + out: &mut Vec<(LogEntry, FilePosition)>, + ) -> usize { + Self::walk_lines(bytes, base_offset, out, |line, entry_end_offset| { + let raw: RawEntry = serde_json::from_slice(line).ok()?; + let source = match raw.s.as_str() { + "stdout" => LogSource::Stdout, + "stderr" => LogSource::Stderr, + "output" => LogSource::Output, + "system" => LogSource::System, + _ => return None, + }; + if !sources.contains(&source) { + return None; + } + let timestamp = DateTime::parse_from_rfc3339(&raw.t) + .ok()? + .with_timezone(&Utc); + if since.is_some_and(|s| timestamp < s) { + return None; + } + Some(( + LogEntry { + timestamp, + source, + session_id: raw.id, + data: raw.decode_body(), + cursor: LogCursor::empty(), + }, + FilePosition { + generation, + offset: entry_end_offset, + }, + )) + }) + } + + /// Parse plain-text bytes. Lines without a parseable leading + /// RFC 3339 timestamp fall back to `mtime_fallback`. Always + /// emits entries tagged as [`LogSource::System`]. + pub(super) fn parse_text( + bytes: &[u8], + base_offset: u64, + generation: u64, + mtime_fallback: DateTime, + since: Option>, + out: &mut Vec<(LogEntry, FilePosition)>, + ) -> usize { + Self::walk_lines(bytes, base_offset, out, |raw_line, entry_end_offset| { + let line = String::from_utf8_lossy(raw_line); + let stripped = strip_ansi(&line); + let (timestamp, body) = match split_leading_timestamp(&stripped) { + Some((s, rest)) => ( + DateTime::parse_from_rfc3339(s) + .map(|d| d.with_timezone(&Utc)) + .unwrap_or(mtime_fallback), + rest.trim_start().to_string(), + ), + None => (mtime_fallback, stripped.clone()), + }; + if since.is_some_and(|s| timestamp < s) { + return None; + } + Some(( + LogEntry { + timestamp, + source: LogSource::System, + session_id: None, + data: Bytes::from(format!("{body}\n").into_bytes()), + cursor: LogCursor::empty(), + }, + FilePosition { + generation, + offset: entry_end_offset, + }, + )) + }) + } + + /// Walk `bytes` line-by-line (`\n`-delimited), invoking + /// `handler` on each complete line with its end-of-line file + /// offset. Trailing partial lines (no terminating `\n`) are + /// left unconsumed. Returns the number of bytes consumed. + fn walk_lines( + bytes: &[u8], + base_offset: u64, + out: &mut Vec<(LogEntry, FilePosition)>, + mut handler: F, + ) -> usize + where + F: FnMut(&[u8], u64) -> Option<(LogEntry, FilePosition)>, + { + let mut pos = 0usize; + while pos < bytes.len() { + let nl_rel = match bytes[pos..].iter().position(|&b| b == b'\n') { + Some(n) => n, + None => break, + }; + let line_end = pos + nl_rel; + let line = &bytes[pos..line_end]; + let entry_end_offset = base_offset + (line_end as u64) + 1; + if !line.is_empty() + && let Some(pair) = handler(line, entry_end_offset) + { + out.push(pair); + } + pos = line_end + 1; + } + pos + } +} + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use chrono::TimeZone; + + fn ts(s: &str) -> DateTime { + DateTime::parse_from_rfc3339(s).unwrap().with_timezone(&Utc) + } + + // -- parse_jsonl --------------------------------------------------- + + #[test] + fn jsonl_parses_single_complete_line() { + let bytes = br#"{"t":"2026-04-30T20:32:59.000Z","s":"stdout","d":"hi\n","id":7} +"#; + let mut out = Vec::new(); + let consumed = ParserKind::parse_jsonl(bytes, 0, 42, &[LogSource::Stdout], None, &mut out); + assert_eq!(consumed, bytes.len()); + assert_eq!(out.len(), 1); + let (entry, pos) = &out[0]; + assert_eq!(entry.source, LogSource::Stdout); + assert_eq!(entry.session_id, Some(7)); + assert_eq!(entry.data, Bytes::from("hi\n".as_bytes())); + assert_eq!(entry.timestamp, ts("2026-04-30T20:32:59Z")); + assert_eq!(pos.generation, 42); + assert_eq!(pos.offset, bytes.len() as u64); + } + + #[test] + fn jsonl_leaves_trailing_partial_line_unconsumed() { + let mut bytes = Vec::new(); + bytes.extend_from_slice( + br#"{"t":"2026-04-30T20:32:59.000Z","s":"stdout","d":"a","id":1} +"#, + ); + let after_first = bytes.len(); + bytes.extend_from_slice(br#"{"t":"2026-04-30T20:33:00.000Z","s":"stdout","d":"b"#); + + let mut out = Vec::new(); + let consumed = ParserKind::parse_jsonl(&bytes, 0, 1, &[LogSource::Stdout], None, &mut out); + assert_eq!(consumed, after_first); + assert_eq!(out.len(), 1); + } + + #[test] + fn jsonl_filters_by_sources() { + let bytes = b"{\"t\":\"2026-04-30T00:00:00Z\",\"s\":\"stdout\",\"d\":\"a\"}\n\ + {\"t\":\"2026-04-30T00:00:01Z\",\"s\":\"stderr\",\"d\":\"b\"}\n\ + {\"t\":\"2026-04-30T00:00:02Z\",\"s\":\"system\",\"d\":\"c\"}\n"; + let mut out = Vec::new(); + ParserKind::parse_jsonl(bytes, 0, 0, &[LogSource::Stderr], None, &mut out); + assert_eq!(out.len(), 1); + assert_eq!(out[0].0.source, LogSource::Stderr); + assert_eq!(out[0].0.data, Bytes::from("b".as_bytes())); + } + + #[test] + fn jsonl_filters_by_since() { + let bytes = b"{\"t\":\"2026-04-30T00:00:00Z\",\"s\":\"stdout\",\"d\":\"early\"}\n\ + {\"t\":\"2026-04-30T00:00:05Z\",\"s\":\"stdout\",\"d\":\"late\"}\n"; + let mut out = Vec::new(); + let since = Some(ts("2026-04-30T00:00:03Z")); + ParserKind::parse_jsonl(bytes, 0, 0, &[LogSource::Stdout], since, &mut out); + assert_eq!(out.len(), 1); + assert_eq!(out[0].0.data, Bytes::from("late".as_bytes())); + } + + #[test] + fn jsonl_skips_malformed_lines() { + let bytes = b"this is not json\n\ + {\"t\":\"2026-04-30T00:00:00Z\",\"s\":\"stdout\",\"d\":\"ok\"}\n\ + {malformed json\n"; + let mut out = Vec::new(); + ParserKind::parse_jsonl(bytes, 0, 0, &[LogSource::Stdout], None, &mut out); + assert_eq!(out.len(), 1); + assert_eq!(out[0].0.data, Bytes::from("ok".as_bytes())); + } + + #[test] + fn jsonl_skips_unknown_source_values() { + let bytes = b"{\"t\":\"2026-04-30T00:00:00Z\",\"s\":\"alien\",\"d\":\"x\"}\n"; + let mut out = Vec::new(); + ParserKind::parse_jsonl(bytes, 0, 0, &[LogSource::Stdout], None, &mut out); + assert!(out.is_empty()); + } + + #[test] + fn jsonl_decodes_base64_body_when_tagged() { + let bytes = + b"{\"t\":\"2026-04-30T00:00:00Z\",\"s\":\"stdout\",\"d\":\"aGVsbG8=\",\"e\":\"b64\"}\n"; + let mut out = Vec::new(); + ParserKind::parse_jsonl(bytes, 0, 0, &[LogSource::Stdout], None, &mut out); + assert_eq!(out.len(), 1); + assert_eq!(out[0].0.data, Bytes::from("hello".as_bytes())); + } + + #[test] + fn jsonl_position_offsets_are_relative_to_base() { + let bytes = b"{\"t\":\"2026-04-30T00:00:00Z\",\"s\":\"stdout\",\"d\":\"a\"}\n\ + {\"t\":\"2026-04-30T00:00:01Z\",\"s\":\"stdout\",\"d\":\"b\"}\n"; + let mut out = Vec::new(); + ParserKind::parse_jsonl(bytes, 1000, 99, &[LogSource::Stdout], None, &mut out); + assert_eq!(out.len(), 2); + assert_eq!(out[0].1.generation, 99); + assert!(out[0].1.offset > 1000); + assert_eq!(out[1].1.offset, 1000 + bytes.len() as u64); + } + + #[test] + fn jsonl_skips_empty_lines() { + let bytes = b"\n\ + {\"t\":\"2026-04-30T00:00:00Z\",\"s\":\"stdout\",\"d\":\"a\"}\n\ + \n"; + let mut out = Vec::new(); + ParserKind::parse_jsonl(bytes, 0, 0, &[LogSource::Stdout], None, &mut out); + assert_eq!(out.len(), 1); + } + + // -- parse_text ---------------------------------------------------- + + #[test] + fn text_parses_lines_with_rfc3339_prefix() { + let bytes = b"2026-04-30T20:30:00.000Z INFO starting up\n\ + 2026-04-30T20:30:01.000Z WARN something\n"; + let mut out = Vec::new(); + let mtime = Utc.with_ymd_and_hms(2030, 1, 1, 0, 0, 0).unwrap(); + ParserKind::parse_text(bytes, 0, 7, mtime, None, &mut out); + assert_eq!(out.len(), 2); + assert_eq!(out[0].0.source, LogSource::System); + assert_eq!(out[0].0.timestamp, ts("2026-04-30T20:30:00Z")); + assert_eq!(out[0].0.data, Bytes::from("INFO starting up\n".as_bytes())); + assert_eq!(out[0].1.generation, 7); + } + + #[test] + fn text_falls_back_to_mtime_without_timestamp_prefix() { + let bytes = b"[ 0.123] kernel boot message\n"; + let mut out = Vec::new(); + let mtime = ts("2026-01-15T10:00:00Z"); + ParserKind::parse_text(bytes, 0, 0, mtime, None, &mut out); + assert_eq!(out.len(), 1); + assert_eq!(out[0].0.timestamp, mtime); + } + + #[test] + fn text_replaces_invalid_utf8_lossily() { + let bytes = b"2026-04-30T20:30:00.000Z bad \xff line\n"; + let mut out = Vec::new(); + ParserKind::parse_text(bytes, 0, 0, Utc::now(), None, &mut out); + assert_eq!(out.len(), 1); + assert_eq!( + std::str::from_utf8(&out[0].0.data).unwrap(), + "bad \u{fffd} line\n" + ); + } + + #[test] + fn text_strips_ansi_escapes() { + let bytes = b"\x1b[31m2026-04-30T20:30:00.000Z\x1b[0m ERROR boom\n"; + let mut out = Vec::new(); + let mtime = Utc::now(); + ParserKind::parse_text(bytes, 0, 0, mtime, None, &mut out); + assert_eq!(out.len(), 1); + assert_eq!(out[0].0.timestamp, ts("2026-04-30T20:30:00Z")); + } + + #[test] + fn text_filters_by_since() { + let bytes = b"2026-04-30T00:00:00Z early line\n\ + 2026-04-30T00:00:10Z late line\n"; + let mut out = Vec::new(); + let mtime = Utc::now(); + let since = Some(ts("2026-04-30T00:00:05Z")); + ParserKind::parse_text(bytes, 0, 0, mtime, since, &mut out); + assert_eq!(out.len(), 1); + assert!( + std::str::from_utf8(&out[0].0.data) + .unwrap() + .contains("late line") + ); + } + + #[test] + fn text_leaves_partial_trailing_line_unconsumed() { + let mut bytes = Vec::new(); + bytes.extend_from_slice(b"2026-04-30T00:00:00Z complete\n"); + let after_first = bytes.len(); + bytes.extend_from_slice(b"2026-04-30T00:00:01Z partial"); + + let mut out = Vec::new(); + let consumed = ParserKind::parse_text(&bytes, 0, 0, Utc::now(), None, &mut out); + assert_eq!(consumed, after_first); + assert_eq!(out.len(), 1); + } + + #[test] + fn text_always_emits_system_source() { + let bytes = b"hello world\n"; + let mut out = Vec::new(); + ParserKind::parse_text(bytes, 0, 0, Utc::now(), None, &mut out); + assert_eq!(out.len(), 1); + assert_eq!(out[0].0.source, LogSource::System); + assert_eq!(out[0].0.session_id, None); + } + + #[test] + fn text_position_offsets_are_relative_to_base() { + let bytes = b"2026-04-30T00:00:00Z first\n2026-04-30T00:00:01Z second\n"; + let mut out = Vec::new(); + ParserKind::parse_text(bytes, 500, 99, Utc::now(), None, &mut out); + assert_eq!(out.len(), 2); + assert_eq!(out[0].1.generation, 99); + assert!(out[0].1.offset > 500); + assert_eq!(out[1].1.offset, 500 + bytes.len() as u64); + } + + // -- RawEntry::decode_body ----------------------------------------- + + #[test] + fn decode_body_returns_plain_bytes_without_base64_tag() { + let raw = RawEntry { + t: "x".into(), + s: "stdout".into(), + d: "hello".into(), + id: None, + e: None, + }; + assert_eq!(raw.decode_body(), Bytes::from("hello".as_bytes())); + } + + #[test] + fn decode_body_base64_decodes_when_tagged_b64() { + let raw = RawEntry { + t: "x".into(), + s: "stdout".into(), + d: "aGVsbG8=".into(), + id: None, + e: Some("b64".into()), + }; + assert_eq!(raw.decode_body(), Bytes::from("hello".as_bytes())); + } + + #[test] + fn decode_body_falls_back_to_raw_on_invalid_base64() { + let raw = RawEntry { + t: "x".into(), + s: "stdout".into(), + d: "not valid b64!".into(), + id: None, + e: Some("b64".into()), + }; + assert_eq!(raw.decode_body(), Bytes::from("not valid b64!".as_bytes())); + } +} diff --git a/crates/microsandbox/lib/logs/stream.rs b/crates/microsandbox/lib/logs/stream.rs new file mode 100644 index 000000000..3664f4bbc --- /dev/null +++ b/crates/microsandbox/lib/logs/stream.rs @@ -0,0 +1,934 @@ +//! Streaming engine. The [`LogEngine`] takes a +//! `&[LogFileConfig]` describing the streamable files, filters them +//! by the caller's requested sources, opens a reader for each +//! surviving file, and merges their outputs into one async +//! [`futures::Stream`] of [`LogEntry`] values — with optional +//! follow semantics and per-source cursor resume. + +use std::collections::VecDeque; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::Duration; + +use chrono::{DateTime, Utc}; +use futures::Stream; +use futures::stream; +use notify::Watcher; + +use crate::{MicrosandboxError, MicrosandboxResult}; + +use super::cursor::{FilePosition, LogCursor}; +use super::parser::{FileHandle, ParsedChunk, ParserKind}; +use super::types::{LogEntry, LogSource}; + +//-------------------------------------------------------------------------------------------------- +// Constants +//-------------------------------------------------------------------------------------------------- + +const FALLBACK_POLL_INTERVAL: Duration = Duration::from_secs(2); + +//-------------------------------------------------------------------------------------------------- +// LogFileConfig +//-------------------------------------------------------------------------------------------------- + +/// On-disk format of a log file. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum LogFileFormat { + /// JSON Lines, one record per line. + Jsonl, + /// Plain text, one entry per line, RFC 3339 timestamp prefix + /// optional. All entries surface as [`LogSource::System`]. + Text, +} + +/// Static description of one streamable log file. The engine takes +/// a `&[LogFileConfig]` at construction time and builds one reader +/// per entry whose `produces` intersects the caller's requested +/// sources. +pub(crate) struct LogFileConfig { + /// Base filename within the directory passed to + /// [`super::log_stream`]. + pub(crate) filename: &'static str, + /// On-disk format. + pub(crate) format: LogFileFormat, + /// Highest rotation index. `0` means non-rotating — the file at + /// the bare name is the only one ever read; on inode change + /// the reader silently resets to the new file. + pub(crate) max_rotation_index: usize, + /// Which [`LogSource`] values entries from this file may + /// carry. Used at stream construction to decide whether to + /// open a reader for the file at all. + pub(crate) produces: &'static [LogSource], +} + +//-------------------------------------------------------------------------------------------------- +// RotationConfig +//-------------------------------------------------------------------------------------------------- + +/// Per-reader rotation policy. `max_index == 0` denotes a +/// non-rotating file. `max_index > 0` enables the chain walker. +struct RotationConfig { + log_dir: PathBuf, + filename: String, + max_index: usize, +} + +impl RotationConfig { + fn rotates(&self) -> bool { + self.max_index > 0 + } + + /// Resolve the on-disk path for a rotation index. `0` is the + /// live file (bare filename), `N>0` is `.N`. + fn path(&self, index: usize) -> PathBuf { + if index == 0 { + self.log_dir.join(&self.filename) + } else { + self.log_dir.join(format!("{}.{index}", self.filename)) + } + } + + /// Find which rotation slot currently holds the file with the + /// given inode generation, or `None` if it has rotated out of + /// the retention window. + async fn find_index(&self, wanted: u64) -> Option { + for index in 0..=self.max_index { + if let Some(found) = FileHandle::generation_of_path(&self.path(index)).await + && found == wanted + { + return Some(index); + } + } + None + } + + /// Append every existing rotated file (`.max_index` → `.1`) to + /// `backfill` in oldest-first order so the reader drains + /// chronologically before reaching the live file. + async fn queue_all_rotated( + &self, + backfill: &mut VecDeque, + ) -> MicrosandboxResult<()> { + for index in (1..=self.max_index).rev() { + if let Some(src) = FileHandle::open(&self.path(index)).await? { + backfill.push_back(src); + } + } + Ok(()) + } +} + +//-------------------------------------------------------------------------------------------------- +// LogStreamStart +//-------------------------------------------------------------------------------------------------- + +/// Where a [`super::log_stream`] starts emitting from. +#[derive(Debug, Clone, Default)] +#[non_exhaustive] +pub enum LogStreamStart { + /// Begin at the oldest available entry across the rotation + /// chain. With `System` in sources, plain-text files like + /// `runtime.log` / `kernel.log` are also drained from their + /// start. + #[default] + Beginning, + + /// Begin at the first entry whose timestamp is at or after this + /// instant. + Since(DateTime), + + /// Resume each source from the per-source position recorded in + /// this cursor. + /// + /// Returns [`MicrosandboxError::InvalidCursor`] upfront if any + /// rotating file's generation can no longer be located in the + /// current rotation chain. + From(LogCursor), +} + +//-------------------------------------------------------------------------------------------------- +// LogStreamOptions +//-------------------------------------------------------------------------------------------------- + +/// Options for [`super::log_stream`]. +#[derive(Debug, Clone, Default)] +pub struct LogStreamOptions { + /// Sources to include. If empty, defaults to + /// `Stdout` + `Stderr` + `Output`. Include `System` to also + /// interleave plain-text system files (and `system`-tagged + /// JSONL entries) into the stream. + pub sources: Vec, + + /// Where to begin emitting from. + pub start: LogStreamStart, + + /// Optional upper bound on entry timestamp. The stream ends as + /// soon as an entry with timestamp at or after this is observed + /// (without emitting it). + pub until: Option>, + + /// If `true`, the stream stays open past current EOF on each + /// source and yields new entries as the producers append. If + /// `false`, the stream ends once the historical content has + /// been drained. + pub follow: bool, +} + +//-------------------------------------------------------------------------------------------------- +// LogEngine +//-------------------------------------------------------------------------------------------------- + +/// Outcome of one engine step: keep going, end the stream cleanly, +/// or surface a terminal error to the consumer. +enum StepResult { + Continue, + Terminate, + Error(MicrosandboxError), +} + +/// One reader plus its most-recently-emitted [`FilePosition`]. The +/// position is folded into each entry's [`LogCursor`] so consumers +/// can resume per-source. +struct ReaderState { + reader: Reader, + last_position: FilePosition, +} + +struct PositionedLogEntry { + entry: LogEntry, + reader_idx: usize, + position: FilePosition, +} + +pub(crate) struct LogEngine { + since: Option>, + until: Option>, + follow: bool, + + readers: Vec, + initial_positions: Vec, + pending: VecDeque, + + _watcher: Option, + event_rx: Option>, + + finished: bool, +} + +impl LogEngine { + /// Open one reader per [`LogFileConfig`] whose `produces` set + /// intersects `sources`. When `follow` is true, also subscribe + /// to filesystem change events on `log_dir` so [`step`] can wake + /// promptly on new writes. + pub(crate) async fn new( + log_dir: PathBuf, + log_files: &'static [LogFileConfig], + sources: Vec, + start: &LogStreamStart, + until: Option>, + follow: bool, + ) -> MicrosandboxResult { + let since = match start { + LogStreamStart::Since(t) => Some(*t), + _ => None, + }; + + let selected: Vec<&LogFileConfig> = log_files + .iter() + .filter(|c| c.produces.iter().any(|s| sources.contains(s))) + .collect(); + + let mut readers = Vec::with_capacity(selected.len()); + let mut initial_positions = Vec::with_capacity(selected.len()); + for (idx, config) in selected.iter().enumerate() { + let initial_position = match start { + LogStreamStart::From(c) => c.positions.get(idx).copied().unwrap_or_default(), + _ => FilePosition::default(), + }; + let reader = Reader::open(config, &log_dir, &sources, start, initial_position).await?; + initial_positions.push(initial_position); + readers.push(ReaderState { + reader, + last_position: initial_position, + }); + } + + let (watcher, event_rx) = if follow { + let (w, rx) = Self::build_watcher(&log_dir)?; + (Some(w), Some(rx)) + } else { + (None, None) + }; + + Ok(Self { + since, + until, + follow, + readers, + initial_positions, + pending: VecDeque::new(), + _watcher: watcher, + event_rx, + finished: false, + }) + } + + /// Snapshot the watermark across every active source as a + /// [`LogCursor`]. Called per emitted entry so the cursor on + /// that entry reflects the complete cross-source state at emit + /// time. + fn snapshot_cursor(&self) -> LogCursor { + LogCursor { + positions: self.readers.iter().map(|r| r.last_position).collect(), + } + } + + pub(crate) async fn drain_sorted_snapshot( + mut self, + ) -> MicrosandboxResult<(Vec, LogCursor)> { + let mut entries = Vec::new(); + loop { + while let Some(entry) = self.pending.pop_front() { + entries.push(entry); + } + if self.finished { + break; + } + match self.step().await { + StepResult::Continue => continue, + StepResult::Terminate => break, + StepResult::Error(e) => return Err(e), + } + } + let end_cursor = self.snapshot_cursor(); + entries.sort_by_key(|e| e.entry.timestamp); + + let mut positions = self.initial_positions.clone(); + for entry in &mut entries { + positions[entry.reader_idx] = entry.position; + entry.entry.cursor = LogCursor { + positions: positions.clone(), + }; + } + + Ok((entries.into_iter().map(|e| e.entry).collect(), end_cursor)) + } + + /// Drive every reader once: parse any newly-available entries, + /// stamp them with a fresh cursor, and queue them in `pending`. + /// If nothing parsed and `follow` is on, await a filesystem + /// event (or a poll fallback) before signaling `Continue`. + async fn step(&mut self) -> StepResult { + let mut any_progress = false; + + for idx in 0..self.readers.len() { + let since = self.since; + let parsed = match self.readers[idx].reader.read_chunk(since).await { + Ok(p) => p, + Err(e) => return StepResult::Error(e), + }; + if parsed.position.is_some() || !parsed.entries.is_empty() { + any_progress = true; + } + for (mut entry, position) in parsed.entries { + self.readers[idx].last_position = position; + entry.cursor = self.snapshot_cursor(); + self.pending.push_back(PositionedLogEntry { + entry, + reader_idx: idx, + position, + }); + } + if let Some(position) = parsed.position { + self.readers[idx].last_position = position; + } + } + + if any_progress { + return StepResult::Continue; + } + + if !self.follow { + return StepResult::Terminate; + } + + let Some(rx) = self.event_rx.as_mut() else { + return StepResult::Terminate; + }; + tokio::select! { + _ = rx.recv() => StepResult::Continue, + _ = tokio::time::sleep(FALLBACK_POLL_INTERVAL) => StepResult::Continue, + } + } + + /// Wire up a non-recursive [`notify`] watcher on `log_dir` and + /// return its event receiver. The watcher's callback runs on a + /// background thread; we forward only the wake signal so the + /// async [`step`] loop can park efficiently between writes. + fn build_watcher( + log_dir: &Path, + ) -> MicrosandboxResult<( + notify::RecommendedWatcher, + tokio::sync::mpsc::UnboundedReceiver<()>, + )> { + let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); + let tx = Arc::new(tx); + let tx_clone = Arc::clone(&tx); + let mut watcher = notify::recommended_watcher(move |res: notify::Result| { + if let Ok(event) = res { + use notify::EventKind; + if matches!( + event.kind, + EventKind::Modify(_) | EventKind::Create(_) | EventKind::Remove(_) + ) { + let _ = tx_clone.send(()); + } + } + }) + .map_err(|e| MicrosandboxError::Custom(format!("log watcher init failed: {e}")))?; + + watcher + .watch(log_dir, notify::RecursiveMode::NonRecursive) + .map_err(|e| MicrosandboxError::Custom(format!("log watcher subscribe failed: {e}")))?; + + Ok((watcher, rx)) + } + + /// Consume the engine and yield its entries as a [`futures::Stream`]. + /// Drains `pending` first, then drives [`step`] until termination; + /// on error the stream yields one `Err` and ends. + pub(crate) fn into_stream( + self, + ) -> impl Stream> + Send + 'static + use<> { + stream::unfold(self, |mut state| async move { + loop { + if let Some(positioned) = state.pending.pop_front() { + if let Some(until) = state.until + && positioned.entry.timestamp >= until + { + return None; + } + return Some((Ok(positioned.entry), state)); + } + if state.finished { + return None; + } + match state.step().await { + StepResult::Continue => continue, + StepResult::Terminate => return None, + StepResult::Error(e) => { + state.finished = true; + return Some((Err(e), state)); + } + } + } + }) + } +} + +//-------------------------------------------------------------------------------------------------- +// Reader +//-------------------------------------------------------------------------------------------------- + +struct Reader { + primary_path: PathBuf, + parser: ParserKind, + rotation: RotationConfig, + backfill: VecDeque, + live: Option, + initial_live_offset: u64, +} + +impl Reader { + /// Prepare a reader for one log file. For rotating files we + /// pre-queue the rotated siblings (oldest-first) when starting + /// from `Beginning`, or walk the rotation chain to locate the + /// cursor's generation when resuming from a [`LogCursor`]. + async fn open( + config: &LogFileConfig, + log_dir: &Path, + sources: &[LogSource], + start: &LogStreamStart, + initial_position: FilePosition, + ) -> MicrosandboxResult { + let rotation = RotationConfig { + log_dir: log_dir.to_path_buf(), + filename: config.filename.to_string(), + max_index: config.max_rotation_index, + }; + let primary_path = rotation.path(0); + let parser = match config.format { + LogFileFormat::Jsonl => ParserKind::Jsonl { + sources: sources.to_vec(), + }, + LogFileFormat::Text => ParserKind::Text, + }; + + let mut backfill = VecDeque::new(); + let mut initial_live_offset = 0u64; + + if rotation.rotates() { + let resuming_from_cursor = matches!(start, LogStreamStart::From(_)) + && initial_position != FilePosition::default(); + if resuming_from_cursor { + let Some(index) = rotation.find_index(initial_position.generation).await else { + return Err(MicrosandboxError::InvalidCursor(format!( + "{} generation {} is not in the current rotation chain", + config.filename, initial_position.generation + ))); + }; + if index == 0 { + initial_live_offset = initial_position.offset; + } else { + if let Some(mut src) = FileHandle::open(&rotation.path(index)).await? { + src.offset = initial_position.offset; + backfill.push_back(src); + } + for i in (1..index).rev() { + if let Some(src) = FileHandle::open(&rotation.path(i)).await? { + backfill.push_back(src); + } + } + } + } else { + rotation.queue_all_rotated(&mut backfill).await?; + } + } else { + initial_live_offset = initial_position.offset; + } + + Ok(Self { + primary_path, + parser, + rotation, + backfill, + live: None, + initial_live_offset, + }) + } + + /// Read whatever's available now: drain any queued rotated + /// files first, then read from the live file, then check for + /// rotation and reseat the live FD if the inode changed. + /// Returns an empty `Vec` when nothing new is on disk yet. + async fn read_chunk( + &mut self, + since: Option>, + ) -> MicrosandboxResult { + while let Some(src) = self.backfill.front_mut() { + let chunk = self + .parser + .parse_from(src, &self.primary_path, since) + .await?; + if chunk.position.is_some() || !chunk.entries.is_empty() { + return Ok(chunk); + } + self.backfill.pop_front(); + } + + if self.live.is_none() { + let Some(mut src) = FileHandle::open(&self.primary_path).await? else { + return Ok(ParsedChunk { + entries: Vec::new(), + position: None, + }); + }; + src.offset = self.initial_live_offset; + self.live = Some(src); + } + + let live = self.live.as_mut().unwrap(); + let chunk = self + .parser + .parse_from(live, &self.primary_path, since) + .await?; + if chunk.position.is_some() || !chunk.entries.is_empty() { + return Ok(chunk); + } + + let current_inode = match FileHandle::generation_of_path(&self.primary_path).await { + Some(g) => g, + None => { + return Ok(ParsedChunk { + entries: Vec::new(), + position: None, + }); + } + }; + if current_inode == live.inode { + return Ok(ParsedChunk { + entries: Vec::new(), + position: None, + }); + } + + if self.rotation.rotates() { + self.handle_rotation().await + } else { + self.replace_live().await + } + } + + /// Live inode changed: locate where the formerly-live file + /// rotated to, queue any intermediate files that appeared + /// (rotated past in between reads), and open the new live FD. + /// Returns [`MicrosandboxError::MissedRotation`] when the file + /// has rotated past the retention window. + async fn handle_rotation(&mut self) -> MicrosandboxResult { + let live = self + .live + .as_ref() + .expect("handle_rotation called with no live"); + let live_inode = live.inode; + let live_offset = live.offset; + let Some(rotation_index) = self.rotation.find_index(live_inode).await else { + return Err(MicrosandboxError::MissedRotation { + dropped_from_offset: live_offset, + }); + }; + let mut newly_queued: Vec = Vec::new(); + for i in (1..rotation_index).rev() { + if let Some(src) = FileHandle::open(&self.rotation.path(i)).await? { + newly_queued.push(src); + } + } + for src in newly_queued { + self.backfill.push_back(src); + } + if let Some(new_live) = FileHandle::open(&self.primary_path).await? { + self.live = Some(new_live); + } + Ok(ParsedChunk { + entries: Vec::new(), + position: None, + }) + } + + /// Non-rotating file's inode changed (e.g. truncate + rewrite): + /// reopen the live FD against the new inode. Any data on the + /// previous inode that wasn't read is lost by design — this + /// case is reserved for files that don't promise retention. + async fn replace_live(&mut self) -> MicrosandboxResult { + if let Some(src) = FileHandle::open(&self.primary_path).await? { + self.live = Some(src); + } + Ok(ParsedChunk { + entries: Vec::new(), + position: None, + }) + } +} + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use bytes::Bytes; + use futures::StreamExt; + + const LOG_FILES: &[LogFileConfig] = &[ + LogFileConfig { + filename: "exec.log", + format: LogFileFormat::Jsonl, + max_rotation_index: 4, + produces: &[ + LogSource::Stdout, + LogSource::Stderr, + LogSource::Output, + LogSource::System, + ], + }, + LogFileConfig { + filename: "runtime.log", + format: LogFileFormat::Text, + max_rotation_index: 0, + produces: &[LogSource::System], + }, + LogFileConfig { + filename: "kernel.log", + format: LogFileFormat::Text, + max_rotation_index: 0, + produces: &[LogSource::System], + }, + ]; + + fn make_jsonl_line(ts: &str, source: &str, data: &str, id: Option) -> String { + match id { + Some(id) => format!(r#"{{"t":"{ts}","s":"{source}","d":"{data}","id":{id}}}"#), + None => format!(r#"{{"t":"{ts}","s":"{source}","d":"{data}"}}"#), + } + } + + fn write_lines(path: &Path, lines: &[String]) { + let mut blob = String::new(); + for l in lines { + blob.push_str(l); + blob.push('\n'); + } + std::fs::write(path, blob).unwrap(); + } + + fn make_dir_with_exec(lines: &[String]) -> tempfile::TempDir { + let dir = tempfile::tempdir().unwrap(); + write_lines(&dir.path().join("exec.log"), lines); + dir + } + + async fn make_state( + log_dir: PathBuf, + opts: &LogStreamOptions, + ) -> MicrosandboxResult { + let sources = LogSource::effective(&opts.sources); + LogEngine::new( + log_dir, + LOG_FILES, + sources, + &opts.start, + opts.until, + opts.follow, + ) + .await + } + + async fn collect_with_timeout( + stream: S, + timeout: Duration, + ) -> Vec> + where + S: Stream> + Unpin, + { + let mut out = Vec::new(); + let mut s = stream; + let deadline = tokio::time::Instant::now() + timeout; + loop { + let now = tokio::time::Instant::now(); + if now >= deadline { + break; + } + let remaining = deadline - now; + match tokio::time::timeout(remaining, s.next()).await { + Ok(Some(item)) => out.push(item), + Ok(None) => break, + Err(_) => break, + } + } + out + } + + #[tokio::test] + async fn backfill_without_follow_drains_and_ends() { + let lines = vec![ + make_jsonl_line("2026-04-30T20:32:59.000Z", "stdout", "a", Some(1)), + make_jsonl_line("2026-04-30T20:33:00.000Z", "stdout", "b", Some(1)), + ]; + let dir = make_dir_with_exec(&lines); + let state = make_state( + dir.path().to_path_buf(), + &LogStreamOptions { + follow: false, + ..Default::default() + }, + ) + .await + .unwrap(); + let items = + collect_with_timeout(Box::pin(state.into_stream()), Duration::from_secs(2)).await; + assert_eq!(items.len(), 2); + let entries: Vec<_> = items.into_iter().map(|r| r.unwrap()).collect(); + assert_eq!(entries[0].data, Bytes::from("a".as_bytes())); + assert_eq!(entries[1].data, Bytes::from("b".as_bytes())); + assert!(entries[1].cursor.positions[0].offset > entries[0].cursor.positions[0].offset); + } + + #[tokio::test] + async fn follow_picks_up_new_appends() { + let initial = vec![make_jsonl_line( + "2026-04-30T20:32:59.000Z", + "stdout", + "first", + Some(1), + )]; + let dir = make_dir_with_exec(&initial); + let path = dir.path().to_path_buf(); + let state = make_state( + path.clone(), + &LogStreamOptions { + follow: true, + ..Default::default() + }, + ) + .await + .unwrap(); + let stream = state.into_stream(); + tokio::pin!(stream); + + let first = tokio::time::timeout(Duration::from_secs(3), stream.next()) + .await + .expect("first entry within 3s") + .expect("stream not ended") + .expect("not an error"); + assert_eq!(first.data, Bytes::from("first".as_bytes())); + + let mut blob = String::new(); + for l in &initial { + blob.push_str(l); + blob.push('\n'); + } + blob.push_str(&make_jsonl_line( + "2026-04-30T20:33:00.000Z", + "stdout", + "second", + Some(1), + )); + blob.push('\n'); + std::fs::write(path.join("exec.log"), blob).unwrap(); + + let second = tokio::time::timeout(Duration::from_secs(5), stream.next()) + .await + .expect("second entry within 5s") + .expect("stream not ended") + .expect("not an error"); + assert_eq!(second.data, Bytes::from("second".as_bytes())); + } + + #[tokio::test] + async fn resume_from_cursor_starts_after_that_entry() { + let lines = vec![ + make_jsonl_line("2026-04-30T20:32:59.000Z", "stdout", "a", Some(1)), + make_jsonl_line("2026-04-30T20:33:00.000Z", "stdout", "b", Some(1)), + make_jsonl_line("2026-04-30T20:33:01.000Z", "stdout", "c", Some(1)), + ]; + let dir = make_dir_with_exec(&lines); + let state = make_state(dir.path().to_path_buf(), &LogStreamOptions::default()) + .await + .unwrap(); + let items = + collect_with_timeout(Box::pin(state.into_stream()), Duration::from_secs(2)).await; + let entries: Vec<_> = items.into_iter().map(|r| r.unwrap()).collect(); + assert_eq!(entries.len(), 3); + let cursor_after_b = entries[1].cursor.clone(); + + let state2 = make_state( + dir.path().to_path_buf(), + &LogStreamOptions { + start: LogStreamStart::From(cursor_after_b), + ..Default::default() + }, + ) + .await + .unwrap(); + let items2 = + collect_with_timeout(Box::pin(state2.into_stream()), Duration::from_secs(2)).await; + let entries2: Vec<_> = items2.into_iter().map(|r| r.unwrap()).collect(); + assert_eq!(entries2.len(), 1); + assert_eq!(entries2[0].data, Bytes::from("c".as_bytes())); + } + + #[tokio::test] + async fn invalid_cursor_yields_error_upfront() { + let dir = make_dir_with_exec(&[make_jsonl_line( + "2026-04-30T20:32:59.000Z", + "stdout", + "a", + Some(1), + )]); + let bogus = LogCursor { + positions: vec![FilePosition { + generation: 0xffff_ffff_ffff_ffff, + offset: 0, + }], + }; + let result = make_state( + dir.path().to_path_buf(), + &LogStreamOptions { + start: LogStreamStart::From(bogus), + ..Default::default() + }, + ) + .await; + match result { + Err(MicrosandboxError::InvalidCursor(_)) => {} + Err(other) => panic!("expected InvalidCursor, got: {other:?}"), + Ok(_) => panic!("expected InvalidCursor, got Ok state"), + } + } + + #[tokio::test] + async fn drains_rotated_files_in_chronological_order() { + let dir = tempfile::tempdir().unwrap(); + write_lines( + &dir.path().join("exec.log.1"), + &[make_jsonl_line( + "2026-04-30T20:30:00.000Z", + "stdout", + "old", + Some(1), + )], + ); + write_lines( + &dir.path().join("exec.log"), + &[make_jsonl_line( + "2026-04-30T20:33:00.000Z", + "stdout", + "new", + Some(1), + )], + ); + let state = make_state(dir.path().to_path_buf(), &LogStreamOptions::default()) + .await + .unwrap(); + let items = + collect_with_timeout(Box::pin(state.into_stream()), Duration::from_secs(2)).await; + let entries: Vec<_> = items.into_iter().map(|r| r.unwrap()).collect(); + assert_eq!(entries.len(), 2); + assert_eq!(entries[0].data, Bytes::from("old".as_bytes())); + assert_eq!(entries[1].data, Bytes::from("new".as_bytes())); + } + + #[tokio::test] + async fn includes_runtime_log_when_system_requested() { + let dir = tempfile::tempdir().unwrap(); + write_lines( + &dir.path().join("exec.log"), + &[make_jsonl_line( + "2026-04-30T20:33:00.000Z", + "stdout", + "user-output", + Some(1), + )], + ); + write_lines( + &dir.path().join("runtime.log"), + &["2026-04-30T20:30:00.000Z INFO starting up".to_string()], + ); + let state = make_state( + dir.path().to_path_buf(), + &LogStreamOptions { + sources: vec![LogSource::Stdout, LogSource::System], + follow: false, + ..Default::default() + }, + ) + .await + .unwrap(); + let items = + collect_with_timeout(Box::pin(state.into_stream()), Duration::from_secs(2)).await; + let entries: Vec<_> = items.into_iter().map(|r| r.unwrap()).collect(); + assert!( + entries + .iter() + .any(|e| e.data == Bytes::from("user-output".as_bytes())), + "missing user-output entry: {entries:?}" + ); + assert!( + entries.iter().any(|e| e.source == LogSource::System + && std::str::from_utf8(&e.data) + .unwrap_or("") + .contains("starting up")), + "missing runtime.log system entry: {entries:?}" + ); + } +} diff --git a/crates/microsandbox/lib/logs/types.rs b/crates/microsandbox/lib/logs/types.rs new file mode 100644 index 000000000..ebdcdca47 --- /dev/null +++ b/crates/microsandbox/lib/logs/types.rs @@ -0,0 +1,170 @@ +//! Public value types: source tags, the entry shape, and the +//! snapshot filter options. + +use bytes::Bytes; +use chrono::{DateTime, Utc}; + +use super::cursor::LogCursor; + +//-------------------------------------------------------------------------------------------------- +// LogSource +//-------------------------------------------------------------------------------------------------- + +/// Source tag on a captured log entry. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum LogSource { + /// Captured from a session's stdout (pipe mode). + Stdout, + + /// Captured from a session's stderr (pipe mode). + Stderr, + + /// Captured from a session in pty mode (stdout + stderr merged + /// at the kernel level inside the guest arrive as a single + /// stream — tagged `output` rather than pretending to be + /// `stdout`). + Output, + + /// Synthetic system entry: lifecycle markers, runtime + /// diagnostics, kernel console output. + System, +} + +impl LogSource { + /// Apply the empty-means-default rule used by both + /// [`read_logs`](super::read_logs) and + /// [`log_stream`](super::log_stream): if `requested` is empty, + /// return the default user-program sources + /// (`Stdout` + `Stderr` + `Output`); otherwise return a + /// sorted, deduplicated copy of `requested`. + pub(crate) fn effective(requested: &[Self]) -> Vec { + if requested.is_empty() { + vec![Self::Stdout, Self::Stderr, Self::Output] + } else { + let mut s = requested.to_vec(); + s.sort_by_key(|src| match src { + Self::Stdout => 0, + Self::Stderr => 1, + Self::Output => 2, + Self::System => 3, + }); + s.dedup(); + s + } + } +} + +//-------------------------------------------------------------------------------------------------- +// LogEntry +//-------------------------------------------------------------------------------------------------- + +/// A single captured log entry. +#[derive(Debug, Clone)] +pub struct LogEntry { + /// Wall-clock time the chunk was captured by the host. + pub timestamp: DateTime, + + /// Where the chunk came from. + pub source: LogSource, + + /// Per-session identifier. Set for entries captured from an + /// exec session; `None` for `system` entries which are not + /// tied to a specific session. + pub session_id: Option, + + /// Decoded body bytes. UTF-8 lossy by default; if the underlying + /// chunk was raw-mode base64, this is the decoded raw bytes. + pub data: Bytes, + + /// Opaque per-source resume handle. Snapshot of the positions + /// each source has reached after emitting this entry. Pass to + /// [`LogStreamStart::From`](super::LogStreamStart::From) to + /// resume; each source picks up independently from its + /// captured position. + pub cursor: LogCursor, +} + +//-------------------------------------------------------------------------------------------------- +// LogOptions +//-------------------------------------------------------------------------------------------------- + +/// Filters applied when reading historical log entries. +#[derive(Debug, Clone, Default)] +pub struct LogOptions { + /// Show only the last N entries after all other filters apply. + pub tail: Option, + + /// Inclusive lower bound on entry timestamp. + pub since: Option>, + + /// Exclusive upper bound on entry timestamp. + pub until: Option>, + + /// Sources to include. If empty, defaults to + /// `Stdout` + `Stderr` + `Output`. + pub sources: Vec, +} + +impl LogOptions { + /// Apply `since`, `until`, and `tail` filters to `entries` + /// in place, in that order. + pub(crate) fn apply_to(&self, entries: &mut Vec) { + if let Some(s) = self.since { + entries.retain(|e| e.timestamp >= s); + } + if let Some(u) = self.until { + entries.retain(|e| e.timestamp < u); + } + if let Some(n) = self.tail + && entries.len() > n + { + let drop = entries.len() - n; + entries.drain(0..drop); + } + } +} + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_sources_are_user_program_output() { + let s = LogSource::effective(&[]); + assert_eq!( + s, + vec![LogSource::Stdout, LogSource::Stderr, LogSource::Output] + ); + } + + #[test] + fn explicit_sources_used_when_set() { + let s = LogSource::effective(&[LogSource::System]); + assert_eq!(s, vec![LogSource::System]); + } + + #[test] + fn apply_filters_tail() { + let mut entries = (0..5) + .map(|i| LogEntry { + timestamp: DateTime::parse_from_rfc3339(&format!("2026-04-30T00:00:0{i}.000Z")) + .unwrap() + .with_timezone(&Utc), + source: LogSource::Stdout, + session_id: Some(1u64), + data: Bytes::from(format!("line {i}").into_bytes()), + cursor: LogCursor::empty(), + }) + .collect::>(); + LogOptions { + tail: Some(2), + ..Default::default() + } + .apply_to(&mut entries); + assert_eq!(entries.len(), 2); + } +} diff --git a/crates/microsandbox/lib/sandbox/config.rs b/crates/microsandbox/lib/sandbox/config.rs index daecc0584..4b380f70d 100644 --- a/crates/microsandbox/lib/sandbox/config.rs +++ b/crates/microsandbox/lib/sandbox/config.rs @@ -412,11 +412,9 @@ impl Default for SandboxConfig { #[cfg(test)] mod tests { - use microsandbox_image::ImageConfig; - - use crate::sandbox::{RootfsSource, VolumeMount}; - use super::{SandboxConfig, merge_env}; + use crate::sandbox::{RootfsSource, VolumeMount}; + use microsandbox_image::ImageConfig; #[test] fn test_merge_env_image_base_with_user_override() { diff --git a/crates/microsandbox/lib/sandbox/handle.rs b/crates/microsandbox/lib/sandbox/handle.rs index a667b9c37..e946a3b73 100644 --- a/crates/microsandbox/lib/sandbox/handle.rs +++ b/crates/microsandbox/lib/sandbox/handle.rs @@ -111,8 +111,25 @@ impl SandboxHandle { /// /// Same backing data as [`Sandbox::logs`](super::Sandbox::logs). /// Works without starting the sandbox. - pub fn logs(&self, opts: &super::LogOptions) -> MicrosandboxResult> { - super::logs::read_logs(&self.name, opts) + pub async fn logs( + &self, + opts: &crate::logs::LogOptions, + ) -> MicrosandboxResult> { + crate::logs::read_logs(&self.name, opts).await + } + + /// Stream captured output as it appears, with optional follow. + /// + /// Same backing data as [`Sandbox::log_stream`](super::Sandbox::log_stream). + /// Works without starting the sandbox; with `follow: true`, the + /// stream picks up new entries the moment they land in `exec.log`. + pub async fn log_stream( + &self, + opts: &crate::logs::LogStreamOptions, + ) -> MicrosandboxResult< + impl futures::Stream> + Send + 'static, + > { + crate::logs::log_stream(&self.name, opts).await } /// Get the latest metrics snapshot for this sandbox. diff --git a/crates/microsandbox/lib/sandbox/logs.rs b/crates/microsandbox/lib/sandbox/logs.rs deleted file mode 100644 index c1c57fac1..000000000 --- a/crates/microsandbox/lib/sandbox/logs.rs +++ /dev/null @@ -1,368 +0,0 @@ -//! Programmatic access to a sandbox's captured output (`exec.log`). -//! -//! Reads the JSON Lines file the runtime writes via the relay tap (see -//! `crates/runtime/lib/exec_log.rs`). Works on running and stopped -//! sandboxes alike — there is no protocol traffic involved; we read -//! the persisted file directly. -//! -//! For the CLI, see `crates/cli/lib/commands/logs.rs`. This module -//! exposes the same data to SDK callers as a typed iterator. - -use std::path::{Path, PathBuf}; - -use bytes::Bytes; -use chrono::{DateTime, Utc}; -use microsandbox_utils::log_text::{base64_decode, split_leading_timestamp, strip_ansi}; -use serde::Deserialize; - -use crate::{MicrosandboxError, MicrosandboxResult}; - -//-------------------------------------------------------------------------------------------------- -// Types -//-------------------------------------------------------------------------------------------------- - -/// Source tag on a captured log entry. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum LogSource { - /// Captured from the primary session's stdout (pipe mode). - Stdout, - - /// Captured from the primary session's stderr (pipe mode). - Stderr, - - /// Captured from the primary session in pty mode (stdout+stderr - /// are merged at the kernel level inside the guest, so they - /// arrive as a single stream — tagged `output` rather than - /// pretending to be `stdout`). - Output, - - /// Synthetic system entry (lifecycle marker, runtime/kernel diag). - System, -} - -/// A single captured log entry. -#[derive(Debug, Clone)] -pub struct LogEntry { - /// Wall-clock time the chunk was captured by the host. - pub timestamp: DateTime, - - /// Where the chunk came from. - pub source: LogSource, - - /// Relay-monotonic session id. Distinct from the protocol - /// correlation id (which is reused across slot recycling): every - /// session opened against the sandbox per lifetime gets a - /// unique id, starting at 1 and counting up. `None` for `system` - /// lifecycle markers, which are not tied to a specific session. - pub session_id: Option, - - /// Decoded body bytes. UTF-8 lossy by default; if the underlying - /// chunk was raw-mode base64, this is the decoded raw bytes. - pub data: Bytes, -} - -/// Filters applied when reading historical log entries. -#[derive(Debug, Clone, Default)] -pub struct LogOptions { - /// Show only the last N entries after all other filters apply. - pub tail: Option, - - /// Inclusive lower bound on entry timestamp. - pub since: Option>, - - /// Exclusive upper bound on entry timestamp. - pub until: Option>, - - /// Sources to include. If empty, defaults to - /// `Stdout` + `Stderr` (matching the CLI default). - pub sources: Vec, -} - -//-------------------------------------------------------------------------------------------------- -// Internal types -//-------------------------------------------------------------------------------------------------- - -#[derive(Debug, Deserialize)] -struct RawEntry { - t: String, - s: String, - #[serde(default)] - d: String, - #[serde(default)] - id: Option, - #[serde(default)] - e: Option, -} - -//-------------------------------------------------------------------------------------------------- -// Functions -//-------------------------------------------------------------------------------------------------- - -/// Read all matching log entries from the on-disk file for the named -/// sandbox. -/// -/// Returns an empty vector if the sandbox has no `exec.log` yet (i.e. -/// has been created but never opened a primary exec session). -pub fn read_logs(name: &str, opts: &LogOptions) -> MicrosandboxResult> { - let log_dir = log_dir_for(name); - if !log_dir.exists() { - return Err(MicrosandboxError::SandboxNotFound(name.to_string())); - } - - let sources = effective_sources(opts); - - let mut entries = read_jsonl_history(&log_dir, &sources)?; - - apply_filters(&mut entries, opts); - Ok(entries) -} - -/// Compute the on-disk log directory for a sandbox name. -pub fn log_dir_for(name: &str) -> PathBuf { - crate::config::config() - .sandboxes_dir() - .join(name) - .join("logs") -} - -//-------------------------------------------------------------------------------------------------- -// Functions: Helpers -//-------------------------------------------------------------------------------------------------- - -fn effective_sources(opts: &LogOptions) -> Vec { - if opts.sources.is_empty() { - // Default = the user-program output. Includes `Output` - // (pty merged) so a pty session's logs aren't silently - // filtered out under the default. - vec![LogSource::Stdout, LogSource::Stderr, LogSource::Output] - } else { - opts.sources.clone() - } -} - -fn read_jsonl_history(log_dir: &Path, sources: &[LogSource]) -> MicrosandboxResult> { - let mut out: Vec = Vec::new(); - - let want_exec = sources - .iter() - .any(|s| matches!(s, LogSource::Stdout | LogSource::Stderr | LogSource::Output)); - if want_exec { - // Rotated files first so output is chronological. - for suffix in [".3", ".2", ".1", ""].iter() { - let path = if suffix.is_empty() { - log_dir.join("exec.log") - } else { - log_dir.join(format!("exec.log{suffix}")) - }; - if !path.exists() { - continue; - } - append_jsonl_file(&path, &mut out, sources)?; - } - } - - if sources.iter().any(|s| matches!(s, LogSource::System)) { - // Cross-merge runtime.log and kernel.log as system lines. - for filename in ["runtime.log", "kernel.log"].iter() { - let path = log_dir.join(filename); - append_text_log_as_system(&path, &mut out); - } - // Already typed `DateTime`, so sort_by_key on the field - // is correct (no precision-loss issue here — that bug only - // affected the CLI's string-keyed sort). - out.sort_by_key(|e| e.timestamp); - } - - Ok(out) -} - -fn append_jsonl_file( - path: &Path, - out: &mut Vec, - sources: &[LogSource], -) -> MicrosandboxResult<()> { - let bytes = std::fs::read(path)?; - let text = String::from_utf8_lossy(&bytes); - for line in text.lines() { - if line.is_empty() { - continue; - } - let raw: RawEntry = match serde_json::from_str(line) { - Ok(v) => v, - Err(_) => continue, - }; - let source = match raw.s.as_str() { - "stdout" => LogSource::Stdout, - "stderr" => LogSource::Stderr, - "output" => LogSource::Output, - "system" => LogSource::System, - _ => continue, - }; - if !sources.contains(&source) { - continue; - } - let timestamp = match DateTime::parse_from_rfc3339(&raw.t) { - Ok(t) => t.with_timezone(&Utc), - Err(_) => continue, - }; - let data = decode_body(&raw); - out.push(LogEntry { - timestamp, - source, - session_id: raw.id, - data, - }); - } - Ok(()) -} - -fn decode_body(raw: &RawEntry) -> Bytes { - if raw.e.as_deref() == Some("b64") { - match base64_decode(&raw.d) { - Some(bytes) => Bytes::from(bytes), - None => Bytes::from(raw.d.clone().into_bytes()), - } - } else { - Bytes::from(raw.d.clone().into_bytes()) - } -} - -fn append_text_log_as_system(path: &Path, out: &mut Vec) { - if !path.exists() { - return; - } - let bytes = match std::fs::read(path) { - Ok(b) => b, - Err(_) => return, - }; - let text = String::from_utf8_lossy(&bytes); - let mtime = std::fs::metadata(path) - .ok() - .and_then(|m| m.modified().ok()) - .map(|t| -> DateTime { t.into() }) - .unwrap_or_else(Utc::now); - - for line in text.lines() { - if line.is_empty() { - continue; - } - // Strip ANSI before parsing — tracing wraps the timestamp in - // color escapes which would otherwise hide it from the - // leading-token detector. Fall back to file mtime if a line - // genuinely lacks an RFC 3339 prefix. - let stripped = strip_ansi(line); - let (ts, body) = match split_leading_timestamp(&stripped) { - Some((s, rest)) => ( - DateTime::parse_from_rfc3339(s) - .map(|d| d.with_timezone(&Utc)) - .unwrap_or(mtime), - rest.trim_start().to_string(), - ), - None => (mtime, stripped.clone()), - }; - out.push(LogEntry { - timestamp: ts, - source: LogSource::System, - session_id: None, - data: Bytes::from(format!("{body}\n").into_bytes()), - }); - } -} - -fn apply_filters(entries: &mut Vec, opts: &LogOptions) { - if let Some(s) = opts.since { - entries.retain(|e| e.timestamp >= s); - } - if let Some(u) = opts.until { - entries.retain(|e| e.timestamp < u); - } - if let Some(n) = opts.tail - && entries.len() > n - { - let drop = entries.len() - n; - entries.drain(0..drop); - } -} - -//-------------------------------------------------------------------------------------------------- -// Tests -//-------------------------------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn default_sources_are_user_program_output() { - let opts = LogOptions::default(); - let s = effective_sources(&opts); - assert_eq!( - s, - vec![LogSource::Stdout, LogSource::Stderr, LogSource::Output] - ); - } - - #[test] - fn explicit_sources_used_when_set() { - let opts = LogOptions { - sources: vec![LogSource::System], - ..Default::default() - }; - let s = effective_sources(&opts); - assert_eq!(s, vec![LogSource::System]); - } - - #[test] - fn round_trip_jsonl_via_temp_dir() { - let dir = tempfile::tempdir().unwrap(); - let exec_log_path = dir.path().join("exec.log"); - let lines = [ - r#"{"t":"2026-04-30T20:32:59.000Z","s":"stdout","d":"hello\n","id":7}"#, - r#"{"t":"2026-04-30T20:33:00.000Z","s":"stderr","d":"oops\n","id":7}"#, - r#"{"t":"2026-04-30T20:33:01.000Z","s":"system","d":"--- sandbox stopped ---\n"}"#, - ]; - std::fs::write(&exec_log_path, lines.join("\n").into_bytes()).unwrap(); - - let mut out = Vec::new(); - append_jsonl_file( - &exec_log_path, - &mut out, - &[LogSource::Stdout, LogSource::Stderr], - ) - .unwrap(); - assert_eq!(out.len(), 2); - assert_eq!(out[0].source, LogSource::Stdout); - assert_eq!(out[0].data, Bytes::from("hello\n".as_bytes())); - assert_eq!(out[0].session_id, Some(7u64)); - assert_eq!(out[1].source, LogSource::Stderr); - assert_eq!(out[1].session_id, Some(7u64)); - } - - #[test] - fn apply_filters_tail() { - let mut entries = (0..5) - .map(|i| LogEntry { - timestamp: DateTime::parse_from_rfc3339(&format!("2026-04-30T00:00:0{i}.000Z")) - .unwrap() - .with_timezone(&Utc), - source: LogSource::Stdout, - session_id: Some(1u64), - data: Bytes::from(format!("line {i}").into_bytes()), - }) - .collect::>(); - apply_filters( - &mut entries, - &LogOptions { - tail: Some(2), - ..Default::default() - }, - ); - assert_eq!(entries.len(), 2); - } - - #[test] - fn base64_decode_basic() { - assert_eq!(base64_decode("aGVsbG8=").unwrap(), b"hello"); - assert_eq!(base64_decode("").unwrap(), Vec::::new()); - } -} diff --git a/crates/microsandbox/lib/sandbox/mod.rs b/crates/microsandbox/lib/sandbox/mod.rs index 71dd93435..ee4895d64 100644 --- a/crates/microsandbox/lib/sandbox/mod.rs +++ b/crates/microsandbox/lib/sandbox/mod.rs @@ -12,7 +12,6 @@ pub mod exec; pub mod fs; mod handle; pub mod init; -pub mod logs; mod metrics; mod patch; mod types; @@ -64,7 +63,6 @@ pub use exec::{ExecOptionsBuilder, ExecOutput, Rlimit, RlimitResource}; pub use fs::{FsEntry, FsEntryKind, FsMetadata, FsReadStream, FsWriteSink, SandboxFs}; pub use handle::{DEFAULT_CONNECT_TIMEOUT, DEFAULT_STOP_TIMEOUT, SandboxHandle}; pub use init::{HandoffInit, InitOptionsBuilder}; -pub use logs::{LogEntry, LogOptions, LogSource}; pub use metrics::{SandboxMetrics, all_sandbox_metrics}; pub use microsandbox_image::{PullPolicy, PullProgress, PullProgressHandle}; #[cfg(feature = "net")] @@ -501,8 +499,29 @@ impl Sandbox { /// running and stopped sandboxes alike — there is no protocol /// traffic. Pass `LogOptions::default()` for "everything, /// stdout+stderr". - pub fn logs(&self, opts: &LogOptions) -> MicrosandboxResult> { - logs::read_logs(self.name(), opts) + pub async fn logs( + &self, + opts: &crate::logs::LogOptions, + ) -> MicrosandboxResult> { + crate::logs::read_logs(self.name(), opts).await + } + + /// Stream captured output as it appears, with optional follow. + /// + /// Backed by the same on-disk `exec.log` as [`logs`](Self::logs), + /// but yields entries lazily as a [`futures::Stream`]. Pass + /// `LogStreamOptions { follow: true, .. }` to keep the stream + /// open past current EOF and pick up new entries as they are + /// written; otherwise the stream drains the current contents and + /// ends. See the type docs on [`crate::logs::LogStreamOptions`] and + /// [`crate::logs::LogStreamStart`] for replay / resume options. + pub async fn log_stream( + &self, + opts: &crate::logs::LogStreamOptions, + ) -> MicrosandboxResult< + impl futures::Stream> + Send + 'static, + > { + crate::logs::log_stream(self.name(), opts).await } /// Low-level access to the guest agent client. Use this for custom diff --git a/crates/microsandbox/tests/dns_matrix/sandbox.rs b/crates/microsandbox/tests/dns_matrix/sandbox.rs index 35341cacc..b23618b42 100644 --- a/crates/microsandbox/tests/dns_matrix/sandbox.rs +++ b/crates/microsandbox/tests/dns_matrix/sandbox.rs @@ -2,7 +2,7 @@ //! configuration, install the DNS tooling we need, and surface the //! guest's gateway IP for scenarios that target it explicitly. -use std::net::Ipv4Addr; +use std::{net::Ipv4Addr, time::Duration}; use ipnetwork::{IpNetwork, Ipv4Network}; use microsandbox::{NetworkPolicy, Sandbox}; @@ -57,9 +57,46 @@ pub(crate) fn deny_resolver(resolver: &str) -> Result Result<(), Box> { - sb.shell("apk add --quiet --no-progress bind-tools >/dev/null 2>&1") - .await?; - Ok(()) + let mut last_error = String::new(); + for attempt in 1..=3 { + let out = sb.shell("apk add --quiet --no-progress bind-tools").await?; + if out.status().success { + let check = sb.shell("command -v dig").await?; + if check.status().success { + return Ok(()); + } + last_error = format!( + "bind-tools installed, but dig was not found (attempt {attempt}):\n{}", + describe_output(&check), + ); + } else { + last_error = format!( + "apk add bind-tools failed (attempt {attempt}):\n{}", + describe_output(&out), + ); + } + + if attempt < 3 { + tokio::time::sleep(Duration::from_secs(2)).await; + } + } + + Err(format!("failed to install dig inside guest after 3 attempts:\n{last_error}").into()) +} + +fn describe_output(output: µsandbox::sandbox::exec::ExecOutput) -> String { + let stdout = output + .stdout() + .unwrap_or_else(|_| String::from_utf8_lossy(output.stdout_bytes()).into_owned()); + let stderr = output + .stderr() + .unwrap_or_else(|_| String::from_utf8_lossy(output.stderr_bytes()).into_owned()); + format!( + "exit code: {}\nstdout:\n{}\nstderr:\n{}", + output.status().code, + stdout.trim(), + stderr.trim(), + ) } /// Read the guest's first configured nameserver (the sandbox gateway) diff --git a/crates/microsandbox/tests/log_stream.rs b/crates/microsandbox/tests/log_stream.rs new file mode 100644 index 000000000..9506ffec0 --- /dev/null +++ b/crates/microsandbox/tests/log_stream.rs @@ -0,0 +1,478 @@ +//! Integration tests for the public `log_stream` / `read_logs` API. +//! +//! Drives the full caller path — +//! `microsandbox::logs::log_stream(name, opts)` — against +//! a synthetic on-disk log directory. No real microVM is booted; +//! we set `MSB_HOME` to a tempdir, lay out `sandboxes//logs/` +//! with hand-crafted `exec.log` / `runtime.log` / `kernel.log`, +//! and verify the streamed entries. +//! +//! Each test uses a unique sandbox name so they can run in +//! parallel under the shared `MSB_HOME`. +//! +//! These tests don't require any external infrastructure +//! (libkrunfw, kernel image, etc.) — they're pure I/O exercises +//! against the streaming engine through its public entry points. + +use std::path::PathBuf; +use std::sync::OnceLock; + +use futures::TryStreamExt; +use microsandbox::logs; +use microsandbox::logs::{LogCursor, LogOptions, LogSource, LogStreamOptions, LogStreamStart}; +use tempfile::TempDir; + +/// Singleton MSB_HOME for the whole test binary. We set it once +/// before any code touches the global config; tests share the root +/// and use distinct sandbox names so they don't collide. +static MSB_HOME: OnceLock = OnceLock::new(); + +fn msb_home() -> &'static TempDir { + MSB_HOME.get_or_init(|| { + let dir = tempfile::Builder::new() + .prefix("msb-log-stream-test-") + .tempdir() + .expect("failed to create MSB_HOME tempdir"); + // SAFETY: integration test setup; runs before any other + // code in this binary reads the env. + unsafe { + std::env::set_var("MSB_HOME", dir.path()); + } + dir + }) +} + +/// Create the log directory layout for a fresh sandbox name and +/// return its path. +fn make_log_dir(name: &str) -> PathBuf { + let root = msb_home().path(); + let log_dir = root.join("sandboxes").join(name).join("logs"); + std::fs::create_dir_all(&log_dir).expect("create log dir"); + log_dir +} + +fn jsonl(ts: &str, source: &str, data: &str, id: u64) -> String { + format!(r#"{{"t":"{ts}","s":"{source}","d":"{data}","id":{id}}}"#) +} + +fn ts(s: &str) -> chrono::DateTime { + chrono::DateTime::parse_from_rfc3339(s) + .unwrap() + .with_timezone(&chrono::Utc) +} + +fn write_lines(path: &std::path::Path, lines: &[String]) { + let mut blob = String::new(); + for l in lines { + blob.push_str(l); + blob.push('\n'); + } + std::fs::write(path, blob).expect("write log file"); +} + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[tokio::test] +async fn log_stream_drains_exec_log_with_default_sources() { + let name = "log-stream-default-sources"; + let log_dir = make_log_dir(name); + write_lines( + &log_dir.join("exec.log"), + &[ + jsonl("2026-05-01T10:00:00.000Z", "stdout", "first line", 1), + jsonl("2026-05-01T10:00:01.000Z", "stderr", "warning", 1), + jsonl("2026-05-01T10:00:02.000Z", "output", "pty out", 1), + ], + ); + + let stream = logs::log_stream(name, &LogStreamOptions::default()) + .await + .expect("open stream"); + let entries: Vec<_> = stream.try_collect().await.expect("drain stream"); + + assert_eq!(entries.len(), 3); + assert_eq!(entries[0].source, LogSource::Stdout); + assert_eq!(entries[0].data.as_ref(), b"first line"); + assert_eq!(entries[1].source, LogSource::Stderr); + assert_eq!(entries[2].source, LogSource::Output); +} + +#[tokio::test] +async fn log_stream_includes_system_files_when_requested() { + let name = "log-stream-system-sources"; + let log_dir = make_log_dir(name); + write_lines( + &log_dir.join("exec.log"), + &[jsonl( + "2026-05-01T10:00:00.000Z", + "stdout", + "user output", + 1, + )], + ); + write_lines( + &log_dir.join("runtime.log"), + &["2026-05-01T09:59:00.000Z INFO runtime started".to_string()], + ); + write_lines( + &log_dir.join("kernel.log"), + &["2026-05-01T09:58:00.000Z kernel boot complete".to_string()], + ); + + let stream = logs::log_stream( + name, + &LogStreamOptions { + sources: vec![LogSource::Stdout, LogSource::System], + follow: false, + ..Default::default() + }, + ) + .await + .expect("open stream"); + let entries: Vec<_> = stream.try_collect().await.expect("drain stream"); + + // Should see the exec stdout entry and both system entries. + let has_user = entries + .iter() + .any(|e| e.source == LogSource::Stdout && e.data.as_ref() == b"user output"); + let has_runtime = entries.iter().any(|e| { + e.source == LogSource::System + && std::str::from_utf8(&e.data) + .unwrap_or("") + .contains("runtime started") + }); + let has_kernel = entries.iter().any(|e| { + e.source == LogSource::System + && std::str::from_utf8(&e.data) + .unwrap_or("") + .contains("kernel boot complete") + }); + assert!(has_user, "missing user stdout entry: {entries:?}"); + assert!(has_runtime, "missing runtime.log entry: {entries:?}"); + assert!(has_kernel, "missing kernel.log entry: {entries:?}"); +} + +#[tokio::test] +async fn log_stream_includes_rotated_exec_files() { + let name = "log-stream-rotated"; + let log_dir = make_log_dir(name); + // Older entry in a rotated file, newer in the active file. + write_lines( + &log_dir.join("exec.log.1"), + &[jsonl( + "2026-05-01T08:00:00.000Z", + "stdout", + "from rotated", + 1, + )], + ); + write_lines( + &log_dir.join("exec.log"), + &[jsonl( + "2026-05-01T09:00:00.000Z", + "stdout", + "from active", + 1, + )], + ); + + let stream = logs::log_stream(name, &LogStreamOptions::default()) + .await + .expect("open stream"); + let entries: Vec<_> = stream.try_collect().await.expect("drain stream"); + + assert_eq!(entries.len(), 2); + // Per-source order: rotated file drained first, then active. + assert_eq!(entries[0].data.as_ref(), b"from rotated"); + assert_eq!(entries[1].data.as_ref(), b"from active"); +} + +#[tokio::test] +async fn log_stream_resume_from_cursor_continues_after_seen_entries() { + let name = "log-stream-resume"; + let log_dir = make_log_dir(name); + write_lines( + &log_dir.join("exec.log"), + &[ + jsonl("2026-05-01T10:00:00.000Z", "stdout", "a", 1), + jsonl("2026-05-01T10:00:01.000Z", "stdout", "b", 1), + jsonl("2026-05-01T10:00:02.000Z", "stdout", "c", 1), + ], + ); + + // First pass: drain everything to grab a cursor. + let stream = logs::log_stream(name, &LogStreamOptions::default()) + .await + .expect("first stream"); + let entries: Vec<_> = stream.try_collect().await.expect("drain first stream"); + assert_eq!(entries.len(), 3); + let cursor_after_b = entries[1].cursor.clone(); + + // Second pass: resume from cursor; should yield only "c". + let stream2 = logs::log_stream( + name, + &LogStreamOptions { + start: LogStreamStart::From(cursor_after_b), + ..Default::default() + }, + ) + .await + .expect("resume stream"); + let entries2: Vec<_> = stream2.try_collect().await.expect("drain resumed stream"); + assert_eq!(entries2.len(), 1); + assert_eq!(entries2[0].data.as_ref(), b"c"); +} + +#[tokio::test] +async fn log_stream_cursor_serializes_and_round_trips() { + let name = "log-stream-cursor-rt"; + let log_dir = make_log_dir(name); + write_lines( + &log_dir.join("exec.log"), + &[jsonl("2026-05-01T10:00:00.000Z", "stdout", "x", 1)], + ); + + let stream = logs::log_stream(name, &LogStreamOptions::default()) + .await + .expect("open stream"); + let entries: Vec<_> = stream.try_collect().await.expect("drain"); + assert_eq!(entries.len(), 1); + + // Cursor round-trips through string form. + let cursor_str = entries[0].cursor.to_string(); + let parsed: LogCursor = cursor_str.parse().expect("cursor parses"); + assert_eq!(parsed, entries[0].cursor); +} + +#[tokio::test] +async fn log_stream_follow_picks_up_appended_entries() { + let name = "log-stream-follow"; + let log_dir = make_log_dir(name); + write_lines( + &log_dir.join("exec.log"), + &[jsonl("2026-05-01T10:00:00.000Z", "stdout", "first", 1)], + ); + + let stream = logs::log_stream( + name, + &LogStreamOptions { + follow: true, + ..Default::default() + }, + ) + .await + .expect("open follow stream"); + let mut stream = Box::pin(stream); + use futures::StreamExt; + + // First entry from backfill. + let first = tokio::time::timeout(std::time::Duration::from_secs(3), stream.next()) + .await + .expect("first entry within 3s") + .expect("stream not ended") + .expect("not an error"); + assert_eq!(first.data.as_ref(), b"first"); + + // Append a new entry. + write_lines( + &log_dir.join("exec.log"), + &[ + jsonl("2026-05-01T10:00:00.000Z", "stdout", "first", 1), + jsonl("2026-05-01T10:00:01.000Z", "stdout", "second", 1), + ], + ); + + let second = tokio::time::timeout(std::time::Duration::from_secs(5), stream.next()) + .await + .expect("second entry within 5s") + .expect("stream not ended") + .expect("not an error"); + assert_eq!(second.data.as_ref(), b"second"); +} + +#[tokio::test] +async fn read_logs_returns_sorted_snapshot() { + let name = "read-logs-sorted"; + let log_dir = make_log_dir(name); + // Put runtime.log (older) and exec.log (newer) — read_logs + // should sort the merged result chronologically. + write_lines( + &log_dir.join("runtime.log"), + &["2026-05-01T08:00:00.000Z INFO early runtime".to_string()], + ); + write_lines( + &log_dir.join("exec.log"), + &[jsonl( + "2026-05-01T09:00:00.000Z", + "stdout", + "late stdout", + 1, + )], + ); + + let entries = logs::read_logs( + name, + &LogOptions { + sources: vec![LogSource::Stdout, LogSource::System], + ..Default::default() + }, + ) + .await + .expect("snapshot read"); + + // Chronological order: runtime (08:00) before exec stdout (09:00). + assert!( + entries.len() >= 2, + "expected at least 2 entries: {entries:?}" + ); + assert!( + std::str::from_utf8(&entries[0].data) + .unwrap_or("") + .contains("early runtime"), + "expected runtime entry first: {entries:?}", + ); + assert!( + entries[entries.len() - 1].data.as_ref() == b"late stdout", + "expected stdout entry last: {entries:?}", + ); +} + +#[tokio::test] +async fn read_logs_sorted_cursor_resumes_after_selected_entry() { + let name = "read-logs-sorted-cursor-resume"; + let log_dir = make_log_dir(name); + write_lines( + &log_dir.join("exec.log"), + &[ + jsonl("2026-05-01T10:00:00.000Z", "stdout", "a", 1), + jsonl("2026-05-01T10:02:00.000Z", "stdout", "c", 1), + ], + ); + write_lines( + &log_dir.join("runtime.log"), + &["2026-05-01T10:01:00.000Z INFO b".to_string()], + ); + + let entries = logs::read_logs( + name, + &LogOptions { + sources: vec![LogSource::Stdout, LogSource::System], + ..Default::default() + }, + ) + .await + .expect("snapshot read"); + + assert_eq!(entries.len(), 3); + assert_eq!(entries[0].data.as_ref(), b"a"); + assert!( + std::str::from_utf8(&entries[1].data) + .unwrap() + .contains("INFO b") + ); + assert_eq!(entries[2].data.as_ref(), b"c"); + + let stream = logs::log_stream( + name, + &LogStreamOptions { + sources: vec![LogSource::Stdout, LogSource::System], + start: LogStreamStart::From(entries[1].cursor.clone()), + follow: false, + ..Default::default() + }, + ) + .await + .expect("resume stream"); + let resumed: Vec<_> = stream.try_collect().await.expect("drain resumed stream"); + + assert_eq!(resumed.len(), 1); + assert_eq!(resumed[0].data.as_ref(), b"c"); +} + +#[tokio::test] +async fn read_logs_snapshot_cursor_advances_past_since_filtered_entries() { + let name = "read-logs-snapshot-cursor-since-filter"; + let log_dir = make_log_dir(name); + write_lines( + &log_dir.join("exec.log"), + &[jsonl("2026-05-01T10:00:00.000Z", "stdout", "old", 1)], + ); + + let snapshot = logs::read_logs_snapshot( + name, + &LogOptions { + since: Some(ts("2026-05-01T10:01:00.000Z")), + ..Default::default() + }, + ) + .await + .expect("snapshot read"); + + assert!(snapshot.entries.is_empty()); + + let stream = logs::log_stream( + name, + &LogStreamOptions { + start: LogStreamStart::From(snapshot.cursor), + follow: false, + ..Default::default() + }, + ) + .await + .expect("resume stream"); + let resumed: Vec<_> = stream.try_collect().await.expect("drain resumed stream"); + + assert!( + resumed.is_empty(), + "since-filtered entries replayed after snapshot cursor: {resumed:?}" + ); +} + +#[tokio::test] +async fn log_stream_invalid_cursor_returns_error_upfront() { + let name = "log-stream-invalid-cursor"; + let log_dir = make_log_dir(name); + write_lines( + &log_dir.join("exec.log"), + &[jsonl("2026-05-01T10:00:00.000Z", "stdout", "a", 1)], + ); + + // Craft a cursor pointing at a non-existent exec.log generation. + // We get a valid cursor first, then mangle the string. Simpler: + // use a cursor from a different sandbox name. + let other = "log-stream-invalid-cursor-other"; + let other_dir = make_log_dir(other); + write_lines( + &other_dir.join("exec.log"), + &[jsonl("2026-05-01T10:00:00.000Z", "stdout", "x", 1)], + ); + let other_stream = logs::log_stream(other, &LogStreamOptions::default()) + .await + .expect("open other stream"); + let other_entries: Vec<_> = other_stream + .try_collect() + .await + .expect("drain other stream"); + let foreign_cursor = other_entries[0].cursor.clone(); + + // Resume the first sandbox with the other sandbox's cursor. + // The exec generation won't match, so InvalidCursor fires. + let result = logs::log_stream( + name, + &LogStreamOptions { + start: LogStreamStart::From(foreign_cursor), + ..Default::default() + }, + ) + .await; + assert!( + matches!( + result, + Err(microsandbox::MicrosandboxError::InvalidCursor(_)) + ), + "expected InvalidCursor, got: {:?}", + result.as_ref().err(), + ); +} diff --git a/crates/microsandbox/tests/log_stream_e2e.rs b/crates/microsandbox/tests/log_stream_e2e.rs new file mode 100644 index 000000000..a2a8c84a2 --- /dev/null +++ b/crates/microsandbox/tests/log_stream_e2e.rs @@ -0,0 +1,202 @@ +//! End-to-end tests for log capture against a real microVM. +//! +//! Unlike `log_stream.rs` (which drives the engine against synthetic +//! on-disk layouts), these tests boot an alpine sandbox, run a real +//! exec session, and validate that the relay tap → `exec.log` → +//! `log_stream` / `read_logs` pipeline carries the produced bytes +//! through to a consumer with the right source tag, session id, and +//! cursor semantics. +//! +//! These tests require KVM (or libkrun on macOS). The `#[msb_test]` +//! attribute marks them `#[ignore]`, so plain `cargo test --workspace` +//! skips them. Run them via: +//! +//! cargo nextest run -p microsandbox --test log_stream_e2e --run-ignored=only + +use std::time::Duration; + +use futures::{StreamExt, TryStreamExt}; +use microsandbox::Sandbox; +use microsandbox::logs::{LogOptions, LogSource, LogStreamOptions, LogStreamStart}; +use test_utils::msb_test; + +const ALPINE: &str = "mirror.gcr.io/library/alpine"; + +/// Snapshot path: after exec returns, the relay tap should have +/// flushed the session's stdout into `exec.log`, and `read_logs` +/// should surface it with `source = Stdout` and a real `session_id`. +#[msb_test] +async fn logs_captures_exec_stdout_from_running_sandbox() { + let name = "log-stream-e2e-snapshot"; + let marker = "log-e2e-snapshot-marker-9f3a"; + + let sandbox = Sandbox::builder(name) + .image(ALPINE) + .cpus(1) + .memory(512) + .replace() + .create() + .await + .expect("create sandbox"); + + sandbox + .exec("sh", ["-c", &format!("echo {marker}")]) + .await + .expect("exec"); + + let entries = sandbox + .logs(&LogOptions::default()) + .await + .expect("read logs"); + + sandbox.stop_and_wait().await.expect("stop"); + Sandbox::remove(name).await.expect("remove"); + + let matched: Vec<_> = entries.iter().filter(|e| contains(e, marker)).collect(); + assert!( + !matched.is_empty(), + "expected marker {marker:?} in snapshot logs; saw {} entries", + entries.len(), + ); + let entry = matched[0]; + assert_eq!( + entry.source, + LogSource::Stdout, + "marker came in on the wrong source: {:?}", + entry.source, + ); + assert!( + entry.session_id.is_some(), + "exec-captured entry should carry a session id; got None", + ); +} + +/// Follow path: open the stream first, then exec. The follower +/// must wake on the new write (via filesystem notify) and yield +/// the marker entry within a short timeout. +#[msb_test] +async fn log_stream_follow_catches_live_writes() { + let name = "log-stream-e2e-follow"; + let marker = "log-e2e-follow-marker-7a4b"; + + let sandbox = Sandbox::builder(name) + .image(ALPINE) + .cpus(1) + .memory(512) + .replace() + .create() + .await + .expect("create sandbox"); + + // Start the stream at "now" so the boot-lifecycle entries are + // skipped and the only matching entry should be from the exec + // we run below. + let cutoff = chrono::Utc::now(); + let stream = sandbox + .log_stream(&LogStreamOptions { + sources: Vec::new(), + start: LogStreamStart::Since(cutoff), + until: None, + follow: true, + }) + .await + .expect("open log stream"); + + sandbox + .exec("sh", ["-c", &format!("echo {marker}")]) + .await + .expect("exec"); + + let found = tokio::time::timeout(Duration::from_secs(5), async { + let mut stream = std::pin::pin!(stream); + while let Some(item) = stream.next().await { + let entry = item.expect("stream item"); + if contains(&entry, marker) { + return entry; + } + } + panic!("stream ended without ever seeing marker {marker:?}"); + }) + .await + .expect("marker arrived within timeout"); + + sandbox.stop_and_wait().await.expect("stop"); + Sandbox::remove(name).await.expect("remove"); + + assert_eq!(found.source, LogSource::Stdout); +} + +/// Cursor resume: run exec A, capture its cursor from a snapshot, +/// run exec B, then open a fresh stream from that cursor. The +/// resumed stream must see B but not replay A. +#[msb_test] +async fn log_stream_resume_from_cursor_excludes_replayed_entries() { + let name = "log-stream-e2e-resume"; + let marker_a = "log-e2e-resume-marker-A-3f9d"; + let marker_b = "log-e2e-resume-marker-B-8c2e"; + + let sandbox = Sandbox::builder(name) + .image(ALPINE) + .cpus(1) + .memory(512) + .replace() + .create() + .await + .expect("create sandbox"); + + sandbox + .exec("sh", ["-c", &format!("echo {marker_a}")]) + .await + .expect("exec A"); + + let snapshot = sandbox + .logs(&LogOptions::default()) + .await + .expect("snapshot after exec A"); + let cursor_at_a = snapshot + .iter() + .rev() + .find(|e| contains(e, marker_a)) + .expect("snapshot contained marker A") + .cursor + .clone(); + + sandbox + .exec("sh", ["-c", &format!("echo {marker_b}")]) + .await + .expect("exec B"); + + let resumed: Vec<_> = sandbox + .log_stream(&LogStreamOptions { + sources: Vec::new(), + start: LogStreamStart::From(cursor_at_a), + until: None, + follow: false, + }) + .await + .expect("open resumed stream") + .try_collect() + .await + .expect("drain resumed stream"); + + sandbox.stop_and_wait().await.expect("stop"); + Sandbox::remove(name).await.expect("remove"); + + let saw_a = resumed.iter().any(|e| contains(e, marker_a)); + let saw_b = resumed.iter().any(|e| contains(e, marker_b)); + assert!( + saw_b, + "resumed stream missing marker B (saw {} entries)", + resumed.len() + ); + assert!( + !saw_a, + "resumed stream replayed marker A from before the cursor" + ); +} + +fn contains(entry: µsandbox::logs::LogEntry, needle: &str) -> bool { + std::str::from_utf8(&entry.data) + .map(|s| s.contains(needle)) + .unwrap_or(false) +} diff --git a/crates/utils/lib/log_text.rs b/crates/utils/lib/log_text.rs index dc4eaf265..907b1124f 100644 --- a/crates/utils/lib/log_text.rs +++ b/crates/utils/lib/log_text.rs @@ -1,6 +1,6 @@ //! Text helpers for parsing/rendering captured log lines. //! -//! Shared by the SDK's `microsandbox::sandbox::logs` reader and the +//! Shared by the SDK's `microsandbox::logs` reader and the //! CLI's `msb logs` renderer — both consume the same on-disk JSON //! Lines format and need the same low-level transforms. diff --git a/examples/rust/logs-read/bin/main.rs b/examples/rust/logs-read/bin/main.rs index 4c6df4f4b..11473c9c0 100644 --- a/examples/rust/logs-read/bin/main.rs +++ b/examples/rust/logs-read/bin/main.rs @@ -1,6 +1,7 @@ //! Read captured exec.log entries from a stopped sandbox. -use microsandbox::sandbox::{LogOptions, LogSource, Sandbox}; +use microsandbox::logs::{LogOptions, LogSource}; +use microsandbox::sandbox::Sandbox; #[tokio::main] async fn main() -> anyhow::Result<()> { @@ -21,7 +22,7 @@ async fn main() -> anyhow::Result<()> { let handle = Sandbox::get("logs-read").await?; // Default sources are user-program output (stdout/stderr/output). - let entries = handle.logs(&LogOptions::default())?; + let entries = handle.logs(&LogOptions::default()).await?; println!( "\n== default sources (stdout+stderr+output): {} entries", entries.len() @@ -31,24 +32,28 @@ async fn main() -> anyhow::Result<()> { } // Adding `System` mixes in lifecycle markers and runtime/kernel diagnostics. - let with_system = handle.logs(&LogOptions { - sources: vec![ - LogSource::Stdout, - LogSource::Stderr, - LogSource::Output, - LogSource::System, - ], - ..Default::default() - })?; + let with_system = handle + .logs(&LogOptions { + sources: vec![ + LogSource::Stdout, + LogSource::Stderr, + LogSource::Output, + LogSource::System, + ], + ..Default::default() + }) + .await?; println!( "\n== including system (runtime/kernel + lifecycle markers): {} entries", with_system.len() ); - let tail = handle.logs(&LogOptions { - tail: Some(1), - ..Default::default() - })?; + let tail = handle + .logs(&LogOptions { + tail: Some(1), + ..Default::default() + }) + .await?; println!("\n== tail=1: {} entries", tail.len()); if let Some(e) = tail.first() { print_entry(e); @@ -57,7 +62,7 @@ async fn main() -> anyhow::Result<()> { Ok(()) } -fn print_entry(e: µsandbox::sandbox::LogEntry) { +fn print_entry(e: µsandbox::logs::LogEntry) { let id = e .session_id .map(|i| format!("id={i:>3}")) diff --git a/sdk/go/internal/ffi/ffi.go b/sdk/go/internal/ffi/ffi.go index f60db07cb..02ab2c932 100644 --- a/sdk/go/internal/ffi/ffi.go +++ b/sdk/go/internal/ffi/ffi.go @@ -123,6 +123,10 @@ typedef char *(*msb_all_sandbox_metrics_fn)(uint64_t cancel_id, uint8_t *buf, si typedef char *(*msb_sandbox_handle_metrics_fn)(uint64_t cancel_id, const char *name, uint8_t *buf, size_t buf_len); typedef char *(*msb_sandbox_logs_fn)(uint64_t cancel_id, uint64_t handle, const char *opts_json, uint8_t *buf, size_t buf_len); typedef char *(*msb_sandbox_handle_logs_fn)(uint64_t cancel_id, const char *name, const char *opts_json, uint8_t *buf, size_t buf_len); +typedef char *(*msb_sandbox_log_stream_fn)(uint64_t cancel_id, uint64_t handle, const char *opts_json, uint8_t *buf, size_t buf_len); +typedef char *(*msb_sandbox_handle_log_stream_fn)(uint64_t cancel_id, const char *name, const char *opts_json, uint8_t *buf, size_t buf_len); +typedef char *(*msb_log_recv_fn)(uint64_t cancel_id, uint64_t stream_handle, uint8_t *buf, size_t buf_len); +typedef char *(*msb_log_close_fn)(uint64_t stream_handle, uint8_t *buf, size_t buf_len); typedef char *(*msb_volume_create_fn)(uint64_t cancel_id, const char *name, const char *opts_json, uint8_t *buf, size_t buf_len); typedef char *(*msb_volume_remove_fn)(uint64_t cancel_id, const char *name, uint8_t *buf, size_t buf_len); @@ -226,6 +230,10 @@ static msb_all_sandbox_metrics_fn ptr_msb_all_sandbox_metrics = NULL; static msb_sandbox_handle_metrics_fn ptr_msb_sandbox_handle_metrics = NULL; static msb_sandbox_logs_fn ptr_msb_sandbox_logs = NULL; static msb_sandbox_handle_logs_fn ptr_msb_sandbox_handle_logs = NULL; +static msb_sandbox_log_stream_fn ptr_msb_sandbox_log_stream = NULL; +static msb_sandbox_handle_log_stream_fn ptr_msb_sandbox_handle_log_stream = NULL; +static msb_log_recv_fn ptr_msb_log_recv = NULL; +static msb_log_close_fn ptr_msb_log_close = NULL; static msb_volume_create_fn ptr_msb_volume_create = NULL; static msb_volume_remove_fn ptr_msb_volume_remove = NULL; static msb_volume_list_fn ptr_msb_volume_list = NULL; @@ -353,6 +361,10 @@ const char *load_microsandbox(const char *path) { RESOLVE(msb_sandbox_handle_metrics); RESOLVE(msb_sandbox_logs); RESOLVE(msb_sandbox_handle_logs); + RESOLVE(msb_sandbox_log_stream); + RESOLVE(msb_sandbox_handle_log_stream); + RESOLVE(msb_log_recv); + RESOLVE(msb_log_close); RESOLVE(msb_volume_create); RESOLVE(msb_volume_remove); RESOLVE(msb_volume_list); @@ -570,6 +582,18 @@ char *call_msb_sandbox_logs(uint64_t cancel_id, uint64_t handle, const char *opt char *call_msb_sandbox_handle_logs(uint64_t cancel_id, const char *name, const char *opts_json, uint8_t *buf, size_t buf_len) { return ptr_msb_sandbox_handle_logs ? ptr_msb_sandbox_handle_logs(cancel_id, name, opts_json, buf, buf_len) : NULL; } +char *call_msb_sandbox_log_stream(uint64_t cancel_id, uint64_t handle, const char *opts_json, uint8_t *buf, size_t buf_len) { + return ptr_msb_sandbox_log_stream ? ptr_msb_sandbox_log_stream(cancel_id, handle, opts_json, buf, buf_len) : NULL; +} +char *call_msb_sandbox_handle_log_stream(uint64_t cancel_id, const char *name, const char *opts_json, uint8_t *buf, size_t buf_len) { + return ptr_msb_sandbox_handle_log_stream ? ptr_msb_sandbox_handle_log_stream(cancel_id, name, opts_json, buf, buf_len) : NULL; +} +char *call_msb_log_recv(uint64_t cancel_id, uint64_t stream_handle, uint8_t *buf, size_t buf_len) { + return ptr_msb_log_recv ? ptr_msb_log_recv(cancel_id, stream_handle, buf, buf_len) : NULL; +} +char *call_msb_log_close(uint64_t stream_handle, uint8_t *buf, size_t buf_len) { + return ptr_msb_log_close ? ptr_msb_log_close(stream_handle, buf, buf_len) : NULL; +} char *call_msb_volume_create(uint64_t cancel_id, const char *name, const char *opts_json, uint8_t *buf, size_t buf_len) { return ptr_msb_volume_create ? ptr_msb_volume_create(cancel_id, name, opts_json, buf, buf_len) : NULL; } @@ -1454,6 +1478,25 @@ type LogEntry struct { SessionID *uint64 `json:"session_id"` TimestampMs int64 `json:"timestamp_ms"` DataB64 string `json:"data_b64"` + Cursor string `json:"cursor"` +} + +// LogStreamOptions configures a live log stream. `SinceMs` and +// `FromCursor` are mutually exclusive. +type LogStreamOptions struct { + Sources []string `json:"sources,omitempty"` + SinceMs *int64 `json:"since_ms,omitempty"` + FromCursor *string `json:"from_cursor,omitempty"` + UntilMs *int64 `json:"until_ms,omitempty"` + Follow bool `json:"follow,omitempty"` +} + +func logStreamOptionsJSON(opts LogStreamOptions) (*C.char, error) { + b, err := json.Marshal(opts) + if err != nil { + return nil, fmt.Errorf("marshal log stream opts: %w", err) + } + return C.CString(string(b)), nil } func logsOptionsJSON(opts LogOptions) (*C.char, error) { @@ -2203,6 +2246,119 @@ func (h *MetricsStreamHandle) Close() error { return nil } +// ============================================================================= +// Log streaming + +// LogStreamHandle is an opaque reference to a running log stream. Call Close +// to release Rust-side resources and stop the background task. +type LogStreamHandle struct { + handle C.uint64_t +} + +// LogStream starts a log stream against a live sandbox handle. Caller must +// Close the returned handle to release Rust-side resources. +func (s *Sandbox) LogStream(ctx context.Context, opts LogStreamOptions) (*LogStreamHandle, error) { + if err := ensureLoaded(); err != nil { + return nil, err + } + cOpts, err := logStreamOptionsJSON(opts) + if err != nil { + return nil, err + } + defer C.free(unsafe.Pointer(cOpts)) + out, err := call(ctx, func(cancelID C.uint64_t, buf *C.uint8_t, bufLen C.size_t) *C.char { + return C.call_msb_sandbox_log_stream(cancelID, s.h(), cOpts, buf, bufLen) + }) + if err != nil { + return nil, err + } + var resp struct { + StreamHandle uint64 `json:"stream_handle"` + } + if err := json.Unmarshal([]byte(out), &resp); err != nil { + return nil, fmt.Errorf("parse log_stream response: %w", err) + } + return &LogStreamHandle{handle: C.uint64_t(resp.StreamHandle)}, nil +} + +// SandboxHandleLogStream starts a log stream identified by name without +// requiring a live sandbox handle. +func SandboxHandleLogStream( + ctx context.Context, + name string, + opts LogStreamOptions, +) (*LogStreamHandle, error) { + if err := ensureLoaded(); err != nil { + return nil, err + } + cName := C.CString(name) + defer C.free(unsafe.Pointer(cName)) + cOpts, err := logStreamOptionsJSON(opts) + if err != nil { + return nil, err + } + defer C.free(unsafe.Pointer(cOpts)) + out, err := call(ctx, func(cancelID C.uint64_t, buf *C.uint8_t, bufLen C.size_t) *C.char { + return C.call_msb_sandbox_handle_log_stream(cancelID, cName, cOpts, buf, bufLen) + }) + if err != nil { + return nil, err + } + var resp struct { + StreamHandle uint64 `json:"stream_handle"` + } + if err := json.Unmarshal([]byte(out), &resp); err != nil { + return nil, fmt.Errorf("parse log_stream response: %w", err) + } + return &LogStreamHandle{handle: C.uint64_t(resp.StreamHandle)}, nil +} + +// Recv blocks until the next log entry arrives or ctx is cancelled. Returns +// nil when the stream has ended (snapshot drained, until reached, or fatal +// stream error has already been surfaced on a prior call). +func (h *LogStreamHandle) Recv(ctx context.Context) (*LogEntry, error) { + if err := ensureLoaded(); err != nil { + return nil, err + } + out, err := call(ctx, func(cancelID C.uint64_t, buf *C.uint8_t, bufLen C.size_t) *C.char { + return C.call_msb_log_recv(cancelID, h.handle, buf, bufLen) + }) + if err != nil { + return nil, err + } + var done struct { + Done bool `json:"done"` + } + if jerr := json.Unmarshal([]byte(out), &done); jerr == nil && done.Done { + return nil, nil + } + var entry LogEntry + if err := json.Unmarshal([]byte(out), &entry); err != nil { + return nil, fmt.Errorf("parse log_recv response: %w", err) + } + return &entry, nil +} + +// Close drops the stream handle. The background Rust task stops when the +// channel is closed. +func (h *LogStreamHandle) Close() error { + if err := ensureLoaded(); err != nil { + return err + } + buf := make([]byte, defaultBufSize) + errPtr := C.call_msb_log_close(h.handle, (*C.uint8_t)(unsafe.Pointer(&buf[0])), C.size_t(len(buf))) + if errPtr != nil { + msg := C.GoString(errPtr) + C.call_msb_free_string(errPtr) + var e Error + if jerr := json.Unmarshal([]byte(msg), &e); jerr != nil { + e = Error{Kind: KindInternal, Message: msg} + } + return &e + } + return nil +} + // ============================================================================= // Exec — collect / wait / kill diff --git a/sdk/go/logs.go b/sdk/go/logs.go index 7178ba0ff..b5b1e2edb 100644 --- a/sdk/go/logs.go +++ b/sdk/go/logs.go @@ -33,11 +33,37 @@ type LogEntry struct { SessionID *uint64 Timestamp time.Time Data []byte + // Cursor is an opaque resume token. Pass to LogStreamOptions.FromCursor + // to resume a stream strictly after this entry. + Cursor string } // Text returns the log payload as a string. func (e LogEntry) Text() string { return string(e.Data) } +// LogStreamOptions configures a live log stream. The zero value reads the +// default stdout+stderr+output sources from the beginning, with follow off. +// +// Since and FromCursor are mutually exclusive — passing both rejects at the +// boundary. +type LogStreamOptions struct { + // Sources to include. Empty means the default set + // (stdout + stderr + output). Add LogSourceSystem for + // runtime/kernel diagnostics. + Sources []LogSource + // Start at the first entry whose timestamp is >= Since. + // Mutually exclusive with FromCursor. + Since time.Time + // Resume strictly after the entry whose Cursor matches this value. + // Mutually exclusive with Since. + FromCursor string + // Stop emitting at the first entry whose timestamp is >= Until. + Until time.Time + // When true, keep the stream open past current EOF and yield new + // entries as they are written. + Follow bool +} + func logOptionsToFFI(opts LogOptions) ffi.LogOptions { out := ffi.LogOptions{ Tail: opts.Tail, @@ -57,23 +83,55 @@ func logOptionsToFFI(opts LogOptions) ffi.LogOptions { return out } +func logEntryFromFFI(entry ffi.LogEntry) (LogEntry, error) { + data, err := base64.StdEncoding.DecodeString(entry.DataB64) + if err != nil { + return LogEntry{}, err + } + return LogEntry{ + Source: LogSource(entry.Source), + SessionID: entry.SessionID, + Timestamp: time.UnixMilli(entry.TimestampMs), + Data: data, + Cursor: entry.Cursor, + }, nil +} + func logEntriesFromFFI(entries []ffi.LogEntry) ([]LogEntry, error) { out := make([]LogEntry, 0, len(entries)) for _, entry := range entries { - data, err := base64.StdEncoding.DecodeString(entry.DataB64) + e, err := logEntryFromFFI(entry) if err != nil { return nil, err } - out = append(out, LogEntry{ - Source: LogSource(entry.Source), - SessionID: entry.SessionID, - Timestamp: time.UnixMilli(entry.TimestampMs), - Data: data, - }) + out = append(out, e) } return out, nil } +func logStreamOptionsToFFI(opts LogStreamOptions) ffi.LogStreamOptions { + out := ffi.LogStreamOptions{ + Sources: make([]string, 0, len(opts.Sources)), + Follow: opts.Follow, + } + if !opts.Since.IsZero() { + ms := opts.Since.UnixMilli() + out.SinceMs = &ms + } + if opts.FromCursor != "" { + s := opts.FromCursor + out.FromCursor = &s + } + if !opts.Until.IsZero() { + ms := opts.Until.UnixMilli() + out.UntilMs = &ms + } + for _, source := range opts.Sources { + out.Sources = append(out.Sources, string(source)) + } + return out +} + // Logs reads persisted output for this live sandbox. It works for running and // stopped sandboxes and does not require guest-agent protocol traffic. func (s *Sandbox) Logs(ctx context.Context, opts LogOptions) ([]LogEntry, error) { @@ -93,3 +151,60 @@ func (h *SandboxHandle) Logs(ctx context.Context, opts LogOptions) ([]LogEntry, } return logEntriesFromFFI(entries) } + +// LogStreamHandle is a live log subscription. Obtain via +// Sandbox.LogStream or SandboxHandle.LogStream. Call Close to release +// Rust-side resources. +type LogStreamHandle struct { + inner *ffi.LogStreamHandle +} + +// Recv blocks until the next log entry arrives or ctx is cancelled. +// Returns nil, nil when the stream has ended (snapshot drained, Until +// reached, or a fatal stream error has already been surfaced). +func (h *LogStreamHandle) Recv(ctx context.Context) (*LogEntry, error) { + raw, err := h.inner.Recv(ctx) + if err != nil { + return nil, wrapFFI(err) + } + if raw == nil { + return nil, nil + } + entry, err := logEntryFromFFI(*raw) + if err != nil { + return nil, err + } + return &entry, nil +} + +// Close stops the stream and releases Rust-side resources. +func (h *LogStreamHandle) Close() error { + return wrapFFI(h.inner.Close()) +} + +// LogStream starts a streaming log subscription against a live sandbox. +// Pass LogStreamOptions{Follow: true} to keep the stream open past +// current EOF and pick up new entries as they are written. Close the +// returned handle when done. +func (s *Sandbox) LogStream(ctx context.Context, opts LogStreamOptions) (*LogStreamHandle, error) { + h, err := s.inner.LogStream(ctx, logStreamOptionsToFFI(opts)) + if err != nil { + return nil, wrapFFI(err) + } + return &LogStreamHandle{inner: h}, nil +} + +// LogStream starts a streaming log subscription for this sandbox handle. +// Works without starting or connecting to the sandbox; with +// LogStreamOptions{Follow: true}, the stream picks up new entries the +// moment they land in exec.log. +func (h *SandboxHandle) LogStream( + ctx context.Context, + opts LogStreamOptions, +) (*LogStreamHandle, error) { + inner, err := ffi.SandboxHandleLogStream(ctx, h.name, logStreamOptionsToFFI(opts)) + if err != nil { + return nil, wrapFFI(err) + } + return &LogStreamHandle{inner: inner}, nil +} diff --git a/sdk/go/logs_test.go b/sdk/go/logs_test.go index acec395b9..675316a81 100644 --- a/sdk/go/logs_test.go +++ b/sdk/go/logs_test.go @@ -40,6 +40,7 @@ func TestLogEntriesFromFFI(t *testing.T) { SessionID: &sessionID, TimestampMs: 1234, DataB64: "aGVsbG8K", + Cursor: "AQECAwQ=", }}) if err != nil { t.Fatalf("logEntriesFromFFI: %v", err) @@ -59,4 +60,47 @@ func TestLogEntriesFromFFI(t *testing.T) { if entries[0].Text() != "hello\n" { t.Errorf("Text = %q, want %q", entries[0].Text(), "hello\n") } + if entries[0].Cursor != "AQECAwQ=" { + t.Errorf("Cursor = %q, want %q", entries[0].Cursor, "AQECAwQ=") + } +} + +func TestLogStreamOptionsToFFI(t *testing.T) { + since := time.Unix(30, int64(789*time.Millisecond)) + until := time.Unix(40, int64(0)) + + got := logStreamOptionsToFFI(LogStreamOptions{ + Sources: []LogSource{LogSourceStdout, LogSourceSystem}, + Since: since, + Until: until, + Follow: true, + }) + + if got.SinceMs == nil || *got.SinceMs != since.UnixMilli() { + t.Errorf("SinceMs = %v, want %d", got.SinceMs, since.UnixMilli()) + } + if got.UntilMs == nil || *got.UntilMs != until.UnixMilli() { + t.Errorf("UntilMs = %v, want %d", got.UntilMs, until.UnixMilli()) + } + if got.FromCursor != nil { + t.Errorf("FromCursor = %v, want nil", got.FromCursor) + } + if !got.Follow { + t.Errorf("Follow = false, want true") + } + if want := []string{"stdout", "system"}; !reflect.DeepEqual(got.Sources, want) { + t.Errorf("Sources = %v, want %v", got.Sources, want) + } +} + +func TestLogStreamOptionsToFFI_FromCursor(t *testing.T) { + got := logStreamOptionsToFFI(LogStreamOptions{ + FromCursor: "AQECAwQ=", + }) + if got.FromCursor == nil || *got.FromCursor != "AQECAwQ=" { + t.Errorf("FromCursor = %v, want %q", got.FromCursor, "AQECAwQ=") + } + if got.SinceMs != nil { + t.Errorf("SinceMs = %v, want nil", got.SinceMs) + } } diff --git a/sdk/go/native/microsandbox_go_ffi.h b/sdk/go/native/microsandbox_go_ffi.h index 129fe9b9a..3195a8a8b 100644 --- a/sdk/go/native/microsandbox_go_ffi.h +++ b/sdk/go/native/microsandbox_go_ffi.h @@ -263,6 +263,38 @@ char *msb_metrics_recv(uint64_t cancel_id, */ char *msb_metrics_close(Handle stream_handle, unsigned char *buf, uintptr_t buf_len); +/** + * Start a log stream against a live sandbox handle. Returns + * `{"stream_handle":}`. + */ +char *msb_sandbox_log_stream(uint64_t cancel_id, + Handle handle, + const char *opts_json, + unsigned char *buf, + uintptr_t buf_len); + +/** + * Start a log stream against a sandbox identified by name (no live handle + * required). Returns `{"stream_handle":}`. + */ +char *msb_sandbox_handle_log_stream(uint64_t cancel_id, + const char *name, + const char *opts_json, + unsigned char *buf, + uintptr_t buf_len); + +/** + * Block for the next log entry on this stream. Returns a single log-entry + * JSON object, or `{"done":true}` when the stream has ended. + */ +char *msb_log_recv(uint64_t cancel_id, Handle stream_handle, unsigned char *buf, uintptr_t buf_len); + +/** + * Close (drop) a log stream. The background driver task exits when the + * channel receiver is dropped. + */ +char *msb_log_close(Handle stream_handle, unsigned char *buf, uintptr_t buf_len); + /** * Start a streaming exec session. Returns `{"exec_handle":}`. * The exec handle MUST be released with msb_exec_close when done. diff --git a/sdk/go/native/src/lib.rs b/sdk/go/native/src/lib.rs index 8c5d0edd6..d11b2c8dd 100644 --- a/sdk/go/native/src/lib.rs +++ b/sdk/go/native/src/lib.rs @@ -51,11 +51,11 @@ use std::{ use base64::Engine; use microsandbox::{ AgentBridge, LogLevel, MicrosandboxError, RegistryAuth, Sandbox, Snapshot, UpperVerifyStatus, + logs::{self, LogOptions, LogSource}, sandbox::{ - FsEntryKind, LogOptions, LogSource, PullPolicy, all_sandbox_metrics, + FsEntryKind, PullPolicy, all_sandbox_metrics, exec::{ExecEvent, ExecHandle, ExecSink}, fs::{FsReadStream, FsWriteSink}, - logs, }, snapshot::{ExportOpts, SnapshotDestination, SnapshotFormat}, volume::{Volume, VolumeBuilder, VolumeHandle}, @@ -859,6 +859,17 @@ struct LogReadOpts { sources: Vec, } +#[derive(serde::Deserialize, Default)] +struct LogStreamOpts { + #[serde(default)] + sources: Vec, + since_ms: Option, + from_cursor: Option, + until_ms: Option, + #[serde(default)] + follow: bool, +} + #[derive(serde::Deserialize, Default)] struct SnapshotCreateOpts { name: Option, @@ -2119,16 +2130,69 @@ fn parse_log_options(opts_json: *const c_char) -> Result { }) } -fn log_entries_json(entries: Vec) -> Result { - let mut out = Vec::with_capacity(entries.len()); - for entry in entries { - out.push(serde_json::json!({ - "source": log_source_str(entry.source), - "session_id": entry.session_id, - "timestamp_ms": entry.timestamp.timestamp_millis(), - "data_b64": base64::engine::general_purpose::STANDARD.encode(entry.data), - })); +fn parse_log_stream_options( + opts_json: *const c_char, +) -> Result { + use microsandbox::logs::{LogCursor, LogCursorParseError, LogStreamStart}; + + let raw = if opts_json.is_null() { + "{}".to_string() + } else { + unsafe { cstr(opts_json) }?.to_string() + }; + let opts: LogStreamOpts = serde_json::from_str(&raw) + .map_err(|e| FfiError::invalid_argument(format!("invalid log stream opts JSON: {e}")))?; + if opts.since_ms.is_some() && opts.from_cursor.is_some() { + return Err(FfiError::invalid_argument( + "since_ms and from_cursor are mutually exclusive", + )); } + let start = if let Some(ms) = opts.since_ms { + let ts = chrono::DateTime::from_timestamp_millis(ms).ok_or_else(|| { + FfiError::invalid_argument(format!("invalid since_ms timestamp: {ms}")) + })?; + LogStreamStart::Since(ts) + } else if let Some(token) = opts.from_cursor.as_deref() { + let cursor: LogCursor = token.parse().map_err(|e: LogCursorParseError| { + FfiError::invalid_argument(format!("invalid from_cursor: {e}")) + })?; + LogStreamStart::From(cursor) + } else { + LogStreamStart::Beginning + }; + let until = opts + .until_ms + .map(|ms| { + chrono::DateTime::from_timestamp_millis(ms).ok_or_else(|| { + FfiError::invalid_argument(format!("invalid until_ms timestamp: {ms}")) + }) + }) + .transpose()?; + let sources = opts + .sources + .iter() + .map(|s| parse_log_source(s)) + .collect::, _>>()?; + Ok(microsandbox::logs::LogStreamOptions { + sources, + start, + until, + follow: opts.follow, + }) +} + +fn log_entry_json(entry: microsandbox::logs::LogEntry) -> serde_json::Value { + serde_json::json!({ + "source": log_source_str(entry.source), + "session_id": entry.session_id, + "timestamp_ms": entry.timestamp.timestamp_millis(), + "data_b64": base64::engine::general_purpose::STANDARD.encode(entry.data), + "cursor": entry.cursor.to_string(), + }) +} + +fn log_entries_json(entries: Vec) -> Result { + let out: Vec = entries.into_iter().map(log_entry_json).collect(); serde_json::to_string(&out).map_err(|e| FfiError::internal(format!("serialise logs: {e}"))) } @@ -2144,7 +2208,7 @@ pub unsafe extern "C" fn msb_sandbox_logs( let opts = parse_log_options(opts_json)?; Ok(Box::pin(async move { let sb = get(handle)?; - let entries = sb.logs(&opts).map_err(FfiError::from)?; + let entries = sb.logs(&opts).await.map_err(FfiError::from)?; log_entries_json(entries) })) }) @@ -2162,7 +2226,9 @@ pub unsafe extern "C" fn msb_sandbox_handle_logs( let name = unsafe { cstr(name) }?.to_owned(); let opts = parse_log_options(opts_json)?; Ok(Box::pin(async move { - let entries = logs::read_logs(&name, &opts).map_err(FfiError::from)?; + let entries = logs::read_logs(&name, &opts) + .await + .map_err(FfiError::from)?; log_entries_json(entries) })) }) @@ -2805,6 +2871,164 @@ pub unsafe extern "C" fn msb_metrics_close( }) } +// --------------------------------------------------------------------------- +// Log streaming +// +// msb_sandbox_log_stream — start; returns a stream_handle u64 +// msb_sandbox_handle_log_stream — start by name; same return shape +// msb_log_recv — block for next entry (or {"done":true}) +// msb_log_close — drop the stream +// --------------------------------------------------------------------------- + +static NEXT_LOG_STREAM_HANDLE: AtomicU64 = AtomicU64::new(1); + +type LogStreamItem = Result; +type LogStreamEntry = + std::sync::Arc>>; + +fn log_stream_registry() -> &'static RwLock> { + static REG: OnceLock>> = OnceLock::new(); + REG.get_or_init(|| RwLock::new(HashMap::new())) +} + +fn register_log_stream(rx: tokio::sync::mpsc::Receiver) -> Result { + let h = NEXT_LOG_STREAM_HANDLE.fetch_add(1, Ordering::Relaxed); + log_stream_registry() + .write() + .map_err(|_| FfiError::internal("log stream registry lock poisoned"))? + .insert(h, std::sync::Arc::new(tokio::sync::Mutex::new(rx))); + Ok(h) +} + +fn get_log_stream(handle: Handle) -> Result { + log_stream_registry() + .read() + .map_err(|_| FfiError::internal("log stream registry lock poisoned"))? + .get(&handle) + .cloned() + .ok_or_else(|| FfiError::invalid_handle(handle)) +} + +fn remove_log_stream(handle: Handle) { + let _ = log_stream_registry().write().map(|mut r| r.remove(&handle)); +} + +/// Spawn a forwarder that drives the engine stream into an unbounded mpsc +/// channel, register the receiver, and return the handle. +async fn start_log_stream_for( + log_dir_name: &str, + opts: microsandbox::logs::LogStreamOptions, +) -> Result { + let stream = microsandbox::logs::log_stream(log_dir_name, &opts) + .await + .map_err(FfiError::from)?; + let mut stream = Box::pin(stream); + let (tx, rx) = tokio::sync::mpsc::channel::(16); + // The forwarder task is moved off the foreground future so the caller + // can return the stream handle immediately. The task stops naturally + // when the receiver is dropped (msb_log_close). + tokio::spawn(async move { + while let Some(item) = stream.next().await { + if tx.send(item).await.is_err() { + break; + } + } + }); + register_log_stream(rx) +} + +/// Start a log stream against a live sandbox handle. Returns +/// `{"stream_handle":}`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn msb_sandbox_log_stream( + cancel_id: u64, + handle: Handle, + opts_json: *const c_char, + buf: *mut c_uchar, + buf_len: usize, +) -> *mut c_char { + run_c(cancel_id, buf, buf_len, || { + let opts = parse_log_stream_options(opts_json)?; + Ok(Box::pin(async move { + let sb = get(handle)?; + let name = sb.name().to_string(); + let sh = start_log_stream_for(&name, opts).await?; + Ok(format!(r#"{{"stream_handle":{sh}}}"#)) + })) + }) +} + +/// Start a log stream against a sandbox identified by name (no live handle +/// required). Returns `{"stream_handle":}`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn msb_sandbox_handle_log_stream( + cancel_id: u64, + name: *const c_char, + opts_json: *const c_char, + buf: *mut c_uchar, + buf_len: usize, +) -> *mut c_char { + run_c(cancel_id, buf, buf_len, || { + let name = unsafe { cstr(name) }?.to_owned(); + let opts = parse_log_stream_options(opts_json)?; + Ok(Box::pin(async move { + let sh = start_log_stream_for(&name, opts).await?; + Ok(format!(r#"{{"stream_handle":{sh}}}"#)) + })) + }) +} + +/// Block for the next log entry on this stream. Returns a single log-entry +/// JSON object, or `{"done":true}` when the stream has ended. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn msb_log_recv( + cancel_id: u64, + stream_handle: Handle, + buf: *mut c_uchar, + buf_len: usize, +) -> *mut c_char { + let result: Result<(), FfiError> = (|| -> Result<(), FfiError> { + let token = lookup_cancel_token(cancel_id)?; + let entry = get_log_stream(stream_handle)?; + let mut recv = entry + .try_lock() + .map_err(|_| FfiError::internal("log stream mutex busy"))?; + let json = rt().block_on(async { + tokio::select! { + item = recv.recv() => { + match item { + None => Ok(r#"{"done":true}"#.to_string()), + Some(Ok(e)) => serde_json::to_string(&log_entry_json(e)) + .map_err(|e| FfiError::internal(format!("serialise log entry: {e}"))), + Some(Err(e)) => Err(FfiError::from(e)), + } + } + _ = token.cancelled() => Err(FfiError::new(error_kind::CANCELLED, "cancelled")), + } + })?; + write_output(buf, buf_len, &json) + })(); + cancel_unregister(cancel_id); + match result { + Ok(()) => std::ptr::null_mut(), + Err(e) => err_ptr(e), + } +} + +/// Close (drop) a log stream. The background driver task exits when the +/// channel receiver is dropped. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn msb_log_close( + stream_handle: Handle, + buf: *mut c_uchar, + buf_len: usize, +) -> *mut c_char { + run(buf, buf_len, || { + remove_log_stream(stream_handle); + Ok(r#"{"ok":true}"#.into()) + }) +} + // --------------------------------------------------------------------------- // Exec streaming // diff --git a/sdk/node-ts/native/error.rs b/sdk/node-ts/native/error.rs index fb8c7f30a..9301f551e 100644 --- a/sdk/node-ts/native/error.rs +++ b/sdk/node-ts/native/error.rs @@ -44,6 +44,8 @@ fn error_type_str(err: &MicrosandboxError) -> &'static str { MicrosandboxError::SnapshotImageMissing(_) => "SnapshotImageMissing", MicrosandboxError::SnapshotIntegrity(_) => "SnapshotIntegrity", MicrosandboxError::MetricsDisabled(_) => "MetricsDisabled", + MicrosandboxError::MissedRotation { .. } => "MissedRotation", + MicrosandboxError::InvalidCursor(_) => "InvalidCursor", MicrosandboxError::AgentClient(_) => "AgentClient", MicrosandboxError::Custom(_) => "Custom", } diff --git a/sdk/node-ts/native/index.cjs b/sdk/node-ts/native/index.cjs index 9659b63f7..22dd9695c 100644 --- a/sdk/node-ts/native/index.cjs +++ b/sdk/node-ts/native/index.cjs @@ -600,6 +600,8 @@ module.exports.InitOptionsBuilder = nativeBinding.InitOptionsBuilder module.exports.JsInitOptionsBuilder = nativeBinding.JsInitOptionsBuilder module.exports.InterfaceOverridesBuilder = nativeBinding.InterfaceOverridesBuilder module.exports.JsInterfaceOverridesBuilder = nativeBinding.JsInterfaceOverridesBuilder +module.exports.LogStream = nativeBinding.LogStream +module.exports.JsLogStream = nativeBinding.JsLogStream module.exports.MetricsStream = nativeBinding.MetricsStream module.exports.JsMetricsStream = nativeBinding.JsMetricsStream module.exports.MountBuilder = nativeBinding.MountBuilder diff --git a/sdk/node-ts/native/index.d.ts b/sdk/node-ts/native/index.d.ts index 3eb4bca8b..63ba4437a 100644 --- a/sdk/node-ts/native/index.d.ts +++ b/sdk/node-ts/native/index.d.ts @@ -291,6 +291,33 @@ export declare class InterfaceOverridesBuilder { } export type JsInterfaceOverridesBuilder = InterfaceOverridesBuilder +/** + * A streaming subscription for sandbox log entries. + * + * Supports both manual `recv()` calls and `for await...of` iteration: + * ```js + * const stream = await sb.logStream({ follow: true }); + * for await (const entry of stream) { + * process.stdout.write(entry.data); + * } + * ``` + * + * This type implements JavaScript's async iterable protocol. + * It can be used with `for await...of` loops. + * + * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols#the_async_iterator_and_async_iterable_protocols + */ +export declare class LogStream { + /** + * Receive the next entry. Returns `null` when the stream ends + * (snapshot drained, `until` reached, or fatal stream error + * already surfaced). + */ + recv(): Promise + [Symbol.asyncIterator](): AsyncGenerator +} +export type JsLogStream = LogStream + /** * A streaming subscription for sandbox metrics at a regular interval. * @@ -781,6 +808,15 @@ export declare class Sandbox { * protocol traffic. */ logs(opts?: LogOptions | undefined | null): Promise> + /** + * Stream captured output as it appears, with optional follow. + * + * Returns an async iterable of `LogEntry`. Each entry carries + * an opaque `cursor` token suitable for passing back via + * `fromCursor` on a later call to resume exactly after that + * entry. + */ + logStream(opts?: LogStreamOptions | undefined | null): Promise } /** @@ -1052,6 +1088,14 @@ export declare class SandboxHandle { * Works without starting the sandbox. */ logs(opts?: LogOptions | undefined | null): Promise> + /** + * Stream captured output as it appears, with optional follow. + * + * Works without starting the sandbox; with `follow: true`, the + * stream picks up new entries the moment they land in + * `exec.log`. + */ + logStream(opts?: LogStreamOptions | undefined | null): Promise /** * Snapshot this (stopped) sandbox under a bare name. * @@ -1474,6 +1518,11 @@ export interface LogEntry { * preserves bytes via base64 round-trip on the host side. */ data: Buffer + /** + * Opaque resume token. Pass back to `logStream` via + * `fromCursor` to pick up immediately after this entry. + */ + cursor: string } /** @@ -1497,6 +1546,39 @@ export interface LogOptions { sources?: Array } +/** + * Options accepted by `Sandbox.logStream()`. + * + * All fields optional. Defaults: sources = `["stdout", "stderr", + * "output"]`, start from the beginning of available history, no + * upper bound, `follow = false`. + * + * `sinceMs` and `fromCursor` are mutually exclusive — passing both + * rejects at the boundary. + */ +export interface LogStreamOptions { + /** Same shape as `LogOptions.sources`. */ + sources?: Array + /** + * Start at the first entry whose timestamp is `>= sinceMs`. + * Mutually exclusive with `fromCursor`. + */ + sinceMs?: number + /** + * Resume strictly after the entry identified by this cursor + * (the value of `LogEntry.cursor` from a prior call). + * Mutually exclusive with `sinceMs`. + */ + fromCursor?: string + /** Stop emitting at the first entry whose timestamp is `>= untilMs`. */ + untilMs?: number + /** + * When true, keep the stream open past current EOF and yield + * new entries as they are written. + */ + follow?: boolean +} + export interface NetworkPolicy { defaultEgress: string defaultIngress: string diff --git a/sdk/node-ts/native/sandbox.rs b/sdk/node-ts/native/sandbox.rs index 9ab0c996c..9a2ad017c 100644 --- a/sdk/node-ts/native/sandbox.rs +++ b/sdk/node-ts/native/sandbox.rs @@ -41,6 +41,20 @@ pub struct JsMetricsStream { rx: Arc>>>, } +/// A streaming subscription for sandbox log entries. +/// +/// Supports both manual `recv()` calls and `for await...of` iteration: +/// ```js +/// const stream = await sb.logStream({ follow: true }); +/// for await (const entry of stream) { +/// process.stdout.write(entry.data); +/// } +/// ``` +#[napi(async_iterator, js_name = "LogStream")] +pub struct JsLogStream { + rx: Arc>>>, +} + //-------------------------------------------------------------------------------------------------- // Methods //-------------------------------------------------------------------------------------------------- @@ -389,10 +403,26 @@ impl Sandbox { let sb = guard.as_ref().ok_or_else(consumed_error)?; let name = sb.name().to_string(); let rust_opts = log_options_from_js(opts).map_err(napi::Error::from_reason)?; - let entries = - microsandbox::sandbox::logs::read_logs(&name, &rust_opts).map_err(to_napi_error)?; + let entries = microsandbox::logs::read_logs(&name, &rust_opts) + .await + .map_err(to_napi_error)?; Ok(entries.into_iter().map(log_entry_to_js).collect()) } + + /// Stream captured output as it appears, with optional follow. + /// + /// Returns an async iterable of `LogEntry`. Each entry carries + /// an opaque `cursor` token suitable for passing back via + /// `fromCursor` on a later call to resume exactly after that + /// entry. + #[napi] + pub async fn log_stream(&self, opts: Option) -> Result { + let guard = self.inner.lock().await; + let sb = guard.as_ref().ok_or_else(consumed_error)?; + let name = sb.name().to_string(); + let rust_opts = log_stream_options_from_js(opts).map_err(napi::Error::from_reason)?; + spawn_log_stream(&name, rust_opts).await + } } //-------------------------------------------------------------------------------------------------- @@ -433,28 +463,91 @@ impl AsyncGenerator for JsMetricsStream { } } -pub fn log_entry_to_js(entry: microsandbox::sandbox::LogEntry) -> LogEntry { +#[napi] +impl JsLogStream { + /// Receive the next entry. Returns `null` when the stream ends + /// (snapshot drained, `until` reached, or fatal stream error + /// already surfaced). + #[napi] + pub async fn recv(&self) -> Result> { + let mut guard = self.rx.lock().await; + match guard.recv().await { + Some(result) => Ok(Some(result?)), + None => Ok(None), + } + } +} + +#[napi] +impl AsyncGenerator for JsLogStream { + type Yield = LogEntry; + type Next = (); + type Return = (); + + fn next( + &mut self, + _value: Option, + ) -> impl std::future::Future>> + Send + 'static { + let rx = Arc::clone(&self.rx); + async move { + let mut guard = rx.lock().await; + match guard.recv().await { + Some(result) => Ok(Some(result?)), + None => Ok(None), + } + } + } +} + +/// Open a log stream against the given sandbox name and bridge it +/// onto a JS-side mpsc channel. Shared between `Sandbox::log_stream` +/// and `SandboxHandle::log_stream`. +pub async fn spawn_log_stream( + name: &str, + opts: microsandbox::logs::LogStreamOptions, +) -> Result { + let mut stream = Box::pin( + microsandbox::logs::log_stream(name, &opts) + .await + .map_err(to_napi_error)?, + ); + let (tx, rx) = tokio::sync::mpsc::channel(16); + tokio::spawn(async move { + while let Some(result) = stream.next().await { + let item = result.map(log_entry_to_js).map_err(to_napi_error); + if tx.send(item).await.is_err() { + break; + } + } + }); + Ok(JsLogStream { + rx: Arc::new(Mutex::new(rx)), + }) +} + +pub fn log_entry_to_js(entry: microsandbox::logs::LogEntry) -> LogEntry { let source = match entry.source { - microsandbox::sandbox::LogSource::Stdout => "stdout", - microsandbox::sandbox::LogSource::Stderr => "stderr", - microsandbox::sandbox::LogSource::Output => "output", - microsandbox::sandbox::LogSource::System => "system", + microsandbox::logs::LogSource::Stdout => "stdout", + microsandbox::logs::LogSource::Stderr => "stderr", + microsandbox::logs::LogSource::Output => "output", + microsandbox::logs::LogSource::System => "system", }; LogEntry { timestamp_ms: entry.timestamp.timestamp_millis() as f64, source: source.to_string(), session_id: entry.session_id.map(|id| id as f64), data: entry.data.to_vec().into(), + cursor: entry.cursor.to_string(), } } pub fn log_options_from_js( opts: Option, -) -> std::result::Result { +) -> std::result::Result { let Some(o) = opts else { - return Ok(microsandbox::sandbox::LogOptions::default()); + return Ok(microsandbox::logs::LogOptions::default()); }; - let mut out = microsandbox::sandbox::LogOptions { + let mut out = microsandbox::logs::LogOptions { tail: o.tail.map(|n| n as usize), since: o.since_ms.and_then(ms_to_datetime), until: o.until_ms.and_then(ms_to_datetime), @@ -463,16 +556,16 @@ pub fn log_options_from_js( if let Some(srcs) = o.sources { for s in srcs { match s.as_str() { - "stdout" => out.sources.push(microsandbox::sandbox::LogSource::Stdout), - "stderr" => out.sources.push(microsandbox::sandbox::LogSource::Stderr), - "output" => out.sources.push(microsandbox::sandbox::LogSource::Output), - "system" => out.sources.push(microsandbox::sandbox::LogSource::System), + "stdout" => out.sources.push(microsandbox::logs::LogSource::Stdout), + "stderr" => out.sources.push(microsandbox::logs::LogSource::Stderr), + "output" => out.sources.push(microsandbox::logs::LogSource::Output), + "system" => out.sources.push(microsandbox::logs::LogSource::System), "all" => { out.sources = vec![ - microsandbox::sandbox::LogSource::Stdout, - microsandbox::sandbox::LogSource::Stderr, - microsandbox::sandbox::LogSource::Output, - microsandbox::sandbox::LogSource::System, + microsandbox::logs::LogSource::Stdout, + microsandbox::logs::LogSource::Stderr, + microsandbox::logs::LogSource::Output, + microsandbox::logs::LogSource::System, ]; } other => return Err(format!("unknown log source {other:?}")), @@ -482,6 +575,57 @@ pub fn log_options_from_js( Ok(out) } +pub fn log_stream_options_from_js( + opts: Option, +) -> std::result::Result { + let Some(o) = opts else { + return Ok(microsandbox::logs::LogStreamOptions::default()); + }; + if o.since_ms.is_some() && o.from_cursor.is_some() { + return Err("sinceMs and fromCursor are mutually exclusive".into()); + } + let start = if let Some(ms) = o.since_ms { + let ts = ms_to_datetime(ms).ok_or_else(|| format!("invalid sinceMs value {ms}"))?; + microsandbox::logs::LogStreamStart::Since(ts) + } else if let Some(token) = o.from_cursor.as_deref() { + let cursor: microsandbox::logs::LogCursor = + token + .parse() + .map_err(|e: microsandbox::logs::LogCursorParseError| { + format!("invalid fromCursor: {e}") + })?; + microsandbox::logs::LogStreamStart::From(cursor) + } else { + microsandbox::logs::LogStreamStart::Beginning + }; + let mut sources = Vec::new(); + if let Some(srcs) = o.sources { + for s in srcs { + match s.as_str() { + "stdout" => sources.push(microsandbox::logs::LogSource::Stdout), + "stderr" => sources.push(microsandbox::logs::LogSource::Stderr), + "output" => sources.push(microsandbox::logs::LogSource::Output), + "system" => sources.push(microsandbox::logs::LogSource::System), + "all" => { + sources = vec![ + microsandbox::logs::LogSource::Stdout, + microsandbox::logs::LogSource::Stderr, + microsandbox::logs::LogSource::Output, + microsandbox::logs::LogSource::System, + ]; + } + other => return Err(format!("unknown log source {other:?}")), + } + } + } + Ok(microsandbox::logs::LogStreamOptions { + sources, + start, + until: o.until_ms.and_then(ms_to_datetime), + follow: o.follow.unwrap_or(false), + }) +} + fn ms_to_datetime(ms: f64) -> Option> { let secs = (ms / 1000.0).trunc() as i64; let nsecs = ((ms - secs as f64 * 1000.0) * 1_000_000.0).round() as u32; diff --git a/sdk/node-ts/native/sandbox_handle.rs b/sdk/node-ts/native/sandbox_handle.rs index 5e8b2c151..9fb2d9340 100644 --- a/sdk/node-ts/native/sandbox_handle.rs +++ b/sdk/node-ts/native/sandbox_handle.rs @@ -150,13 +150,28 @@ impl JsSandboxHandle { pub async fn logs(&self, opts: Option) -> Result> { let rust_opts = crate::sandbox::log_options_from_js(opts).map_err(napi::Error::from_reason)?; - let entries = self.inner.logs(&rust_opts).map_err(to_napi_error)?; + let entries = self.inner.logs(&rust_opts).await.map_err(to_napi_error)?; Ok(entries .into_iter() .map(crate::sandbox::log_entry_to_js) .collect()) } + /// Stream captured output as it appears, with optional follow. + /// + /// Works without starting the sandbox; with `follow: true`, the + /// stream picks up new entries the moment they land in + /// `exec.log`. + #[napi] + pub async fn log_stream( + &self, + opts: Option, + ) -> Result { + let rust_opts = + crate::sandbox::log_stream_options_from_js(opts).map_err(napi::Error::from_reason)?; + crate::sandbox::spawn_log_stream(self.inner.name(), rust_opts).await + } + /// Snapshot this (stopped) sandbox under a bare name. /// /// Resolves under `~/.microsandbox/snapshots//`. Use diff --git a/sdk/node-ts/native/types.rs b/sdk/node-ts/native/types.rs index ef08ae6a5..8f99159cd 100644 --- a/sdk/node-ts/native/types.rs +++ b/sdk/node-ts/native/types.rs @@ -32,6 +32,10 @@ pub struct LogEntry { /// Body bytes. UTF-8 lossy decoded by default; raw mode (future) /// preserves bytes via base64 round-trip on the host side. pub data: napi::bindgen_prelude::Buffer, + + /// Opaque resume token. Pass back to `logStream` via + /// `fromCursor` to pick up immediately after this entry. + pub cursor: String, } /// Filters applied by `Sandbox.logs()`. @@ -55,6 +59,36 @@ pub struct LogOptions { pub sources: Option>, } +/// Options accepted by `Sandbox.logStream()`. +/// +/// All fields optional. Defaults: sources = `["stdout", "stderr", +/// "output"]`, start from the beginning of available history, no +/// upper bound, `follow = false`. +/// +/// `sinceMs` and `fromCursor` are mutually exclusive — passing both +/// rejects at the boundary. +#[napi(object)] +pub struct LogStreamOptions { + /// Same shape as `LogOptions.sources`. + pub sources: Option>, + + /// Start at the first entry whose timestamp is `>= sinceMs`. + /// Mutually exclusive with `fromCursor`. + pub since_ms: Option, + + /// Resume strictly after the entry identified by this cursor + /// (the value of `LogEntry.cursor` from a prior call). + /// Mutually exclusive with `sinceMs`. + pub from_cursor: Option, + + /// Stop emitting at the first entry whose timestamp is `>= untilMs`. + pub until_ms: Option, + + /// When true, keep the stream open past current EOF and yield + /// new entries as they are written. + pub follow: Option, +} + /// Filesystem entry metadata returned by `fs.list()`. #[napi(object)] pub struct FsEntry { diff --git a/sdk/node-ts/src/index.ts b/sdk/node-ts/src/index.ts index a8d6957ec..f8cb817b9 100644 --- a/sdk/node-ts/src/index.ts +++ b/sdk/node-ts/src/index.ts @@ -80,8 +80,13 @@ export type { } from "./image.js"; // Logs -export { LogEntry } from "./logs.js"; -export type { LogReadOptions, LogReadSource, LogSource } from "./logs.js"; +export { LogEntry, LogStream } from "./logs.js"; +export type { + LogReadOptions, + LogReadSource, + LogSource, + LogStreamOptions, +} from "./logs.js"; // Metrics streaming export { MetricsStream } from "./metrics-stream.js"; diff --git a/sdk/node-ts/src/internal/napi.ts b/sdk/node-ts/src/internal/napi.ts index 6dcf85420..e2721a7fd 100644 --- a/sdk/node-ts/src/internal/napi.ts +++ b/sdk/node-ts/src/internal/napi.ts @@ -190,6 +190,7 @@ export interface NapiSandbox { detach(): Promise; removePersisted(): Promise; logs(opts?: LogOptions): Promise; + logStream(opts?: LogStreamOptions): Promise; } export interface NapiSandboxHandle { @@ -208,6 +209,7 @@ export interface NapiSandboxHandle { kill(): Promise; remove(): Promise; logs(opts?: LogOptions): Promise; + logStream(opts?: LogStreamOptions): Promise; snapshot(name: string): Promise; snapshotTo(path: string): Promise; } @@ -218,6 +220,7 @@ export interface LogEntry { readonly source: string; readonly sessionId: number | null; readonly data: Buffer; + readonly cursor: string; } /** Native filter object accepted by `logs()`. */ @@ -228,6 +231,20 @@ export interface LogOptions { sources?: string[]; } +/** Native option object accepted by `logStream()`. */ +export interface LogStreamOptions { + sources?: string[]; + sinceMs?: number; + fromCursor?: string; + untilMs?: number; + follow?: boolean; +} + +/** Native stream returned by `logStream()`. */ +export interface NapiLogStream extends AsyncIterable { + recv(): Promise; +} + export interface NapiSandboxInfo { readonly name: string; readonly status: string; diff --git a/sdk/node-ts/src/logs.ts b/sdk/node-ts/src/logs.ts index 5a0717202..a7440f9be 100644 --- a/sdk/node-ts/src/logs.ts +++ b/sdk/node-ts/src/logs.ts @@ -1,4 +1,11 @@ -import type { LogEntry as NapiLogEntry, LogOptions as NapiLogOptions } from "./internal/napi.js"; +import { withMappedErrors } from "./internal/error-mapping.js"; +import { mapAsyncIterable } from "./internal/async-iter.js"; +import type { + LogEntry as NapiLogEntry, + LogOptions as NapiLogOptions, + LogStreamOptions as NapiLogStreamOptions, + NapiLogStream, +} from "./internal/napi.js"; /** * Source tag on a captured log entry. @@ -46,16 +53,25 @@ export class LogEntry { /** The captured chunk's bytes. */ readonly data: Uint8Array; + /** + * Opaque resume token. Pass back to {@link Sandbox.logStream} as + * {@link LogStreamOptions.fromCursor} to resume the stream + * immediately after this entry. + */ + readonly cursor: string; + constructor( timestamp: Date, source: LogSource, sessionId: number | null, data: Uint8Array, + cursor: string, ) { this.timestamp = timestamp; this.source = source; this.sessionId = sessionId; this.data = data; + this.cursor = cursor; } /** UTF-8 decode of {@link data} (lossy — invalid bytes are replaced). */ @@ -90,6 +106,79 @@ export interface LogReadOptions { sources?: ReadonlyArray; } +/** + * Options for {@link Sandbox.logStream}. + * + * All fields optional. `since` and `fromCursor` are mutually + * exclusive — passing both rejects at the boundary. + */ +export interface LogStreamOptions { + /** Same shape as {@link LogReadOptions.sources}. */ + sources?: ReadonlyArray; + + /** + * Start at the first entry whose timestamp is `>= since`. + * Mutually exclusive with {@link fromCursor}. + */ + since?: Date; + + /** + * Resume strictly after the entry whose {@link LogEntry.cursor} + * matches this value. Mutually exclusive with {@link since}. + */ + fromCursor?: string; + + /** Stop emitting at the first entry whose timestamp is `>= until`. */ + until?: Date; + + /** + * When `true`, keep the stream open past current EOF and yield + * new entries as they are written. Defaults to `false`. + */ + follow?: boolean; +} + +/** + * An async iterable of {@link LogEntry} values. + * + * Use `for await...of` to drain, or call {@link recv} directly. The + * stream ends naturally when the underlying source drains (snapshot + * mode or `until` reached), and may end early with an error if the + * follower falls behind the file's rotation retention window. + */ +export class LogStream + implements AsyncIterable, AsyncDisposable +{ + private readonly inner: NapiLogStream; + private done = false; + + /** @internal */ + constructor(inner: NapiLogStream) { + this.inner = inner; + } + + async recv(): Promise { + if (this.done) return null; + const raw = await withMappedErrors(() => this.inner.recv()); + if (raw === null) { + this.done = true; + return null; + } + return logEntryFromNapi(raw); + } + + [Symbol.asyncIterator](): AsyncIterator { + return mapAsyncIterable( + { recv: () => this.inner.recv() }, + logEntryFromNapi, + )[Symbol.asyncIterator](); + } + + async [Symbol.asyncDispose](): Promise { + this.done = true; + } +} + /** @internal */ export function logEntryFromNapi(raw: NapiLogEntry): LogEntry { const source = raw.source as LogSource; @@ -98,6 +187,7 @@ export function logEntryFromNapi(raw: NapiLogEntry): LogEntry { source, raw.sessionId, new Uint8Array(raw.data), + raw.cursor, ); } @@ -113,3 +203,17 @@ export function logReadOptionsToNapi( sources: opts.sources ? Array.from(opts.sources) : undefined, }; } + +/** @internal */ +export function logStreamOptionsToNapi( + opts?: LogStreamOptions, +): NapiLogStreamOptions | undefined { + if (!opts) return undefined; + return { + sources: opts.sources ? Array.from(opts.sources) : undefined, + sinceMs: opts.since?.getTime(), + fromCursor: opts.fromCursor, + untilMs: opts.until?.getTime(), + follow: opts.follow, + }; +} diff --git a/sdk/node-ts/src/sandbox-handle.ts b/sdk/node-ts/src/sandbox-handle.ts index 4c7a2869c..4b09fab26 100644 --- a/sdk/node-ts/src/sandbox-handle.ts +++ b/sdk/node-ts/src/sandbox-handle.ts @@ -6,9 +6,12 @@ import type { } from "./internal/napi.js"; import { LogEntry, + LogStream, type LogReadOptions, + type LogStreamOptions, logEntryFromNapi, logReadOptionsToNapi, + logStreamOptionsToNapi, } from "./logs.js"; import { Sandbox } from "./sandbox.js"; import type { SandboxStatus } from "./sandbox-status.js"; @@ -132,6 +135,20 @@ export class SandboxHandle { return raw.map(logEntryFromNapi); } + /** + * Stream captured output as it appears, with optional follow. + * + * Works without starting the sandbox; with `{ follow: true }`, + * the stream picks up new entries the moment they land in + * `exec.log`. + */ + async logStream(opts?: LogStreamOptions): Promise { + const live = this.requireLive(); + const napiOpts = logStreamOptionsToNapi(opts); + const raw = await withMappedErrors(() => live.logStream(napiOpts)); + return new LogStream(raw); + } + /** * Snapshot this (stopped) sandbox under a bare name. Resolves under * `~/.microsandbox/snapshots//`. For an explicit filesystem diff --git a/sdk/node-ts/src/sandbox.ts b/sdk/node-ts/src/sandbox.ts index b91736dd4..99b655752 100644 --- a/sdk/node-ts/src/sandbox.ts +++ b/sdk/node-ts/src/sandbox.ts @@ -14,9 +14,12 @@ import { SandboxFs } from "./fs.js"; import type { ExitStatus } from "./exit-status.js"; import { LogEntry, + LogStream, type LogReadOptions, + type LogStreamOptions, logEntryFromNapi, logReadOptionsToNapi, + logStreamOptionsToNapi, } from "./logs.js"; import { SandboxHandle, sandboxInfoToHandle } from "./sandbox-handle.js"; import type { SandboxMetrics } from "./metrics.js"; @@ -277,6 +280,23 @@ export class Sandbox implements AsyncDisposable { return raw.map(logEntryFromNapi); } + /** + * Stream captured output as it appears, with optional follow. + * + * Backed by the same on-disk `exec.log` as {@link logs}, but + * yields entries lazily. Pass `{ follow: true }` to keep the + * stream open past current EOF and pick up new entries as they + * are written; otherwise the stream drains the current contents + * and ends. Each yielded {@link LogEntry} carries an opaque + * `cursor` that can be passed back via + * {@link LogStreamOptions.fromCursor} to resume. + */ + async logStream(opts?: LogStreamOptions): Promise { + const napiOpts = logStreamOptionsToNapi(opts); + const raw = await withMappedErrors(() => this.inner.logStream(napiOpts)); + return new LogStream(raw); + } + // -- metrics ------------------------------------------------------------ async metrics(): Promise { diff --git a/sdk/python/microsandbox/__init__.py b/sdk/python/microsandbox/__init__.py index 26e4b1ff9..98dc30b51 100644 --- a/sdk/python/microsandbox/__init__.py +++ b/sdk/python/microsandbox/__init__.py @@ -10,6 +10,8 @@ FsMetadata, FsReadStream, FsWriteSink, + LogEntry, + LogStream, MetricsStream, PullSession, Sandbox, @@ -140,6 +142,8 @@ "ExecHandle", "ExecOutput", "ExecSink", + "LogEntry", + "LogStream", "MetricsStream", "ExecOptions", "ExitStatus", diff --git a/sdk/python/microsandbox/_microsandbox.pyi b/sdk/python/microsandbox/_microsandbox.pyi index ce81eed4c..6b60782a2 100644 --- a/sdk/python/microsandbox/_microsandbox.pyi +++ b/sdk/python/microsandbox/_microsandbox.pyi @@ -71,6 +71,14 @@ class Sandbox: until_ms: float | None = None, sources: list[LogReadSource] | None = None, ) -> list[LogEntry]: ... + async def log_stream( + self, + sources: list[LogReadSource] | None = None, + since_ms: float | None = None, + from_cursor: str | None = None, + until_ms: float | None = None, + follow: bool = False, + ) -> LogStream: ... async def stop(self) -> None: ... async def stop_and_wait(self) -> tuple[int, bool]: ... async def kill(self) -> None: ... @@ -102,6 +110,14 @@ class SandboxHandle: until_ms: float | None = None, sources: list[LogReadSource] | None = None, ) -> list[LogEntry]: ... + async def log_stream( + self, + sources: list[LogReadSource] | None = None, + since_ms: float | None = None, + from_cursor: str | None = None, + until_ms: float | None = None, + follow: bool = False, + ) -> LogStream: ... async def start(self, *, detached: bool = False) -> Sandbox: ... async def connect(self) -> Sandbox: ... async def stop(self) -> None: ... @@ -211,10 +227,15 @@ class LogEntry: timestamp_ms: float source: LogSource session_id: int | None # None for `system` entries + cursor: str # opaque resume token, pass back via `from_cursor` @property def data(self) -> bytes: ... def text(self) -> str: ... +class LogStream: + def __aiter__(self) -> AsyncIterator[LogEntry]: ... + async def __anext__(self) -> LogEntry: ... + class Volume: @staticmethod async def create( diff --git a/sdk/python/src/lib.rs b/sdk/python/src/lib.rs index b94a88600..1468a383f 100644 --- a/sdk/python/src/lib.rs +++ b/sdk/python/src/lib.rs @@ -43,6 +43,7 @@ fn _microsandbox(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/sdk/python/src/logs.rs b/sdk/python/src/logs.rs index 59644b4c8..68f6cb221 100644 --- a/sdk/python/src/logs.rs +++ b/sdk/python/src/logs.rs @@ -1,5 +1,10 @@ +use std::pin::Pin; +use std::sync::Arc; + +use futures::StreamExt; use pyo3::prelude::*; use pyo3::types::PyBytes; +use tokio::sync::Mutex; use crate::error::to_py_err; @@ -23,6 +28,11 @@ pub struct PyLogEntry { #[pyo3(get)] pub session_id: Option, + /// Opaque resume token. Pass back to `Sandbox.log_stream` via + /// `from_cursor` to resume immediately after this entry. + #[pyo3(get)] + pub cursor: String, + /// Captured chunk's bytes. pub data: Vec, } @@ -50,26 +60,150 @@ impl PyLogEntry { } } +type LogStreamInner = Pin< + Box< + dyn futures::Stream> + + Send, + >, +>; + +/// Async iterator over a live log stream. Yielded objects are +/// [`PyLogEntry`] values; iteration ends naturally when the stream +/// drains (snapshot mode or `until_ms` reached) and raises on a +/// fatal stream error. +#[pyclass(name = "LogStream")] +pub struct PyLogStream { + stream: Arc>, +} + +impl PyLogStream { + pub fn new( + stream: impl futures::Stream< + Item = microsandbox::MicrosandboxResult, + > + Send + + 'static, + ) -> Self { + Self { + stream: Arc::new(Mutex::new(Box::pin(stream))), + } + } +} + +#[pymethods] +impl PyLogStream { + fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { + slf + } + + fn __anext__<'py>(&self, py: Python<'py>) -> PyResult> { + let stream = self.stream.clone(); + pyo3_async_runtimes::tokio::future_into_py(py, async move { + let mut guard = stream.lock().await; + match guard.next().await { + Some(Ok(entry)) => Ok(convert_entry(entry)), + Some(Err(e)) => Err(to_py_err(e)), + None => Err(pyo3::exceptions::PyStopAsyncIteration::new_err(())), + } + }) + } +} + //-------------------------------------------------------------------------------------------------- // Functions //-------------------------------------------------------------------------------------------------- /// Convert a Rust `LogEntry` into the Python class. -pub fn convert_entry(entry: microsandbox::sandbox::LogEntry) -> PyLogEntry { +pub fn convert_entry(entry: microsandbox::logs::LogEntry) -> PyLogEntry { let source = match entry.source { - microsandbox::sandbox::LogSource::Stdout => "stdout", - microsandbox::sandbox::LogSource::Stderr => "stderr", - microsandbox::sandbox::LogSource::Output => "output", - microsandbox::sandbox::LogSource::System => "system", + microsandbox::logs::LogSource::Stdout => "stdout", + microsandbox::logs::LogSource::Stderr => "stderr", + microsandbox::logs::LogSource::Output => "output", + microsandbox::logs::LogSource::System => "system", }; PyLogEntry { timestamp_ms: entry.timestamp.timestamp_millis() as f64, source: source.to_string(), session_id: entry.session_id, + cursor: entry.cursor.to_string(), data: entry.data.to_vec(), } } +/// Build a [`microsandbox::logs::LogStreamOptions`] from the keyword +/// args the Python method accepts. `since_ms` and `from_cursor` are +/// mutually exclusive. +pub fn parse_log_stream_options( + sources: Option>, + since_ms: Option, + from_cursor: Option, + until_ms: Option, + follow: bool, +) -> PyResult { + use microsandbox::logs::{LogCursor, LogCursorParseError, LogSource, LogStreamStart}; + + if since_ms.is_some() && from_cursor.is_some() { + return Err(pyo3::exceptions::PyValueError::new_err( + "since_ms and from_cursor are mutually exclusive", + )); + } + let start = if let Some(ms) = since_ms { + let ts = ms_to_datetime(ms).ok_or_else(|| { + pyo3::exceptions::PyValueError::new_err(format!("invalid since_ms value {ms}")) + })?; + LogStreamStart::Since(ts) + } else if let Some(token) = from_cursor.as_deref() { + let cursor: LogCursor = token.parse().map_err(|e: LogCursorParseError| { + pyo3::exceptions::PyValueError::new_err(format!("invalid from_cursor: {e}")) + })?; + LogStreamStart::From(cursor) + } else { + LogStreamStart::Beginning + }; + let mut engine_sources = Vec::new(); + if let Some(src) = sources { + for s in src { + match s.as_str() { + "stdout" => engine_sources.push(LogSource::Stdout), + "stderr" => engine_sources.push(LogSource::Stderr), + "output" => engine_sources.push(LogSource::Output), + "system" => engine_sources.push(LogSource::System), + "all" => { + engine_sources = vec![ + LogSource::Stdout, + LogSource::Stderr, + LogSource::Output, + LogSource::System, + ]; + } + other => { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "unknown log source {other:?}" + ))); + } + } + } + } + Ok(microsandbox::logs::LogStreamOptions { + sources: engine_sources, + start, + until: until_ms.and_then(ms_to_datetime), + follow, + }) +} + +/// Open a log stream against the named sandbox and wrap it as a +/// `PyLogStream`. Shared between `Sandbox.log_stream` and +/// `SandboxHandle.log_stream`. +pub async fn open_log_stream( + name: &str, + opts: microsandbox::logs::LogStreamOptions, +) -> PyResult { + let stream = microsandbox::logs::log_stream(name, &opts) + .await + .map_err(to_py_err)?; + Ok(PyLogStream::new(stream)) +} + /// Read captured logs for a sandbox by name. Filters are encoded as a /// `LogOptions` Rust struct on the caller's side. pub fn read_logs_blocking( @@ -79,7 +213,7 @@ pub fn read_logs_blocking( until_ms: Option, sources: Option>, ) -> PyResult> { - use microsandbox::sandbox::{LogOptions, LogSource}; + use microsandbox::logs::{LogOptions, LogSource}; let mut opts = LogOptions { tail, @@ -111,7 +245,9 @@ pub fn read_logs_blocking( } } - let entries = microsandbox::sandbox::logs::read_logs(name, &opts).map_err(to_py_err)?; + let entries = tokio::runtime::Handle::current() + .block_on(microsandbox::logs::read_logs(name, &opts)) + .map_err(to_py_err)?; Ok(entries.into_iter().map(convert_entry).collect()) } diff --git a/sdk/python/src/sandbox.rs b/sdk/python/src/sandbox.rs index c4cc3115e..ef5516479 100644 --- a/sdk/python/src/sandbox.rs +++ b/sdk/python/src/sandbox.rs @@ -418,6 +418,43 @@ impl PySandbox { }) } + /// Stream captured output as it appears, with optional follow. + /// + /// Returns an async iterator of `LogEntry`. Each entry carries + /// an opaque `cursor` string suitable for passing back via + /// `from_cursor` on a later call to resume exactly after that + /// entry. `since_ms` and `from_cursor` are mutually exclusive. + #[pyo3(signature = ( + sources = None, + since_ms = None, + from_cursor = None, + until_ms = None, + follow = false, + ))] + fn log_stream<'py>( + &self, + py: Python<'py>, + sources: Option>, + since_ms: Option, + from_cursor: Option, + until_ms: Option, + follow: bool, + ) -> PyResult> { + let inner = self.inner.clone(); + let opts = crate::logs::parse_log_stream_options( + sources, + since_ms, + from_cursor, + until_ms, + follow, + )?; + pyo3_async_runtimes::tokio::future_into_py(py, async move { + let sandbox = Self::clone_sandbox(&inner).await?; + let name = sandbox.name().to_string(); + crate::logs::open_log_stream(&name, opts).await + }) + } + //---------------------------------------------------------------------------------------------- // Lifecycle //---------------------------------------------------------------------------------------------- diff --git a/sdk/python/src/sandbox_handle.rs b/sdk/python/src/sandbox_handle.rs index b0fef21b9..f4adedb03 100644 --- a/sdk/python/src/sandbox_handle.rs +++ b/sdk/python/src/sandbox_handle.rs @@ -118,6 +118,44 @@ impl PySandboxHandle { }) } + /// Stream captured output as it appears, with optional follow. + /// + /// Works without starting the sandbox; with `follow=True`, the + /// stream picks up new entries the moment they land in + /// `exec.log`. `since_ms` and `from_cursor` are mutually + /// exclusive. + #[pyo3(signature = ( + sources = None, + since_ms = None, + from_cursor = None, + until_ms = None, + follow = false, + ))] + fn log_stream<'py>( + &self, + py: Python<'py>, + sources: Option>, + since_ms: Option, + from_cursor: Option, + until_ms: Option, + follow: bool, + ) -> PyResult> { + let inner = self.inner.clone(); + let opts = crate::logs::parse_log_stream_options( + sources, + since_ms, + from_cursor, + until_ms, + follow, + )?; + pyo3_async_runtimes::tokio::future_into_py(py, async move { + let guard = inner.lock().await; + let name = guard.name().to_string(); + drop(guard); + crate::logs::open_log_stream(&name, opts).await + }) + } + /// Start the sandbox. #[pyo3(signature = (*, detached = false))] fn start<'py>(&self, py: Python<'py>, detached: bool) -> PyResult> {