From 35f5db7701dc4429effa374930b627289f60abbc Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 11 Feb 2026 05:07:48 +0100 Subject: [PATCH] return memory.peak after calling sandboxed commands --- src/cmd/mod.rs | 25 ++++++- src/cmd/sandbox.rs | 157 +++++++++++++++++++++++++++++++++++------ src/crates/git.rs | 2 +- tests/buildtest/mod.rs | 24 +++++++ 4 files changed, 182 insertions(+), 26 deletions(-) diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs index 2d4a4a2..c411e37 100644 --- a/src/cmd/mod.rs +++ b/src/cmd/mod.rs @@ -375,9 +375,9 @@ impl<'w> Command<'w, '_> { /// Run the prepared command and return an error if it fails (for example with a non-zero exit /// code or a timeout). - pub fn run(self) -> Result<(), CommandError> { - self.run_inner(false)?; - Ok(()) + pub fn run(self) -> Result { + let output = self.run_inner(false)?; + Ok(output.statistics) } /// Run the prepared command and return its output if it succeedes. If it fails (for example @@ -551,15 +551,26 @@ impl From for ProcessOutput { ProcessOutput { stdout: orig.stdout, stderr: orig.stderr, + statistics: ProcessStatistics::default(), } } } +/// collected statistics about the process execution. +#[derive(Default)] +pub struct ProcessStatistics { + /// peak memory usage in bytes. + /// This is populated for sandboxed commands on systems + /// with cgroups v1/v2. + pub memory_peak: Option, +} + /// Output of a [`Command`](struct.Command.html) when it was executed with the /// [`run_capture`](struct.Command.html#method.run_capture) method. pub struct ProcessOutput { stdout: Vec, stderr: Vec, + statistics: ProcessStatistics, } impl ProcessOutput { @@ -572,6 +583,14 @@ impl ProcessOutput { pub fn stderr_lines(&self) -> &[String] { &self.stderr } + + /// Return the peak memory usage in bytes of the sandbox container, if available. + /// + /// This is populated for sandboxed commands on systems with cgroups v2. Returns `None` for + /// non-sandboxed commands or when the metric could not be read. + pub fn memory_peak_bytes(&self) -> Option { + self.statistics.memory_peak + } } enum OutputKind { diff --git a/src/cmd/sandbox.rs b/src/cmd/sandbox.rs index 6f4d3c6..f293497 100644 --- a/src/cmd/sandbox.rs +++ b/src/cmd/sandbox.rs @@ -1,5 +1,5 @@ use crate::Workspace; -use crate::cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput}; +use crate::cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput, ProcessStatistics}; use log::{error, info}; use serde::Deserialize; use std::error::Error; @@ -226,6 +226,7 @@ impl SandboxBuilder { fn create(self, workspace: &Workspace) -> Result, CommandError> { let mut args: Vec = vec!["create".into()]; + // Mounts are container-level config, always on `docker create` for mount in &self.mounts { std::fs::create_dir_all(&mount.host_path)?; @@ -240,16 +241,7 @@ impl SandboxBuilder { } } - for (var, value) in &self.env { - args.push("-e".into()); - args.push(format! {"{var}={value}"}) - } - - if let Some(workdir) = self.workdir { - args.push("-w".into()); - args.push(workdir); - } - + // Resource limits and networking are container-level config if let Some(limit) = self.memory_limit { args.push("-m".into()); args.push(limit.to_string()); @@ -260,11 +252,6 @@ impl SandboxBuilder { args.push(limit.to_string()); } - if let Some(user) = self.user { - args.push("--user".into()); - args.push(user); - } - if !self.enable_networking { args.push("--network".into()); args.push("none".into()); @@ -276,9 +263,10 @@ impl SandboxBuilder { args.push(workspace.sandbox_image().name.clone()); - for arg in self.cmd { - args.push(arg); - } + // Use an idle command; the real command runs via `docker exec` so the container stays + // alive after the command finishes, allowing us to read cgroup metrics. + args.push("sleep".into()); + args.push("infinity".into()); let out = Command::new(workspace, "docker") .args(&args) @@ -287,6 +275,10 @@ impl SandboxBuilder { Ok(Container { id: out.stdout_lines()[0].clone(), workspace, + cmd: self.cmd, + env: self.env, + workdir: self.workdir, + user: self.user, }) } @@ -345,6 +337,11 @@ struct Container<'w> { // Docker container ID id: String, workspace: &'w Workspace, + // Command-level config for `docker exec` (not baked into `docker create`) + cmd: Vec, + env: Vec<(String, String)>, + workdir: Option, + user: Option, } impl fmt::Display for Container<'_> { @@ -367,6 +364,82 @@ impl Container<'_> { Ok(data.pop().unwrap()) } + /// Start the container in detached mode (without `-a`). + fn start(&self) -> Result<(), CommandError> { + Command::new(self.workspace, "docker") + .args(&["start", &self.id]) + .log_output(false) + .run() + .map(|_| ()) + } + + /// Stop a running container. Uses `-t 1` to give `sleep infinity` a short grace period. + fn stop(&self) -> Result<(), CommandError> { + Command::new(self.workspace, "docker") + .args(&["stop", "-t", "1", &self.id]) + .log_output(false) + .run() + .map(|_| ()) + } + + /// Helper to `docker exec cat ` and return stdout lines on success. + fn exec_cat_file(&self, path: &str) -> Option> { + Command::new(self.workspace, "docker") + .args(&["exec", &self.id, "cat", path]) + .log_output(false) + .log_command(false) + .run_capture() + .ok() + .map(|o| o.stdout_lines().to_vec()) + } + + /// Best-effort read of peak memory usage from the still-running container. + /// Tries cgroups v2 first, then falls back to cgroups v1. + fn read_memory_peak(&self) -> Option { + let paths = [ + "/sys/fs/cgroup/memory.peak", // v2 + "/sys/fs/cgroup/memory/memory.max_usage_in_bytes", // v1 + ]; + for path in paths { + if let Some(val) = self + .exec_cat_file(path) + .and_then(|lines| lines.first()?.trim().parse::().ok()) + { + return Some(val); + } + } + None + } + + /// Check if any OOM kills occurred in the container's cgroup. + /// + /// With the `docker exec` model, the OOM killer may only kill the exec'd process + /// while `sleep infinity` (PID 1) survives. In that case `docker inspect` won't + /// report `OOMKilled`, so we check the cgroup events directly. + /// Tries cgroups v2 first, then falls back to cgroups v1. + fn check_cgroup_oom(&self) -> bool { + // Both v1 and v2 expose `oom_kill ` — just in different files. + let paths = [ + "/sys/fs/cgroup/memory.events", // v2 + "/sys/fs/cgroup/memory/memory.oom_control", // v1 + ]; + for path in paths { + if let Some(lines) = self.exec_cat_file(path) { + let found = lines.iter().any(|line| { + line.strip_prefix("oom_kill ") + .and_then(|rest| rest.trim().parse::().ok()) + .is_some_and(|count| count > 0) + }); + if found { + return true; + } + // File existed but no OOM — don't try the other version + return false; + } + } + false + } + #[allow(clippy::type_complexity)] fn run( &self, @@ -377,8 +450,32 @@ impl Container<'_> { log_command: bool, capture: bool, ) -> Result { + // Start the container in detached mode (runs `sleep infinity`) + self.start()?; + + // Build the `docker exec` command with env/workdir/user from the sandbox config + let mut args: Vec = vec!["exec".into()]; + + for (var, value) in &self.env { + args.push("-e".into()); + args.push(format!("{var}={value}")); + } + + if let Some(ref workdir) = self.workdir { + args.push("-w".into()); + args.push(workdir.clone()); + } + + if let Some(ref user) = self.user { + args.push("--user".into()); + args.push(user.clone()); + } + + args.push(self.id.clone()); + args.extend(self.cmd.iter().cloned()); + let mut cmd = Command::new(self.workspace, "docker") - .args(&["start", "-a", &self.id]) + .args(&args) .timeout(timeout) .log_output(log_output) .log_command(log_command) @@ -389,16 +486,31 @@ impl Container<'_> { } let res = cmd.run_inner(capture); + + // Read peak memory usage while the container is still running (best-effort) + let memory_peak = self.read_memory_peak(); + + // Check OOM via cgroup events (catches cases where only the exec'd process + // was killed, leaving the container's init process alive) + let cgroup_oom = self.check_cgroup_oom(); + + // Explicitly stop the container now that we're done reading metrics. + // The scopeguard will still call `docker rm -f` for final cleanup. + let _ = self.stop(); + let details = self.inspect()?; // Return a different error if the container was killed due to an OOM - if details.state.oom_killed { + if details.state.oom_killed || cgroup_oom { Err(match res { Ok(_) | Err(CommandError::ExecutionFailed { .. }) => CommandError::SandboxOOM, Err(err) => err, }) } else { - res + res.map(|mut output| { + output.statistics = ProcessStatistics { memory_peak }; + output + }) } } @@ -406,6 +518,7 @@ impl Container<'_> { Command::new(self.workspace, "docker") .args(&["rm", "-f", &self.id]) .run() + .map(|_| ()) } } diff --git a/src/crates/git.rs b/src/crates/git.rs index 92fe7be..b24e991 100644 --- a/src/crates/git.rs +++ b/src/crates/git.rs @@ -101,7 +101,7 @@ impl CrateTrait for GitRepo { if private_repository && res.is_err() { Err(PrepareError::PrivateGitRepository.into()) } else { - Ok(res?) + Ok(res.map(|_| ())?) } } diff --git a/tests/buildtest/mod.rs b/tests/buildtest/mod.rs index 08120e5..51bcfde 100644 --- a/tests/buildtest/mod.rs +++ b/tests/buildtest/mod.rs @@ -163,6 +163,30 @@ fn test_process_lines() { }); } +#[test] +#[cfg(not(windows))] +fn test_memory_peak() { + runner::run("hello-world", |run| { + run.run( + SandboxBuilder::new() + .enable_networking(false) + .memory_limit(Some(512 * 1024 * 1024)), + |build| { + let output = build.cargo().args(&["run"]).run_capture()?; + if let Some(peak) = output.memory_peak_bytes() { + assert!(peak > 0, "memory peak should be positive"); + assert!( + peak < 512 * 1024 * 1024, + "memory peak should be below the limit" + ); + } + Ok(()) + }, + )?; + Ok(()) + }); +} + #[test] #[cfg(not(windows))] fn test_sandbox_oom() {