diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
index ff8bf9826..bdae1aff8 100644
--- a/.github/workflows/check.yml
+++ b/.github/workflows/check.yml
@@ -302,3 +302,65 @@ jobs:
           export PATH="$HOME/.microsandbox/bin:$PATH"
           export LD_LIBRARY_PATH="${{ github.workspace }}/build:$HOME/.microsandbox/lib"
           npm test
+
+  # ---------------------------------------------------------------------------
+  # Boot timing regression gate (requires KVM)
+  # ---------------------------------------------------------------------------
+  boot-timing:
+    name: Boot Timing
+    needs: check
+    runs-on: self-hosted-ubuntu-2404-x64
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - uses: dtolnay/rust-toolchain@stable
+
+      - uses: Swatinem/rust-cache@v2
+
+      - name: Install build deps
+        run: sudo apt-get update && sudo apt-get install -y musl-tools libcap-ng-dev gcc make flex bison libelf-dev bc python3-pyelftools libdbus-1-dev
+
+      - name: Build agentd
+        run: |
+          rustup target add x86_64-unknown-linux-musl
+          cargo build --release --manifest-path crates/agentd/Cargo.toml --target x86_64-unknown-linux-musl
+          mkdir -p build
+          cp crates/agentd/target/x86_64-unknown-linux-musl/release/agentd build/agentd
+
+      - name: Build libkrunfw
+        run: |
+          cd vendor/libkrunfw
+          make -j$(nproc)
+          cd ../..
+          mkdir -p build
+          cp vendor/libkrunfw/libkrunfw.so.${{ env.LIBKRUNFW_VERSION }} build/
+          cd build
+          ln -sf libkrunfw.so.${{ env.LIBKRUNFW_VERSION }} libkrunfw.so.${{ env.LIBKRUNFW_ABI }}
+          ln -sf libkrunfw.so.${{ env.LIBKRUNFW_ABI }} libkrunfw.so
+
+      - name: Build msb
+        run: |
+          cargo build --release --no-default-features --features net -p microsandbox-cli
+          mkdir -p build
+          cp target/release/msb build/msb
+
+      - name: Build boot timing benchmark
+        run: cargo build --release -p boot-timing-ci
+
+      - name: Check boot timing thresholds
+        run: |
+          export MSB_PATH="${{ github.workspace }}/build/msb"
+          export LD_LIBRARY_PATH="${{ github.workspace }}/build:$HOME/.microsandbox/lib"
+          python3 scripts/ci/check_boot_timings.py \
+            --binary target/release/boot-timing-ci \
+            --config scripts/ci/boot-timing-thresholds.json \
+            --output boot-timing-results.json
+
+      - name: Upload boot timing results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: boot-timing-results
+          path: boot-timing-results.json
diff --git a/Cargo.lock b/Cargo.lock
index 1374e771d..3db8d26d4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -501,6 +501,15 @@ dependencies = [
  "piper",
 ]
 
+[[package]]
+name = "boot-timing-ci"
+version = "0.1.0"
+dependencies = [
+ "microsandbox",
+ "serde_json",
+ "tokio",
+]
+
 [[package]]
 name = "bumpalo"
 version = "3.20.2"
diff --git a/Cargo.toml b/Cargo.toml
index ec5f0a5d9..c7fa7e9f3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,6 +11,7 @@ members = [
     "crates/protocol",
     "crates/runtime",
     "crates/utils",
+    "examples/rust/boot-timing-ci",
     "examples/rust/fs-read-stream",
     "examples/rust/shell-attach",
     "examples/rust/metrics-stream",
diff --git a/crates/filesystem/build.rs b/crates/filesystem/build.rs
index 0cdf1a9b5..ca11ef24f 100644
--- a/crates/filesystem/build.rs
+++ b/crates/filesystem/build.rs
@@ -29,7 +29,7 @@ fn build_agentd(workspace_root: &Path, out_dir: &Path) {
         return;
     }
 
-    // In CI, prefer the locally-built agentd from workspace build/.
+    // In CI, prefer the locally-built agentd from workspace build/.
     if std::env::var_os("CI").is_some() {
         let local = workspace_root.join("build").join(AGENTD_BINARY);
         if local.is_file() {
@@ -37,7 +37,6 @@ fn build_agentd(workspace_root: &Path, out_dir: &Path) {
             return;
         }
     }
-    let _ = workspace_root;
 
     let arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap();
     let url = agentd_download_url(PREBUILT_VERSION, &arch);
diff --git a/crates/microsandbox/build.rs b/crates/microsandbox/build.rs
index bd96e610b..88f752c45 100644
--- a/crates/microsandbox/build.rs
+++ b/crates/microsandbox/build.rs
@@ -60,6 +60,7 @@ fn main() {
         return;
     }
 
+
     let url = bundle_url();
     println!(
         "cargo:warning=downloading microsandbox runtime dependencies (v{PREBUILT_VERSION})..."
@@ -136,7 +137,6 @@ fn install_ci_local_bundle(
     if std::env::var_os("CI").is_none() {
         return Ok(false);
     }
-
     let Some(build_dir) = workspace_build_dir() else {
         return Ok(false);
     };
@@ -161,7 +161,7 @@ fn install_ci_local_bundle(
     }
 
     create_symlinks(lib_dir, libkrunfw_name);
-    println!("cargo:warning=installed microsandbox runtime dependencies from local CI build/");
+    println!("cargo:warning=installed microsandbox runtime dependencies from local build/");
 
     Ok(true)
 }
diff --git a/crates/microsandbox/lib/lib.rs b/crates/microsandbox/lib/lib.rs
index ca5fb912d..4b4a2af7f 100644
--- a/crates/microsandbox/lib/lib.rs
+++ b/crates/microsandbox/lib/lib.rs
@@ -26,5 +26,5 @@ pub use microsandbox_utils::size;
 #[cfg(feature = "net")]
 pub use sandbox::NetworkPolicy;
 pub use sandbox::exec::{ExecEvent, ExecHandle};
-pub use sandbox::{ExecOutput, Sandbox, SandboxConfig};
+pub use sandbox::{BootTimings, ExecOutput, Sandbox, SandboxConfig};
 pub use volume::Volume;
diff --git a/crates/microsandbox/lib/sandbox/mod.rs b/crates/microsandbox/lib/sandbox/mod.rs
index 1b20ddd93..aa915cdb2 100644
--- a/crates/microsandbox/lib/sandbox/mod.rs
+++ b/crates/microsandbox/lib/sandbox/mod.rs
@@ -80,6 +80,33 @@ pub struct Sandbox {
     client: Arc,
 }
 
+/// Boot timing data reported by the guest in `core.ready`.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct BootTimings {
+    /// Guest `CLOCK_BOOTTIME` at the start of `agentd::main()`.
+    ///
+    /// In practice this tracks the time from VM entry until the guest reaches
+    /// userspace and starts `agentd`.
+    pub enter_to_boot_ns: u64,
+    /// Nanoseconds spent in synchronous guest init before the async agent loop.
+    pub boot_to_init_ns: u64,
+    /// Nanoseconds from `agentd::main()` start until `core.ready` was sent.
+    pub boot_to_ready_ns: u64,
+    /// Total guest time from kernel boot until `core.ready`.
+    pub enter_to_ready_ns: u64,
+}
+
+impl BootTimings {
+    fn from_ready(ready: &microsandbox_protocol::core::Ready) -> Self {
+        Self {
+            enter_to_boot_ns: ready.boot_time_ns,
+            boot_to_init_ns: ready.init_time_ns,
+            boot_to_ready_ns: ready.ready_time_ns.saturating_sub(ready.boot_time_ns),
+            enter_to_ready_ns: ready.ready_time_ns,
+        }
+    }
+}
+
 //--------------------------------------------------------------------------------------------------
 // Methods: Static
 //--------------------------------------------------------------------------------------------------
@@ -422,6 +449,11 @@ impl Sandbox {
         fs::SandboxFs::new(&self.client)
     }
 
+    /// Return the cached guest boot timing snapshot captured during startup.
+    pub fn boot_timings(&self) -> BootTimings {
+        BootTimings::from_ready(self.client.ready())
+    }
+
     /// Stop the sandbox gracefully by sending `core.shutdown` to agentd.
pub async fn stop(&self) -> MicrosandboxResult<()> { tracing::debug!(sandbox = %self.config.name, "stop: sending shutdown"); @@ -1706,11 +1738,12 @@ mod tests { image as image_entity, run as run_entity, sandbox_image as sandbox_image_entity, }; use microsandbox_migration::{Migrator, MigratorTrait}; + use microsandbox_protocol::core::Ready; use sea_orm::{ColumnTrait, ConnectOptions, Database, EntityTrait, QueryFilter, Set}; use tempfile::tempdir; use super::{ - RootfsSource, SandboxConfig, SandboxStatus, insert_sandbox_record, + BootTimings, RootfsSource, SandboxConfig, SandboxStatus, insert_sandbox_record, persist_oci_manifest_pin, prepare_create_target, reconcile_sandbox_runtime_state, remove_dir_if_exists, validate_rootfs_source, }; @@ -1800,6 +1833,20 @@ mod tests { ); } + #[test] + fn test_boot_timings_from_ready() { + let timings = BootTimings::from_ready(&Ready { + boot_time_ns: 70_000_000, + init_time_ns: 15_000_000, + ready_time_ns: 92_000_000, + }); + + assert_eq!(timings.enter_to_boot_ns, 70_000_000); + assert_eq!(timings.boot_to_init_ns, 15_000_000); + assert_eq!(timings.boot_to_ready_ns, 22_000_000); + assert_eq!(timings.enter_to_ready_ns, 92_000_000); + } + #[test] fn test_validate_rootfs_source_missing_bind_path() { let path = unique_temp_path("missing"); diff --git a/examples/rust/boot-timing-ci/Cargo.toml b/examples/rust/boot-timing-ci/Cargo.toml new file mode 100644 index 000000000..6ed28ab0b --- /dev/null +++ b/examples/rust/boot-timing-ci/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "boot-timing-ci" +version = "0.1.0" +edition = "2024" +publish = false + +[[bin]] +name = "boot-timing-ci" +path = "bin/main.rs" + +[dependencies] +microsandbox = { path = "../../../crates/microsandbox" } +serde_json = "1.0" +tokio = { version = "1.42", features = ["full"] } diff --git a/examples/rust/boot-timing-ci/bin/main.rs b/examples/rust/boot-timing-ci/bin/main.rs new file mode 100644 index 000000000..72d275d7f --- /dev/null +++ 
b/examples/rust/boot-timing-ci/bin/main.rs @@ -0,0 +1,102 @@ +use microsandbox::Sandbox; +use serde_json::json; +use std::path::{Path, PathBuf}; + +const SANDBOX_NAME: &str = "boot-timing-ci"; +const ROOTFS_ENV_VAR: &str = "MICROSANDBOX_BOOT_TIMING_ROOTFS"; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let sandbox = Sandbox::builder(SANDBOX_NAME) + .image(rootfs_path()) + .cpus(1) + .memory(256) + .quiet_logs() + .replace() + .create() + .await?; + + let timings = sandbox.boot_timings(); + sandbox.stop_and_wait().await?; + sandbox.remove_persisted().await?; + + println!( + "{}", + serde_json::to_string(&json!({ + "enter_to_boot_ns": timings.enter_to_boot_ns, + "boot_to_init_ns": timings.boot_to_init_ns, + "boot_to_ready_ns": timings.boot_to_ready_ns, + "enter_to_ready_ns": timings.enter_to_ready_ns, + "enter_to_boot_ms": ns_to_ms(timings.enter_to_boot_ns), + "boot_to_init_ms": ns_to_ms(timings.boot_to_init_ns), + "boot_to_ready_ms": ns_to_ms(timings.boot_to_ready_ns), + "enter_to_ready_ms": ns_to_ms(timings.enter_to_ready_ns), + }))? + ); + + Ok(()) +} + +fn ns_to_ms(value: u64) -> f64 { + value as f64 / 1_000_000.0 +} + +fn rootfs_path() -> String { + let arch = std::env::consts::ARCH; + let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let relative_rootfs = Path::new("examples/rust/root-bind/rootfs-alpine").join(arch); + + if let Some(path) = std::env::var_os(ROOTFS_ENV_VAR) { + let candidate = PathBuf::from(path); + return resolve_rootfs_candidate(candidate) + .unwrap_or_else(|reason| { + panic!( + "{ROOTFS_ENV_VAR} is set but unusable: {reason}. Set it to a populated {arch} rootfs directory." 
+ ) + }) + .display() + .to_string(); + } + + let mut candidates = Vec::new(); + candidates.push(manifest_dir.join("../root-bind/rootfs-alpine").join(arch)); + + for ancestor in manifest_dir.ancestors() { + candidates.push(ancestor.join(&relative_rootfs)); + if let Some(parent) = ancestor.parent() { + candidates.push(parent.join("microsandbox").join(&relative_rootfs)); + } + } + + for candidate in candidates { + if resolve_rootfs_candidate(candidate.clone()).is_ok() { + return candidate.display().to_string(); + } + } + + panic!( + "unable to find a populated {arch} rootfs for {SANDBOX_NAME}. \ +expected the root-bind submodule at ../root-bind/rootfs-alpine/{arch}, \ +but it is missing in this checkout. Run `git submodule update --init --recursive` \ +or set {ROOTFS_ENV_VAR} to a populated {arch} rootfs directory." + ); +} + +fn resolve_rootfs_candidate(candidate: PathBuf) -> Result { + if !candidate.exists() { + return Err(format!("path does not exist: {}", candidate.display())); + } + + if !candidate.is_dir() { + return Err(format!("path is not a directory: {}", candidate.display())); + } + + let mut entries = candidate + .read_dir() + .map_err(|err| format!("failed to read {}: {err}", candidate.display()))?; + if entries.next().is_none() { + return Err(format!("path is empty: {}", candidate.display())); + } + + Ok(candidate) +} diff --git a/scripts/ci/boot-timing-thresholds.json b/scripts/ci/boot-timing-thresholds.json new file mode 100644 index 000000000..ee0e8225d --- /dev/null +++ b/scripts/ci/boot-timing-thresholds.json @@ -0,0 +1,16 @@ +{ + "samples": 7, + "warmups": 1, + "baseline_ms": { + "enter_to_boot": 70.0, + "boot_to_init": 16.0 + }, + "max_regression_ms": { + "enter_to_boot": 20.0, + "boot_to_init": 8.0 + }, + "max_threshold_ms": { + "enter_to_boot": 100.0, + "boot_to_init": 30.0 + } +} diff --git a/scripts/ci/check_boot_timings.py b/scripts/ci/check_boot_timings.py new file mode 100644 index 000000000..9366e08af --- /dev/null +++ 
b/scripts/ci/check_boot_timings.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import statistics +import subprocess +import sys +from pathlib import Path + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument("--binary", required=True) + parser.add_argument("--config", required=True) + parser.add_argument("--output", required=True) + return parser.parse_args() + + +def run_sample(binary: str) -> dict: + proc = subprocess.run( + [binary], + check=True, + capture_output=True, + text=True, + ) + stdout = proc.stdout.strip() + if not stdout: + raise RuntimeError("benchmark binary produced no stdout") + return json.loads(stdout) + + +def metric_summary(samples: list[dict], key: str) -> dict: + values = [sample[key] for sample in samples] + return { + "samples": values, + "median": statistics.median(values), + "minimum": min(values), + "maximum": max(values), + } + + +def render_summary(results: dict, failures: list[str]) -> str: + labels = { + "enter_to_boot": "vm enter -> boot", + "boot_to_init": "boot -> init", + } + lines = [ + "## Boot timing gate", + "", + "| Metric | Median (ms) | Min | Max | Baseline | Max regression | Max threshold |", + "| --- | ---: | ---: | ---: | ---: | ---: | ---: |", + ] + + for metric in ("enter_to_boot", "boot_to_init"): + summary = results["metrics"][metric] + lines.append( + "| {metric} | {median:.3f} | {minimum:.3f} | {maximum:.3f} | {baseline:.3f} | {regression:.3f} | {threshold:.3f} |".format( + metric=labels[metric], + median=summary["median_ms"], + minimum=summary["min_ms"], + maximum=summary["max_ms"], + baseline=summary["baseline_ms"], + regression=summary["max_regression_ms"], + threshold=summary["max_threshold_ms"], + ) + ) + + lines.append("") + if failures: + lines.append("Result: failed") + lines.extend(f"- {failure}" for failure in failures) + else: + lines.append("Result: passed") + + return "\n".join(lines) + "\n" + + +def main() 
-> int: + args = parse_args() + config = json.loads(Path(args.config).read_text()) + + for _ in range(int(config["warmups"])): + run_sample(args.binary) + + raw_samples = [run_sample(args.binary) for _ in range(int(config["samples"]))] + + enter_to_boot = metric_summary(raw_samples, "enter_to_boot_ms") + boot_to_init = metric_summary(raw_samples, "boot_to_init_ms") + + results = { + "raw_samples": raw_samples, + "metrics": { + "enter_to_boot": { + "median_ms": enter_to_boot["median"], + "min_ms": enter_to_boot["minimum"], + "max_ms": enter_to_boot["maximum"], + "baseline_ms": config["baseline_ms"]["enter_to_boot"], + "max_regression_ms": config["max_regression_ms"]["enter_to_boot"], + "max_threshold_ms": config["max_threshold_ms"]["enter_to_boot"], + }, + "boot_to_init": { + "median_ms": boot_to_init["median"], + "min_ms": boot_to_init["minimum"], + "max_ms": boot_to_init["maximum"], + "baseline_ms": config["baseline_ms"]["boot_to_init"], + "max_regression_ms": config["max_regression_ms"]["boot_to_init"], + "max_threshold_ms": config["max_threshold_ms"]["boot_to_init"], + }, + }, + } + + failures = [] + for metric, summary in results["metrics"].items(): + if summary["median_ms"] > summary["max_threshold_ms"]: + failures.append( + f"{metric} median {summary['median_ms']:.3f}ms exceeded max threshold {summary['max_threshold_ms']:.3f}ms" + ) + + regression = summary["median_ms"] - summary["baseline_ms"] + if regression > summary["max_regression_ms"]: + failures.append( + f"{metric} median regressed by {regression:.3f}ms (baseline {summary['baseline_ms']:.3f}ms, allowed {summary['max_regression_ms']:.3f}ms)" + ) + + Path(args.output).write_text(json.dumps(results, indent=2) + "\n") + + summary = render_summary(results, failures) + print(summary, end="") + + step_summary = os.environ.get("GITHUB_STEP_SUMMARY") + if step_summary: + with open(step_summary, "a", encoding="utf-8") as handle: + handle.write(summary) + + return 1 if failures else 0 + + +if __name__ == 
"__main__": + sys.exit(main())