From 5f6a6b329c33e6b3ff089e7cc5c42c31197b0bb0 Mon Sep 17 00:00:00 2001 From: Sagi Shnaidman Date: Tue, 9 Jun 2026 11:31:16 +0300 Subject: [PATCH 1/3] feat(podman): make container health check interval configurable The Podman driver hardcoded a 3-second health check interval, which spawns a conmon subprocess on every tick. On systems running multiple sandboxes this creates sustained process churn and unnecessary CPU overhead. Add a `health_check_interval_secs` field to `PodmanComputeConfig` (default: 10s) and wire it into the container health check spec. Operators can tune it further via `[openshell.drivers.podman]` in gateway.toml. Signed-off-by: Sagi Shnaidman --- crates/openshell-driver-podman/src/config.rs | 20 +++++++++++++++++++ .../openshell-driver-podman/src/container.rs | 15 +++++++++++++- crates/openshell-driver-podman/src/main.rs | 2 +- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index e21c66176..5220eb986 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -126,8 +126,17 @@ pub struct PodmanComputeConfig { /// `template.driver_config`. #[serde(default)] pub enable_bind_mounts: bool, + /// Health check interval in seconds for sandbox containers. + /// + /// Podman runs the health check command at this interval to determine + /// container readiness. Lower values detect readiness faster but + /// increase process churn (each check spawns a conmon subprocess). + /// Defaults to [`DEFAULT_HEALTH_CHECK_INTERVAL_SECS`]. + pub health_check_interval_secs: u64, } +pub const DEFAULT_HEALTH_CHECK_INTERVAL_SECS: u64 = 10; + impl PodmanComputeConfig { /// Returns `true` when all three TLS paths are configured. #[must_use] @@ -251,6 +260,7 @@ impl Default for PodmanComputeConfig { guest_tls_key: None, sandbox_pids_limit: DEFAULT_SANDBOX_PIDS_LIMIT, enable_bind_mounts: false, + health_check_interval_secs: DEFAULT_HEALTH_CHECK_INTERVAL_SECS, } } } @@ -273,6 +283,10 @@ impl std::fmt::Debug for PodmanComputeConfig { .field("guest_tls_key", &self.guest_tls_key) .field("sandbox_pids_limit", &self.sandbox_pids_limit) .field("enable_bind_mounts", &self.enable_bind_mounts) + .field( + "health_check_interval_secs", + &self.health_check_interval_secs, + ) .finish() } } @@ -314,6 +328,12 @@ mod tests { }); } + #[test] + fn default_config_sets_health_check_interval() { + let cfg = PodmanComputeConfig::default(); + assert_eq!(cfg.health_check_interval_secs, DEFAULT_HEALTH_CHECK_INTERVAL_SECS); + } + #[test] fn default_config_sets_driver_owned_pids_limit() { let cfg = PodmanComputeConfig::default(); diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs index 0dd0c42c2..13516a607 100644 --- a/crates/openshell-driver-podman/src/container.rs +++ b/crates/openshell-driver-podman/src/container.rs @@ -880,7 +880,7 @@ pub fn try_build_container_spec_with_token( openshell_core::config::DEFAULT_SSH_PORT ), ], - interval: 3_000_000_000, + interval: config.health_check_interval_secs * 1_000_000_000, timeout: 2_000_000_000, retries: 10, start_period: 5_000_000_000, @@ -1328,6 +1328,19 @@ mod tests { ); } + #[test] + fn container_spec_healthcheck_interval_from_config() { + let sandbox = test_sandbox("test-id", "test-name"); + let mut config = test_config(); + config.health_check_interval_secs = 30; + let spec = build_container_spec(&sandbox, &config); + + let interval = spec["healthconfig"]["Interval"] + .as_u64() + .expect("healthcheck interval should be a u64"); + assert_eq!(interval, 30_000_000_000); + } + #[test] fn container_spec_required_vars_cannot_be_overridden() { use openshell_core::proto::compute::v1::{DriverSandboxSpec, DriverSandboxTemplate}; diff --git a/crates/openshell-driver-podman/src/main.rs b/crates/openshell-driver-podman/src/main.rs index 2d8d4055b..e6ba7b9ff 100644 --- a/crates/openshell-driver-podman/src/main.rs +++ b/crates/openshell-driver-podman/src/main.rs @@ -135,7 +135,7 @@ async fn main() -> Result<()> { guest_tls_cert: args.podman_tls_cert, guest_tls_key: args.podman_tls_key, sandbox_pids_limit: args.sandbox_pids_limit, - enable_bind_mounts: false, + ..PodmanComputeConfig::default() }) .await .into_diagnostic()?; From fe7d584036742c279caf6e2141d5d3fefd8cec3b Mon Sep 17 00:00:00 2001 From: Sagi Shnaidman Date: Tue, 9 Jun 2026 11:59:41 +0300 Subject: [PATCH 2/3] docs(podman): document health_check_interval_secs config field Signed-off-by: Sagi Shnaidman --- crates/openshell-driver-podman/src/config.rs | 5 ++++- docs/reference/gateway-config.mdx | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index 5220eb986..c2333719e 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -331,7 +331,10 @@ mod tests { #[test] fn default_config_sets_health_check_interval() { let cfg = PodmanComputeConfig::default(); - assert_eq!(cfg.health_check_interval_secs, DEFAULT_HEALTH_CHECK_INTERVAL_SECS); + assert_eq!( + cfg.health_check_interval_secs, + DEFAULT_HEALTH_CHECK_INTERVAL_SECS + ); } #[test] diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index 1ddf70552..a54472dec 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -262,6 +262,10 @@ guest_tls_key = "/etc/openshell/certs/client-key.pem" enable_bind_mounts = false # Set to 0 to leave Podman's runtime default unchanged. sandbox_pids_limit = 2048 +# Health check interval in seconds. Lower values detect readiness faster +# but increase process churn (each check spawns a conmon subprocess). +# Default: 10. +health_check_interval_secs = 10 ``` ### MicroVM From 7421b23d8c9417bf9a0f8868f749f796650cbb11 Mon Sep 17 00:00:00 2001 From: Sagi Shnaidman Date: Wed, 10 Jun 2026 18:09:36 +0300 Subject: [PATCH 3/3] docs(podman): document health_check_interval_secs zero-disables behavior Signed-off-by: Sagi Shnaidman --- crates/openshell-driver-podman/src/config.rs | 3 ++- docs/reference/gateway-config.mdx | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index c2333719e..0e29f52dd 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -131,7 +131,8 @@ pub struct PodmanComputeConfig { /// Podman runs the health check command at this interval to determine /// container readiness. Lower values detect readiness faster but /// increase process churn (each check spawns a conmon subprocess). - /// Defaults to [`DEFAULT_HEALTH_CHECK_INTERVAL_SECS`]. + /// Set to `0` to disable health checks entirely. + /// Defaults to [`DEFAULT_HEALTH_CHECK_INTERVAL_SECS`] (10 seconds). pub health_check_interval_secs: u64, } diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index a54472dec..024dfcd57 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -264,7 +264,7 @@ enable_bind_mounts = false sandbox_pids_limit = 2048 # Health check interval in seconds. Lower values detect readiness faster # but increase process churn (each check spawns a conmon subprocess). -# Default: 10. +# Set to 0 to disable health checks entirely. Default: 10. health_check_interval_secs = 10 ```