Skip to content

Commit 42e7b80

Browse files
authored
feat(podman): make container health check interval configurable (#1833)
* feat(podman): make container health check interval configurable The Podman driver hardcoded a 3-second health check interval, which spawns a conmon subprocess on every tick. On systems running multiple sandboxes this creates sustained process churn and unnecessary CPU overhead. Add a `health_check_interval_secs` field to `PodmanComputeConfig` (default: 10s) and wire it into the container health check spec. Operators can tune it further via `[openshell.drivers.podman]` in gateway.toml. Signed-off-by: Sagi Shnaidman <sshnaidm@redhat.com> * docs(podman): document health_check_interval_secs config field Signed-off-by: Sagi Shnaidman <sshnaidm@redhat.com> * docs(podman): document health_check_interval_secs zero-disables behavior Signed-off-by: Sagi Shnaidman <sshnaidm@redhat.com> --------- Signed-off-by: Sagi Shnaidman <sshnaidm@redhat.com>
1 parent c5ce3ed commit 42e7b80

4 files changed

Lines changed: 43 additions & 2 deletions

File tree

crates/openshell-driver-podman/src/config.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,18 @@ pub struct PodmanComputeConfig {
126126
/// `template.driver_config`.
127127
#[serde(default)]
128128
pub enable_bind_mounts: bool,
129+
/// Health check interval in seconds for sandbox containers.
130+
///
131+
/// Podman runs the health check command at this interval to determine
132+
/// container readiness. Lower values detect readiness faster but
133+
/// increase process churn (each check spawns a conmon subprocess).
134+
/// Set to `0` to disable health checks entirely.
135+
/// Defaults to [`DEFAULT_HEALTH_CHECK_INTERVAL_SECS`] (10 seconds).
136+
pub health_check_interval_secs: u64,
129137
}
130138

139+
/// Default health check interval, in seconds, for sandbox containers.
///
/// `PodmanComputeConfig::default()` uses this value to populate
/// `health_check_interval_secs`.
pub const DEFAULT_HEALTH_CHECK_INTERVAL_SECS: u64 = 10;
140+
131141
impl PodmanComputeConfig {
132142
/// Returns `true` when all three TLS paths are configured.
133143
#[must_use]
@@ -251,6 +261,7 @@ impl Default for PodmanComputeConfig {
251261
guest_tls_key: None,
252262
sandbox_pids_limit: DEFAULT_SANDBOX_PIDS_LIMIT,
253263
enable_bind_mounts: false,
264+
health_check_interval_secs: DEFAULT_HEALTH_CHECK_INTERVAL_SECS,
254265
}
255266
}
256267
}
@@ -273,6 +284,10 @@ impl std::fmt::Debug for PodmanComputeConfig {
273284
.field("guest_tls_key", &self.guest_tls_key)
274285
.field("sandbox_pids_limit", &self.sandbox_pids_limit)
275286
.field("enable_bind_mounts", &self.enable_bind_mounts)
287+
.field(
288+
"health_check_interval_secs",
289+
&self.health_check_interval_secs,
290+
)
276291
.finish()
277292
}
278293
}
@@ -314,6 +329,15 @@ mod tests {
314329
});
315330
}
316331

332+
// Guards the default wiring: a default-constructed config must carry the
// crate-level default health check interval rather than some other value.
#[test]
333+
fn default_config_sets_health_check_interval() {
334+
let cfg = PodmanComputeConfig::default();
335+
// Compare against the named constant (not a literal) so the test keeps
// passing if the default value itself is retuned.
assert_eq!(
336+
cfg.health_check_interval_secs,
337+
DEFAULT_HEALTH_CHECK_INTERVAL_SECS
338+
);
339+
}
340+
317341
#[test]
318342
fn default_config_sets_driver_owned_pids_limit() {
319343
let cfg = PodmanComputeConfig::default();

crates/openshell-driver-podman/src/container.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -880,7 +880,7 @@ pub fn try_build_container_spec_with_token(
880880
openshell_core::config::DEFAULT_SSH_PORT
881881
),
882882
],
883-
interval: 3_000_000_000,
883+
interval: config.health_check_interval_secs * 1_000_000_000,
884884
timeout: 2_000_000_000,
885885
retries: 10,
886886
start_period: 5_000_000_000,
@@ -1328,6 +1328,19 @@ mod tests {
13281328
);
13291329
}
13301330

1331+
// Verifies that the configured health check interval (in seconds) reaches the
// generated container spec, converted to nanoseconds.
//
// NOTE(review): the spec builder computes the interval as
// `config.health_check_interval_secs * 1_000_000_000` with a plain `*`.
// Values above `u64::MAX / 1_000_000_000` would overflow; consider a
// checked/saturating multiply and a test for that boundary.
#[test]
1332+
fn container_spec_healthcheck_interval_from_config() {
1333+
let sandbox = test_sandbox("test-id", "test-name");
1334+
let mut config = test_config();
1335+
// Use a value that differs from the default so the assertion proves the
// config field is actually consulted.
config.health_check_interval_secs = 30;
1336+
let spec = build_container_spec(&sandbox, &config);
1337+
1338+
// The interval is read from the `healthconfig.Interval` entry of the spec.
let interval = spec["healthconfig"]["Interval"]
1339+
.as_u64()
1340+
.expect("healthcheck interval should be a u64");
1341+
// 30 seconds expressed in nanoseconds.
assert_eq!(interval, 30_000_000_000);
1342+
}
1343+
13311344
#[test]
13321345
fn container_spec_required_vars_cannot_be_overridden() {
13331346
use openshell_core::proto::compute::v1::{DriverSandboxSpec, DriverSandboxTemplate};

crates/openshell-driver-podman/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ async fn main() -> Result<()> {
135135
guest_tls_cert: args.podman_tls_cert,
136136
guest_tls_key: args.podman_tls_key,
137137
sandbox_pids_limit: args.sandbox_pids_limit,
138-
enable_bind_mounts: false,
138+
..PodmanComputeConfig::default()
139139
})
140140
.await
141141
.into_diagnostic()?;

docs/reference/gateway-config.mdx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,10 @@ guest_tls_key = "/etc/openshell/certs/client-key.pem"
262262
enable_bind_mounts = false
263263
# Set to 0 to leave Podman's runtime default unchanged.
264264
sandbox_pids_limit = 2048
265+
# Health check interval in seconds. Lower values detect readiness faster
266+
# but increase process churn (each check spawns a conmon subprocess).
267+
# Set to 0 to disable health checks entirely. Default: 10.
268+
health_check_interval_secs = 10
265269
```
266270

267271
### MicroVM

0 commit comments

Comments
 (0)