diff --git a/Cargo.lock b/Cargo.lock index f693acd66..96e02e545 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3503,6 +3503,7 @@ dependencies = [ "kube-runtime", "miette", "openshell-core", + "openshell-policy", "prost", "prost-types", "serde", @@ -3800,6 +3801,7 @@ dependencies = [ "nix", "openshell-core", "openshell-ocsf", + "openshell-policy", "rand_core 0.6.4", "russh", "rustix 1.1.4", diff --git a/crates/openshell-core/src/sandbox_env.rs b/crates/openshell-core/src/sandbox_env.rs index b457a4a8e..c56a1c889 100644 --- a/crates/openshell-core/src/sandbox_env.rs +++ b/crates/openshell-core/src/sandbox_env.rs @@ -71,3 +71,18 @@ pub const K8S_SA_TOKEN_FILE: &str = "OPENSHELL_K8S_SA_TOKEN_FILE"; /// exchanges without using SPIFFE for gateway authentication. pub const PROVIDER_SPIFFE_WORKLOAD_API_SOCKET: &str = "OPENSHELL_PROVIDER_SPIFFE_WORKLOAD_API_SOCKET"; + +/// Resolved sandbox UID used to override `run_as_user` when the policy +/// specifies a numeric value instead of the hardcoded "sandbox" user name. +/// +/// Set by compute drivers (Kubernetes, Docker, VM) from resolved config or +/// cluster autodetection. The supervisor reads this at startup and uses it +/// directly with `setuid()` / `chown()` without requiring an `/etc/passwd` +/// entry in the sandbox image. +pub const SANDBOX_UID: &str = "OPENSHELL_SANDBOX_UID"; + +/// Resolved sandbox GID paired with [`SANDBOX_UID`]. +/// +/// Used alongside UID for PVC init container `chown` operations and when the +/// supervisor drops privileges to a group other than the UID's primary group. +pub const SANDBOX_GID: &str = "OPENSHELL_SANDBOX_GID"; diff --git a/crates/openshell-driver-kubernetes/Cargo.toml b/crates/openshell-driver-kubernetes/Cargo.toml index 07fa91015..2c02f864a 100644 --- a/crates/openshell-driver-kubernetes/Cargo.toml +++ b/crates/openshell-driver-kubernetes/Cargo.toml @@ -16,6 +16,7 @@ path = "src/main.rs" [dependencies] openshell-core = { path = "../openshell-core", default-features = false } +openshell-policy = { path = "../openshell-policy" } tokio = { workspace = true } tonic = { workspace = true, features = ["transport"] } diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index 4c1153b08..92a666c8c 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -211,6 +211,16 @@ pub struct KubernetesComputeConfig { deserialize_with = "deserialize_provider_spiffe_workload_api_socket_path" )] pub provider_spiffe_workload_api_socket_path: String, + /// UID used for the supervisor container `securityContext.runAsUser` and + /// PVC init container ownership operations. When empty, the driver + /// auto-detects from OpenShift SCC annotations on the target namespace; + /// if those are also absent, falls back to `1000`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub sandbox_uid: Option, + /// GID used alongside `sandbox_uid` for PVC init container operations. + /// When empty and `sandbox_uid` is set, defaults to the resolved UID. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub sandbox_gid: Option, } /// Lower bound enforced by kubelet for projected SA tokens. @@ -221,6 +231,18 @@ pub const MIN_SA_TOKEN_TTL_SECS: i64 = 600; /// pod start). pub const MAX_SA_TOKEN_TTL_SECS: i64 = 86_400; +/// Default sandbox UID used when neither config nor OpenShift SCC annotations +/// provide a resolved value. +pub(crate) const DEFAULT_SANDBOX_UID: u32 = 1000; + +/// The annotation key for the OpenShift ServiceAccount UID range. +/// Format: `/` (e.g. `1000000000/10000`). +pub const ANNOTATION_SCC_UID_RANGE: &str = "openshift.io/sa.scc.uid-range"; + +/// The annotation key for the OpenShift ServiceAccount supplemental groups. +/// Format: `/` (e.g. `1000000000/10000`). +pub const ANNOTATION_SCC_SUPPLEMENTAL_GROUPS: &str = "openshift.io/sa.scc.supplemental-groups"; + impl Default for KubernetesComputeConfig { fn default() -> Self { Self { @@ -246,6 +268,8 @@ impl Default for KubernetesComputeConfig { default_runtime_class_name: String::new(), sa_token_ttl_secs: 3600, provider_spiffe_workload_api_socket_path: String::new(), + sandbox_uid: None, + sandbox_gid: None, } } } @@ -277,6 +301,64 @@ impl KubernetesComputeConfig { &self.provider_spiffe_workload_api_socket_path, ) } + + /// Resolve the sandbox UID/GID pair. + /// + /// Resolution order: + /// 1. Configured `sandbox_uid` / `sandbox_gid` (explicit override) + /// 2. OpenShift SCC namespace annotations (`sa.scc.uid-range`, + /// `sa.scc.supplemental-groups`) — passed in as the optional + /// `namespace_annotations` map + /// 3. Fallback defaults: UID=`1000`, GID=UID + pub fn resolve_sandbox_uid( + &self, + namespace_annotations: Option<&std::collections::BTreeMap>, + ) -> u32 { + if let Some(uid) = self.sandbox_uid { + return uid; + } + // Try OpenShift SCC annotation. + if let Some(anns) = namespace_annotations { + if let Some(range) = anns.get(ANNOTATION_SCC_UID_RANGE) { + if let Some(uid) = Self::from_open_shift_uid_range(range) { + return uid; + } + } + } + DEFAULT_SANDBOX_UID + } + + pub fn resolve_sandbox_gid( + &self, + resolved_uid: u32, + _namespace_annotations: Option<&std::collections::BTreeMap>, + ) -> u32 { + self.sandbox_gid + .or_else(|| self.sandbox_uid) + .unwrap_or(resolved_uid) + } + + /// Parse OpenShift SCC `sa.scc.uid-range` annotation. + /// + /// Format: `/` (e.g. `1000000000/10000`). + pub fn from_open_shift_uid_range(annotation: &str) -> Option { + let (start, _) = annotation.split_once('/')?; + start + .trim() + .parse::() + .ok() + .filter(|&uid| uid >= openshell_policy::MIN_SANDBOX_UID) + } + + /// Parse OpenShift SCC `sa.scc.supplemental-groups` annotation. + pub fn from_open_shift_supplemental_groups(annotation: &str) -> Option { + let (start, _) = annotation.split_once('/')?; + start + .trim() + .parse::() + .ok() + .filter(|&gid| gid >= openshell_policy::MIN_SANDBOX_UID) + } } fn validate_provider_spiffe_workload_api_socket_path_value( @@ -314,6 +396,7 @@ fn validate_provider_spiffe_workload_api_socket_path_value( #[cfg(test)] mod tests { use super::*; + use std::collections::BTreeMap as HashMap; #[test] fn default_workspace_storage_size_is_2gi() { @@ -459,4 +542,128 @@ mod tests { let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap(); assert_eq!(cfg.image_pull_secrets, ["regcred", "backup-regcred"]); } + + #[test] + fn default_sandbox_uid_and_gid_are_none() { + let cfg = KubernetesComputeConfig::default(); + assert_eq!(cfg.sandbox_uid, None); + assert_eq!(cfg.sandbox_gid, None); + } + + #[test] + fn serde_override_sandbox_uid() { + let json = serde_json::json!({ + "sandbox_uid": 1500 + }); + let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap(); + assert_eq!(cfg.sandbox_uid, Some(1500)); + } + + #[test] + fn serde_override_sandbox_gid() { + let json = serde_json::json!({ + "sandbox_gid": 2000 + }); + let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap(); + assert_eq!(cfg.sandbox_gid, Some(2000)); + } + + #[test] + fn parse_openshift_uid_range() { + assert_eq!( + KubernetesComputeConfig::from_open_shift_uid_range("1000000000/10000"), + Some(1000000000) + ); + assert_eq!( + KubernetesComputeConfig::from_open_shift_uid_range("1000/50000"), + Some(1000) + ); + } + + #[test] + fn parse_openshift_uid_range_rejects_below_min() { + // 999 is below MIN_SANDBOX_UID (1000) — should be rejected. + assert_eq!( + KubernetesComputeConfig::from_open_shift_uid_range("999/50000"), + None + ); + } + + #[test] + fn parse_openshift_supplemental_groups() { + assert_eq!( + KubernetesComputeConfig::from_open_shift_supplemental_groups("1000/50000"), + Some(1000) + ); + } + + #[test] + fn resolve_sandbox_uid_prefers_config() { + let cfg = KubernetesComputeConfig { + sandbox_uid: Some(5000), + ..KubernetesComputeConfig::default() + }; + // Config value should win even when annotations are present. + let mut anns: HashMap = HashMap::new(); + anns.insert( + ANNOTATION_SCC_UID_RANGE.to_string(), + "1000000000/10000".to_string(), + ); + assert_eq!(cfg.resolve_sandbox_uid(Some(&anns)), 5000); + } + + #[test] + fn resolve_sandbox_uid_falls_back_to_openshift_annotation() { + let cfg = KubernetesComputeConfig::default(); + let mut anns: HashMap = HashMap::new(); + anns.insert( + ANNOTATION_SCC_UID_RANGE.to_string(), + "1000000000/10000".to_string(), + ); + assert_eq!(cfg.resolve_sandbox_uid(Some(&anns)), 1000000000); + } + + #[test] + fn resolve_sandbox_uid_falls_back_to_default() { + let cfg = KubernetesComputeConfig::default(); + // No config, no annotations. + assert_eq!(cfg.resolve_sandbox_uid(None), DEFAULT_SANDBOX_UID); + // Empty annotations map. + let anns: HashMap = HashMap::new(); + assert_eq!(cfg.resolve_sandbox_uid(Some(&anns)), DEFAULT_SANDBOX_UID); + } + + #[test] + fn resolve_sandbox_gid_prefers_config() { + let cfg = KubernetesComputeConfig { + sandbox_uid: Some(5000), + sandbox_gid: Some(6000), + ..KubernetesComputeConfig::default() + }; + assert_eq!( + cfg.resolve_sandbox_gid(cfg.resolve_sandbox_uid(None), None), + 6000 + ); + } + + #[test] + fn resolve_sandbox_gid_falls_back_to_uid() { + let cfg = KubernetesComputeConfig { + sandbox_uid: Some(5000), + ..KubernetesComputeConfig::default() + }; + // sandbox_gid is None, should fall back to sandbox_uid. + assert_eq!( + cfg.resolve_sandbox_gid(cfg.resolve_sandbox_uid(None), None), + 5000 + ); + } + + #[test] + fn resolve_sandbox_gid_falls_back_to_resolved_uid() { + let cfg = KubernetesComputeConfig::default(); + // Both are None, should use the resolved UID. + let uid = cfg.resolve_sandbox_uid(None); + assert_eq!(cfg.resolve_sandbox_gid(uid, None), uid); + } } diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index dc636efc3..e3e152356 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -3,12 +3,13 @@ //! Kubernetes compute driver. +use super::AppArmorProfile; use crate::config::{ - AppArmorProfile, DEFAULT_SANDBOX_SERVICE_ACCOUNT_NAME, DEFAULT_WORKSPACE_STORAGE_SIZE, + DEFAULT_SANDBOX_SERVICE_ACCOUNT_NAME, DEFAULT_SANDBOX_UID, DEFAULT_WORKSPACE_STORAGE_SIZE, KubernetesComputeConfig, SupervisorSideloadMethod, }; use futures::{Stream, StreamExt, TryStreamExt}; -use k8s_openapi::api::core::v1::{Event as KubeEventObj, Node}; +use k8s_openapi::api::core::v1::{Event as KubeEventObj, Namespace, Node}; use kube::api::{Api, ApiResource, DeleteParams, ListParams, PostParams}; use kube::core::gvk::GroupVersionKind; use kube::core::{DynamicObject, ObjectMeta}; @@ -267,6 +268,52 @@ impl KubernetesComputeDriver { Api::namespaced_with(self.client.clone(), &self.config.namespace, &resource) } + /// Resolve sandbox UID/GID from config or OpenShift SCC namespace annotations. + /// + /// Returns `(uid, gid, ns_annotations_map)`: + /// - If `sandbox_uid` is set in config, returns that (with fallback GID) + /// - Otherwise fetches the target namespace and checks for + /// `openshift.io/sa.scc.uid-range` / `openshift.io/sa.scc.supplemental-groups` + /// annotations. + /// - If neither config nor OpenShift is found, returns `(1000, 1000, {})` as defaults. + async fn resolve_sandbox_identity(&self) -> (u32, u32, BTreeMap) { + // Explicit config takes priority — skip namespace lookup entirely. + if self.config.sandbox_uid.is_some() { + let uid = self.config.resolve_sandbox_uid(None); + let gid = self.config.resolve_sandbox_gid(uid, None); + return (uid, gid, BTreeMap::new()); + } + + // Try to read namespace annotations for OpenShift SCC. + // Namespace is namespaced so Api::all works (it's cluster-scoped but + // can list all namespaces) and we filter by name, or use Api::namespaced. + let ns_api: Api = Api::all(self.client.clone()); + match tokio::time::timeout(KUBE_API_TIMEOUT, ns_api.get(self.config.namespace.as_str())) + .await + { + Ok(Ok(ns)) => { + let anns = ns.metadata.annotations.unwrap_or_default(); + let uid = self.config.resolve_sandbox_uid(Some(&anns)); + // Collect supplemental groups annotation for sandbox init containers. + let gid = if let Some(sup_range) = + anns.get(crate::config::ANNOTATION_SCC_SUPPLEMENTAL_GROUPS) + { + KubernetesComputeConfig::from_open_shift_supplemental_groups(sup_range) + .unwrap_or(uid) + } else { + uid + }; + (uid, gid, anns) + } + Ok(Err(_)) | Err(_) => { + // Namespace fetch failed or timed out — fall back to defaults. + let uid = DEFAULT_SANDBOX_UID; + let gid = uid; + (uid, gid, BTreeMap::new()) + } + } + } + async fn has_gpu_capacity(&self) -> Result { let nodes: Api = Api::all(self.client.clone()); let node_list = nodes.list(&ListParams::default()).await?; @@ -384,15 +431,9 @@ impl KubernetesComputeDriver { "Creating sandbox in Kubernetes" ); - let gvk = GroupVersionKind::gvk(SANDBOX_GROUP, SANDBOX_VERSION, SANDBOX_KIND); - let resource = ApiResource::from_gvk(&gvk); - let mut obj = DynamicObject::new(name, &resource); - obj.metadata = ObjectMeta { - name: Some(name.to_string()), - namespace: Some(self.config.namespace.clone()), - labels: Some(sandbox_labels(sandbox)), - ..Default::default() - }; + // Resolve sandbox UID/GID from config or OpenShift SCC namespace annotations. + let (resolved_uid, resolved_gid, ns_annotations) = self.resolve_sandbox_identity().await; + let params = SandboxPodParams { default_image: &self.config.default_image, image_pull_policy: &self.config.image_pull_policy, @@ -416,7 +457,25 @@ impl KubernetesComputeDriver { provider_spiffe_workload_api_socket_path: &self .config .provider_spiffe_workload_api_socket_path, + sandbox_uid: resolved_uid, + sandbox_gid: resolved_gid, }; + + let gvk = GroupVersionKind::gvk(SANDBOX_GROUP, SANDBOX_VERSION, SANDBOX_KIND); + let resource = ApiResource::from_gvk(&gvk); + let mut obj = DynamicObject::new(name, &resource); + obj.metadata = ObjectMeta { + name: Some(name.to_string()), + namespace: Some(self.config.namespace.clone()), + labels: Some(sandbox_labels(sandbox)), + annotations: if ns_annotations.is_empty() { + None + } else { + Some(ns_annotations) + }, + ..Default::default() + }; + obj.data = sandbox_to_k8s_spec(sandbox.spec.as_ref(), ¶ms); let api = self.api(); @@ -909,6 +968,8 @@ fn apply_supervisor_sideload( supervisor_image: &str, supervisor_image_pull_policy: &str, method: SupervisorSideloadMethod, + sandbox_uid: u32, + sandbox_gid: u32, ) { let Some(spec) = pod_template.get_mut("spec").and_then(|v| v.as_object_mut()) else { return; @@ -988,6 +1049,23 @@ fn apply_supervisor_sideload( if let Some(volume_mounts) = volume_mounts { volume_mounts.push(supervisor_volume_mount()); } + + // Inject resolved sandbox UID/GID as environment variables so the + // supervisor can use them directly without /etc/passwd lookups. + let env = container + .entry("env") + .or_insert_with(|| serde_json::json!([])) + .as_array_mut(); + if let Some(env) = env { + env.push(serde_json::json!({ + "name": openshell_core::sandbox_env::SANDBOX_UID.to_string(), + "value": sandbox_uid.to_string(), + })); + env.push(serde_json::json!({ + "name": openshell_core::sandbox_env::SANDBOX_GID.to_string(), + "value": sandbox_gid.to_string(), + })); + } } } @@ -1010,6 +1088,8 @@ fn apply_workspace_persistence( pod_template: &mut serde_json::Value, image: &str, image_pull_policy: &str, + sandbox_uid: u32, + sandbox_gid: u32, ) { let Some(spec) = pod_template.get_mut("spec").and_then(|v| v.as_object_mut()) else { return; @@ -1072,7 +1152,11 @@ fn apply_workspace_persistence( "name": WORKSPACE_INIT_CONTAINER_NAME, "image": image, "command": ["sh", "-c", copy_cmd], - "securityContext": { "runAsUser": 0 }, + "securityContext": { + "runAsUser": sandbox_uid, + "runAsGroup": sandbox_gid, + "fsGroup": sandbox_gid, + }, "volumeMounts": [{ "name": WORKSPACE_VOLUME_NAME, "mountPath": WORKSPACE_INIT_MOUNT_PATH @@ -1134,6 +1218,10 @@ struct SandboxPodParams<'a> { sa_token_ttl_secs: i64, provider_spiffe_enabled: bool, provider_spiffe_workload_api_socket_path: &'a str, + /// Resolved sandbox UID for supervisor `runAsUser` and env var. + sandbox_uid: u32, + /// Resolved sandbox GID for PVC init container operations. + sandbox_gid: u32, } impl Default for SandboxPodParams<'_> { @@ -1159,6 +1247,8 @@ impl Default for SandboxPodParams<'_> { sa_token_ttl_secs: 3600, provider_spiffe_enabled: false, provider_spiffe_workload_api_socket_path: "", + sandbox_uid: DEFAULT_SANDBOX_UID, + sandbox_gid: DEFAULT_SANDBOX_UID, } } } @@ -1510,13 +1600,21 @@ fn sandbox_template_to_k8s( params.supervisor_image, params.supervisor_image_pull_policy, params.supervisor_sideload_method, + params.sandbox_uid, + params.sandbox_gid, ); // Inject workspace persistence (init container + PVC volume mount) so // that /sandbox data survives pod rescheduling. Skipped when the user // provides custom volumeClaimTemplates to avoid conflicts. if inject_workspace { - apply_workspace_persistence(&mut result, image, params.image_pull_policy); + apply_workspace_persistence( + &mut result, + image, + params.image_pull_policy, + params.sandbox_uid, + params.sandbox_gid, + ); } result @@ -2083,6 +2181,8 @@ mod tests { "custom-image:latest", "IfNotPresent", SupervisorSideloadMethod::InitContainer, + 1500, // sandbox_uid + 1500, // sandbox_gid ); let sc = &pod_template["spec"]["containers"][0]["securityContext"]; @@ -2112,6 +2212,8 @@ mod tests { "supervisor-image:latest", "IfNotPresent", SupervisorSideloadMethod::InitContainer, + 1000, // sandbox_uid + 1000, // sandbox_gid ); let sc = &pod_template["spec"]["containers"][0]["securityContext"]; @@ -2137,6 +2239,8 @@ mod tests { "supervisor-image:latest", "IfNotPresent", SupervisorSideloadMethod::InitContainer, + 1000, // sandbox_uid + 1000, // sandbox_gid ); // Volume should be an emptyDir @@ -2211,6 +2315,8 @@ mod tests { "supervisor-image:latest", "IfNotPresent", SupervisorSideloadMethod::ImageVolume, + 1000, // sandbox_uid + 1000, // sandbox_gid ); let volumes = pod_template["spec"]["volumes"] @@ -2265,6 +2371,8 @@ mod tests { "supervisor-image:latest", "", SupervisorSideloadMethod::ImageVolume, + 1000, // sandbox_uid + 1000, // sandbox_gid ); let volume = &pod_template["spec"]["volumes"][0]; @@ -2740,6 +2848,8 @@ mod tests { &mut pod_template, "openshell/sandbox:latest", "IfNotPresent", + 1000, // sandbox_uid + 1000, // sandbox_gid ); // Init container @@ -2750,7 +2860,8 @@ mod tests { assert_eq!(init_containers[0]["name"], WORKSPACE_INIT_CONTAINER_NAME); assert_eq!(init_containers[0]["image"], "openshell/sandbox:latest"); assert_eq!(init_containers[0]["imagePullPolicy"], "IfNotPresent"); - assert_eq!(init_containers[0]["securityContext"]["runAsUser"], 0); + // init container runs as the resolved sandbox UID (not root) + assert_eq!(init_containers[0]["securityContext"]["runAsUser"], 1000); // Init container mounts PVC at temp path, not /sandbox let init_mounts = init_containers[0]["volumeMounts"] @@ -2793,7 +2904,13 @@ mod tests { } }); - apply_workspace_persistence(&mut pod_template, "my-custom-image:v2", "IfNotPresent"); + apply_workspace_persistence( + &mut pod_template, + "my-custom-image:v2", + "IfNotPresent", + 1000, + 1000, + ); let init_image = pod_template["spec"]["initContainers"][0]["image"] .as_str() @@ -2815,7 +2932,7 @@ mod tests { } }); - apply_workspace_persistence(&mut pod_template, "img:latest", "Always"); + apply_workspace_persistence(&mut pod_template, "img:latest", "Always", 1000, 1000); let cmd = pod_template["spec"]["initContainers"][0]["command"] .as_array() diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index f7eeeba42..7b3c09fa8 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -135,6 +135,8 @@ async fn main() -> Result<()> { provider_spiffe_workload_api_socket_path: args .provider_spiffe_workload_api_socket_path .unwrap_or_default(), + sandbox_uid: None, + sandbox_gid: None, }) .await .into_diagnostic()?; diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 30fecd8be..75943e5f4 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -204,7 +204,7 @@ enum GuestImagePayloadSource { LocalDocker { rootfs_archive: PathBuf }, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct VmDriverConfig { pub openshell_endpoint: String, pub state_dir: PathBuf, @@ -222,8 +222,19 @@ pub struct VmDriverConfig { pub gpu_enabled: bool, pub gpu_mem_mib: u32, pub gpu_vcpus: u8, + /// Resolved sandbox UID for rootfs `/etc/passwd` entry. + /// When empty, defaults to 10001 (the legacy hardcoded value). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub sandbox_uid: Option, + /// Resolved sandbox GID for rootfs `/etc/passwd` and `/etc/group` entries. + /// When empty, defaults to the resolved UID. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub sandbox_gid: Option, } +/// Default sandbox UID used by the VM driver when no config value is set. +pub const DEFAULT_SANDBOX_UID: u32 = 10001; + impl Default for VmDriverConfig { fn default() -> Self { Self { @@ -243,11 +254,23 @@ impl Default for VmDriverConfig { gpu_enabled: false, gpu_mem_mib: 8192, gpu_vcpus: 4, + sandbox_uid: None, + sandbox_gid: None, } } } impl VmDriverConfig { + /// Resolve the sandbox UID, falling back to DEFAULT_SANDBOX_UID. + pub fn resolve_sandbox_uid(&self) -> u32 { + self.sandbox_uid.unwrap_or(DEFAULT_SANDBOX_UID) + } + + /// Resolve the sandbox GID, falling back to the resolved UID. + pub fn resolve_sandbox_gid(&self, resolved_uid: u32) -> u32 { + self.sandbox_gid.unwrap_or(resolved_uid) + } + fn requires_tls_materials(&self) -> bool { self.openshell_endpoint.starts_with("https://") } @@ -2537,14 +2560,19 @@ impl VmDriver { let image_identity_owned = image_identity.to_string(); let exported_rootfs_for_build = exported_rootfs.clone(); let prepared_rootfs_for_build = prepared_rootfs.clone(); + let sandbox_uid = self.config.resolve_sandbox_uid(); + let sandbox_gid = self.config.resolve_sandbox_gid(sandbox_uid); self.publish_vm_progress( sandbox_id, "PreparingRootfs", - format!("Preparing VM rootfs for local image \"{image_ref}\""), + format!( + "Preparing VM rootfs for local image \"{image_ref}\" (sandbox uid={sandbox_uid})" + ), HashMap::from([ ("image_ref".to_string(), image_ref.to_string()), ("image_source".to_string(), "local_docker".to_string()), ("image_identity".to_string(), image_identity.to_string()), + ("sandbox_uid".to_string(), sandbox_uid.to_string()), ]), ); let prepare_result = tokio::task::spawn_blocking(move || { @@ -2552,6 +2580,8 @@ impl VmDriver { prepare_sandbox_rootfs_from_image_root( &prepared_rootfs_for_build, &image_identity_owned, + sandbox_uid, + sandbox_gid, ) .map_err(|err| { format!("vm sandbox image '{image_ref_owned}' is not base-compatible: {err}") @@ -2670,20 +2700,25 @@ impl VmDriver { let image_ref_owned = image_ref.to_string(); let image_identity_owned = image_identity.to_string(); let prepared_rootfs_for_build = prepared_rootfs.clone(); + let sandbox_uid = self.config.resolve_sandbox_uid(); + let sandbox_gid = self.config.resolve_sandbox_gid(sandbox_uid); self.publish_vm_progress( sandbox_id, "PreparingRootfs", - format!("Preparing VM rootfs for image \"{image_ref}\""), + format!("Preparing VM rootfs for image \"{image_ref}\" (sandbox uid={sandbox_uid})"), HashMap::from([ ("image_ref".to_string(), image_ref.to_string()), ("image_source".to_string(), "registry".to_string()), ("image_identity".to_string(), image_identity.to_string()), + ("sandbox_uid".to_string(), sandbox_uid.to_string()), ]), ); let prepare_result = tokio::task::spawn_blocking(move || { prepare_sandbox_rootfs_from_image_root( &prepared_rootfs_for_build, &image_identity_owned, + sandbox_uid, + sandbox_gid, ) .map_err(|err| { format!("vm sandbox image '{image_ref_owned}' is not base-compatible: {err}") diff --git a/crates/openshell-driver-vm/src/main.rs b/crates/openshell-driver-vm/src/main.rs index 57db7b64b..17718f952 100644 --- a/crates/openshell-driver-vm/src/main.rs +++ b/crates/openshell-driver-vm/src/main.rs @@ -214,6 +214,8 @@ async fn main() -> Result<()> { gpu_enabled: args.gpu, gpu_mem_mib: args.gpu_mem_mib, gpu_vcpus: args.gpu_vcpus, + sandbox_uid: None, + sandbox_gid: None, }) .await .map_err(|err| miette::miette!("{err}"))?; diff --git a/crates/openshell-driver-vm/src/rootfs.rs b/crates/openshell-driver-vm/src/rootfs.rs index d59e7b4b9..6669ae5c3 100644 --- a/crates/openshell-driver-vm/src/rootfs.rs +++ b/crates/openshell-driver-vm/src/rootfs.rs @@ -29,8 +29,10 @@ pub const fn sandbox_guest_init_path() -> &'static str { pub fn prepare_sandbox_rootfs_from_image_root( rootfs: &Path, image_identity: &str, + sandbox_uid: u32, + sandbox_gid: u32, ) -> Result<(), String> { - prepare_sandbox_rootfs(rootfs)?; + prepare_sandbox_rootfs(rootfs, sandbox_uid, sandbox_gid)?; validate_sandbox_rootfs(rootfs)?; fs::write( rootfs.join(ROOTFS_VARIANT_MARKER), @@ -348,7 +350,7 @@ fn append_symlink_to_archive( .map_err(|e| format!("append symlink {}: {e}", source_path.display())) } -fn prepare_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { +fn prepare_sandbox_rootfs(rootfs: &Path, sandbox_uid: u32, sandbox_gid: u32) -> Result<(), String> { for relative in ["opt/openshell/.initialized", "opt/openshell/.rootfs-type"] { remove_rootfs_path(rootfs, relative)?; } @@ -377,7 +379,7 @@ fn prepare_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { fs::create_dir_all(&opt_dir).map_err(|e| format!("create {}: {e}", opt_dir.display()))?; fs::write(opt_dir.join(".rootfs-type"), "sandbox\n") .map_err(|e| format!("write sandbox rootfs marker: {e}"))?; - ensure_sandbox_guest_user(rootfs)?; + ensure_sandbox_guest_user(rootfs, sandbox_uid, sandbox_gid)?; create_sandbox_mountpoint(&rootfs.join("sandbox"))?; create_sandbox_mountpoint(&rootfs.join("image-cache"))?; create_sandbox_mountpoint(&rootfs.join("lower"))?; @@ -752,16 +754,17 @@ fn temporary_injection_path(image_path: &Path) -> PathBuf { )) } -fn ensure_sandbox_guest_user(rootfs: &Path) -> Result<(), String> { - const SANDBOX_UID: u32 = 10001; - const SANDBOX_GID: u32 = 10001; - +fn ensure_sandbox_guest_user( + rootfs: &Path, + sandbox_uid: u32, + sandbox_gid: u32, +) -> Result<(), String> { let etc_dir = rootfs.join("etc"); fs::create_dir_all(&etc_dir).map_err(|e| format!("create {}: {e}", etc_dir.display()))?; ensure_line_in_file( &etc_dir.join("group"), - &format!("sandbox:x:{SANDBOX_GID}:"), + &format!("sandbox:x:{sandbox_gid}:"), |line| line.starts_with("sandbox:"), )?; ensure_line_in_file(&etc_dir.join("gshadow"), "sandbox:!::", |line| { @@ -769,7 +772,7 @@ fn ensure_sandbox_guest_user(rootfs: &Path) -> Result<(), String> { })?; ensure_line_in_file( &etc_dir.join("passwd"), - &format!("sandbox:x:{SANDBOX_UID}:{SANDBOX_GID}:OpenShell Sandbox:/sandbox:/bin/bash"), + &format!("sandbox:x:{sandbox_uid}:{sandbox_gid}:OpenShell Sandbox:/sandbox:/bin/bash"), |line| line.starts_with("sandbox:"), )?; ensure_line_in_file( @@ -936,7 +939,9 @@ mod tests { fs::write(rootfs.join("bin/sed"), b"sed").expect("write sed"); fs::write(rootfs.join("sbin/ip"), b"ip").expect("write ip"); - prepare_sandbox_rootfs(&rootfs).expect("prepare sandbox rootfs"); + // Use a non-standard UID so the test doesn't collide with the default. + let uid = 20001; + prepare_sandbox_rootfs(&rootfs, uid, uid).expect("prepare sandbox rootfs"); validate_sandbox_rootfs(&rootfs).expect("validate sandbox rootfs"); assert!(rootfs.join("srv/openshell-vm-sandbox-init.sh").is_file()); @@ -955,12 +960,14 @@ mod tests { assert!( fs::read_to_string(rootfs.join("etc/passwd")) .expect("read passwd") - .contains("sandbox:x:10001:10001:OpenShell Sandbox:/sandbox:/bin/bash") + .contains(&format!( + "sandbox:x:{uid}:{uid}:OpenShell Sandbox:/sandbox:/bin/bash" + )) ); assert!( fs::read_to_string(rootfs.join("etc/group")) .expect("read group") - .contains("sandbox:x:10001:") + .contains(&format!("sandbox:x:{uid}:")) ); assert_eq!( fs::read_to_string(rootfs.join("etc/hosts")).expect("read hosts"), @@ -980,7 +987,7 @@ mod tests { fs::create_dir_all(rootfs.join("sandbox")).expect("create sandbox workdir"); fs::write(rootfs.join("sandbox/app.py"), "print('hello')\n").expect("write app"); - prepare_sandbox_rootfs(&rootfs).expect("prepare sandbox rootfs"); + prepare_sandbox_rootfs(&rootfs, 10001, 10001).expect("prepare sandbox rootfs"); assert!(rootfs.join("sandbox").is_dir()); assert_eq!( diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index aaabbf926..f52137cd2 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -537,6 +537,41 @@ fn from_proto(policy: &SandboxPolicy) -> PolicyFile { } } +// --------------------------------------------------------------------------- +// Sandbox UID/GID constants +// --------------------------------------------------------------------------- + +/// Minimum accepted UID for sandbox process identity. +/// UIDs below this are reserved for system users and are rejected. +pub const MIN_SANDBOX_UID: u32 = 1000; + +/// Maximum accepted UID for sandbox process identity. +/// UIDs above this exceed typical OS limits and are rejected. +pub const MAX_SANDBOX_UID: u32 = 2_000_000_000; + +/// The literal string value accepted as a valid sandbox user/group name. +const SANDBOX_NAME: &str = "sandbox"; + +/// Validate whether a process identity field value is acceptable. +/// +/// Accepts either the literal `"sandbox"` or a numeric UID/GID parsed as +/// `u32` within the range `[MIN_SANDBOX_UID, MAX_SANDBOX_UID]`. +/// +/// Rejects: +/// - The empty string (callers should use `ensure_sandbox_process_identity` +/// to fill defaults before validation) +/// - UID 0 or values below `MIN_SANDBOX_UID` +/// - Values above `MAX_SANDBOX_UID` +/// - Non-numeric strings other than `"sandbox"` (e.g. `"root"`, `"nobody"`) +pub fn is_valid_sandbox_identity(value: &str) -> bool { + if value == SANDBOX_NAME { + return true; + } + value + .parse::() + .is_ok_and(|uid| (MIN_SANDBOX_UID..=MAX_SANDBOX_UID).contains(&uid)) +} + // --------------------------------------------------------------------------- // Public API // --------------------------------------------------------------------------- @@ -700,7 +735,10 @@ impl fmt::Display for PolicyViolation { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::InvalidProcessIdentity { field, value } => { - write!(f, "{field} must be 'sandbox', got '{value}'") + write!( + f, + "{field} must be 'sandbox' or a numeric UID/GID in range [{MIN_SANDBOX_UID}, {MAX_SANDBOX_UID}], got '{value}'" + ) } Self::PathTraversal { path } => { write!(f, "path contains '..' traversal component: {path}") @@ -753,17 +791,18 @@ pub fn validate_sandbox_policy( ) -> std::result::Result<(), Vec> { let mut violations = Vec::new(); - // Check process identity — must be "sandbox". + // Check process identity — must be "sandbox" or a numeric UID/GID + // within the acceptable sandbox range. // `ensure_sandbox_process_identity` should be called before this to - // fill in defaults; anything other than "sandbox" is rejected. + // fill in defaults; any invalid value is rejected. if let Some(ref process) = policy.process { - if process.run_as_user != "sandbox" { + if !is_valid_sandbox_identity(&process.run_as_user) { violations.push(PolicyViolation::InvalidProcessIdentity { field: "run_as_user", value: process.run_as_user.clone(), }); } - if process.run_as_group != "sandbox" { + if !is_valid_sandbox_identity(&process.run_as_group) { violations.push(PolicyViolation::InvalidProcessIdentity { field: "run_as_group", value: process.run_as_group.clone(), @@ -1402,6 +1441,180 @@ network_policies: assert!(s.contains("sandbox")); } + // ---- is_valid_sandbox_identity tests ---- + + #[test] + fn valid_identity_accepts_sandbox() { + assert!(is_valid_sandbox_identity("sandbox")); + } + + #[test] + fn valid_identity_accepts_numeric_uid_in_range() { + assert!(is_valid_sandbox_identity("1000")); + assert!(is_valid_sandbox_identity("50000")); + assert!(is_valid_sandbox_identity("1000660000")); + } + + #[test] + fn valid_identity_accepts_boundary_uids() { + assert!(is_valid_sandbox_identity(&MIN_SANDBOX_UID.to_string())); + assert!(is_valid_sandbox_identity(&MAX_SANDBOX_UID.to_string())); + } + + #[test] + fn valid_identity_rejects_zero() { + assert!(!is_valid_sandbox_identity("0")); + } + + #[test] + fn valid_identity_rejects_system_uids_below_min() { + assert!(!is_valid_sandbox_identity("999")); + assert!(!is_valid_sandbox_identity("100")); + assert!(!is_valid_sandbox_identity("1")); + } + + #[test] + fn valid_identity_rejects_uid_above_max() { + assert!(!is_valid_sandbox_identity( + &MAX_SANDBOX_UID.saturating_add(1).to_string() + )); + } + + #[test] + fn valid_identity_rejects_non_numeric_names() { + assert!(!is_valid_sandbox_identity("root")); + assert!(!is_valid_sandbox_identity("nobody")); + assert!(!is_valid_sandbox_identity("user")); + } + + #[test] + fn valid_identity_rejects_empty_string() { + assert!(!is_valid_sandbox_identity("")); + } + + // ---- Policy validation with numeric UIDs ---- + + #[test] + fn validate_accepts_numeric_uid_in_range() { + let policy = SandboxPolicy { + version: 1, + process: Some(ProcessPolicy { + run_as_user: "1000".into(), + run_as_group: "5000".into(), + }), + filesystem: None, + landlock: None, + network_policies: HashMap::new(), + }; + assert!(validate_sandbox_policy(&policy).is_ok()); + } + + #[test] + fn validate_accepts_boundary_uids() { + let policy = SandboxPolicy { + version: 1, + process: Some(ProcessPolicy { + run_as_user: MIN_SANDBOX_UID.to_string(), + run_as_group: MAX_SANDBOX_UID.to_string(), + }), + filesystem: None, + landlock: None, + network_policies: HashMap::new(), + }; + assert!(validate_sandbox_policy(&policy).is_ok()); + } + + #[test] + fn validate_rejects_uid_out_of_range_low() { + let mut policy = restrictive_default_policy(); + policy.process = Some(ProcessPolicy { + run_as_user: "500".into(), + run_as_group: "sandbox".into(), + }); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert!(violations.iter().any(|v| matches!( + v, + PolicyViolation::InvalidProcessIdentity { + field: "run_as_user", + .. + } + ))); + } + + #[test] + fn validate_rejects_uid_out_of_range_high() { + let mut policy = restrictive_default_policy(); + policy.process = Some(ProcessPolicy { + run_as_user: (MAX_SANDBOX_UID + 1).to_string(), + run_as_group: "sandbox".into(), + }); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert!(violations.iter().any(|v| matches!( + v, + PolicyViolation::InvalidProcessIdentity { + field: "run_as_user", + .. + } + ))); + } + + #[test] + fn validate_rejects_root_string() { + let mut policy = restrictive_default_policy(); + policy.process = Some(ProcessPolicy { + run_as_user: "root".into(), + run_as_group: "sandbox".into(), + }); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert!(violations.iter().any(|v| matches!( + v, + PolicyViolation::InvalidProcessIdentity { + field: "run_as_user", + .. + } + ))); + } + + #[test] + fn validate_rejects_nobody_string() { + let mut policy = restrictive_default_policy(); + policy.process = Some(ProcessPolicy { + run_as_user: "nobody".into(), + run_as_group: "nogroup".into(), + }); + let violations = validate_sandbox_policy(&policy).unwrap_err(); + assert_eq!(violations.len(), 2); + } + + #[test] + fn validate_accepts_mixed_sandbox_name_and_uid() { + // run_as_user as "sandbox" name, run_as_group as numeric UID + let policy = SandboxPolicy { + version: 1, + process: Some(ProcessPolicy { + run_as_user: "sandbox".into(), + run_as_group: "1000".into(), + }), + filesystem: None, + landlock: None, + network_policies: HashMap::new(), + }; + assert!(validate_sandbox_policy(&policy).is_ok()); + } + + #[test] + fn policy_violation_display_includes_range() { + let v = PolicyViolation::InvalidProcessIdentity { + field: "run_as_user", + value: "root".into(), + }; + let s = format!("{v}"); + assert!(s.contains("sandbox")); + assert!(s.contains(&MIN_SANDBOX_UID.to_string())); + assert!(s.contains(&MAX_SANDBOX_UID.to_string())); + assert!(s.contains("root")); + } + // ---- Multi-port and host wildcard tests ---- #[test] diff --git a/crates/openshell-supervisor-process/Cargo.toml b/crates/openshell-supervisor-process/Cargo.toml index b2dad859e..7752cc8af 100644 --- a/crates/openshell-supervisor-process/Cargo.toml +++ b/crates/openshell-supervisor-process/Cargo.toml @@ -13,6 +13,7 @@ rust-version.workspace = true [dependencies] openshell-core = { path = "../openshell-core" } openshell-ocsf = { path = "../openshell-ocsf" } +openshell-policy = { path = "../openshell-policy" } anyhow = { workspace = true } base64 = { workspace = true } diff --git a/crates/openshell-supervisor-process/src/process.rs b/crates/openshell-supervisor-process/src/process.rs index 9f9fe1822..05d187428 100644 --- a/crates/openshell-supervisor-process/src/process.rs +++ b/crates/openshell-supervisor-process/src/process.rs @@ -11,7 +11,7 @@ use crate::netns::NetworkNamespace; use crate::sandbox; use miette::{IntoDiagnostic, Result}; use nix::sys::signal::{self, Signal}; -use nix::unistd::{Group, Pid, User}; +use nix::unistd::{Gid, Group, Pid, Uid, User}; use openshell_core::policy::{NetworkMode, SandboxPolicy}; use std::collections::HashMap; use std::ffi::CString; @@ -748,17 +748,36 @@ impl Drop for ProcessHandle { } } -/// Validate that the `sandbox` user exists in this image. +/// Validate that the configured sandbox identity exists in this image. /// -/// All sandbox images must include a `sandbox` user for privilege dropping. -/// This check runs at supervisor startup (inside the container) where we can -/// inspect `/etc/passwd`. If the user is missing, the sandbox fails fast -/// with a clear error instead of silently running child processes as root. +/// When the identity is the literal `"sandbox"`, verifies the user exists +/// in `/etc/passwd` (all sandbox images ship with one). +/// +/// When the identity is a numeric UID, skips the passwd lookup entirely — +/// the kernel will use the resolved UID regardless of whether an entry +/// exists in `/etc/passwd`. Logs an OCSF event confirming numeric UID usage. +/// Non-numeric, non-"sandbox" values are rejected. #[cfg(unix)] pub fn validate_sandbox_user(policy: &SandboxPolicy) -> Result<()> { - let user_name = policy.process.run_as_user.as_deref().unwrap_or("sandbox"); + let identity = policy.process.run_as_user.as_deref().unwrap_or("sandbox"); + + // Numeric UID — no passwd entry required; kernel resolves directly. + if openshell_policy::is_valid_sandbox_identity(identity) && identity.parse::().is_ok() { + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(openshell_ocsf::ctx::ctx()) + .severity(openshell_ocsf::SeverityId::Informational) + .status(openshell_ocsf::StatusId::Success) + .state(openshell_ocsf::StateId::Enabled, "validated") + .message(format!( + "Accepted numeric UID {identity} (no passwd entry required)" + )) + .build() + ); + return Ok(()); + } - if user_name.is_empty() || user_name == "sandbox" { + // "sandbox" name — must exist in /etc/passwd. + if identity == "sandbox" { match User::from_name("sandbox") { Ok(Some(_)) => { openshell_ocsf::ocsf_emit!( @@ -780,11 +799,36 @@ pub fn validate_sandbox_user(policy: &SandboxPolicy) -> Result<()> { return Err(miette::miette!("failed to look up 'sandbox' user: {e}")); } } + } else if !identity.is_empty() { + // Non-numeric, non-sandbox string — attempt passwd lookup. + // This catches cases where someone accidentally put "root" or similar. + match User::from_name(identity) { + Ok(Some(_)) => { + tracing::warn!( + identity, + "non-sandbox user accepted via passwd entry; \ + consider using a numeric UID for UID-injected images" + ); + } + Ok(None) => { + return Err(miette::miette!( + "unrecognized sandbox identity '{identity}'; \ + expected 'sandbox' or a numeric UID in range [{MIN_SANDBOX_UID}, {MAX_SANDBOX_UID}]" + )); + } + Err(e) => { + return Err(miette::miette!( + "failed to look up identity '{identity}': {e}" + )); + } + } } Ok(()) } +pub use openshell_policy::{MAX_SANDBOX_UID, MIN_SANDBOX_UID}; + /// Prepare a `read_write` path for the sandboxed process. /// /// Returns `true` when the path was created by the supervisor and therefore @@ -823,9 +867,13 @@ fn prepare_read_write_path(path: &Path) -> Result { /// Creates `read_write` directories if they don't exist and sets ownership /// on newly-created paths to the configured sandbox user/group. This runs as /// the supervisor (root) before forking the child process. +/// +/// Accepts both name-based identities (resolved via `/etc/passwd`) and numeric +/// UIDs/GIDs (passed directly to `chown` without a passwd lookup). #[cfg(unix)] pub fn prepare_filesystem(policy: &SandboxPolicy) -> Result<()> { use nix::unistd::chown; + use nix::unistd::{Gid, Uid}; let user_name = match policy.process.run_as_user.as_deref() { Some(name) if !name.is_empty() => Some(name), @@ -841,27 +889,22 @@ pub fn prepare_filesystem(policy: &SandboxPolicy) -> Result<()> { return Ok(()); } - // Resolve user and group - let uid = if let Some(name) = user_name { - Some( - User::from_name(name) - .into_diagnostic()? - .ok_or_else(|| miette::miette!("Sandbox user not found: {name}"))? - .uid, - ) - } else { - None + // Resolve UID: numeric values are passed directly; names resolve via passwd. + let uid = match user_name { + Some(name) if name.parse::().is_ok() => { + Some(Uid::from_raw(name.parse().into_diagnostic()?)) + } + Some(name) => User::from_name(name).into_diagnostic()?.map(|u| u.uid), + _ => None, }; - let gid = if let Some(name) = group_name { - Some( - Group::from_name(name) - .into_diagnostic()? - .ok_or_else(|| miette::miette!("Sandbox group not found: {name}"))? - .gid, - ) - } else { - None + // Resolve GID: numeric values are passed directly; names resolve via group. + let gid = match group_name { + Some(name) if name.parse::().is_ok() => { + Some(Gid::from_raw(name.parse().into_diagnostic()?)) + } + Some(name) => Group::from_name(name).into_diagnostic()?.map(|g| g.gid), + _ => None, }; // Create missing read_write paths and only chown the ones we created. @@ -914,27 +957,59 @@ pub fn drop_privileges(policy: &SandboxPolicy) -> Result<()> { return Ok(()); } - let user = if let Some(name) = user_name { - User::from_name(name) - .into_diagnostic()? - .ok_or_else(|| miette::miette!("Sandbox user not found: {name}"))? - } else { - User::from_uid(nix::unistd::geteuid()) - .into_diagnostic()? - .ok_or_else(|| miette::miette!("Failed to resolve current user"))? + // Resolve UID: numeric values are used directly; names resolve via passwd. + let target_uid = match user_name { + Some(name) if name.parse::().is_ok() => Uid::from_raw(name.parse().into_diagnostic()?), + Some(name) => { + User::from_name(name) + .into_diagnostic()? + .ok_or_else(|| miette::miette!("Sandbox user not found: {name}"))? + .uid + } + None => nix::unistd::geteuid(), }; - let group = if let Some(name) = group_name { - Group::from_name(name) + // Resolve group: if a numeric GID is configured use it directly. + // Otherwise try name resolution, then fall back to current user's primary group. + let target_gid = match group_name { + Some(name) if name.parse::().is_ok() => Gid::from_raw(name.parse().into_diagnostic()?), + Some(name) => { + Group::from_name(name) + .into_diagnostic()? + .ok_or_else(|| miette::miette!("Sandbox group not found: {name}"))? + .gid + } + None => match target_uid.as_raw() { + 0 => nix::unistd::getegid(), + _ => Group::from_gid( + User::from_uid(target_uid) + .into_diagnostic()? + .ok_or_else(|| miette::miette!("Failed to resolve user from UID {target_uid}"))? + .gid, + ) .into_diagnostic()? - .ok_or_else(|| miette::miette!("Sandbox group not found: {name}"))? + .map_or_else(nix::unistd::getegid, |g| g.gid), + }, + }; + + // Resolve the user record for initgroups (if name-based) or skip it (numeric UID). + let user = if user_name.is_some() { + Some( + User::from_uid(target_uid) + .into_diagnostic()? + .ok_or_else(|| { + miette::miette!("Failed to resolve user record for UID {target_uid}") + })?, + ) } else { - Group::from_gid(user.gid) - .into_diagnostic()? - .ok_or_else(|| miette::miette!("Failed to resolve user primary group"))? + None }; - if user_name.is_some() { + // Set supplementary groups only when we have a name-based identity. + // Numeric UIDs may not have a passwd entry, so initgroups would fail. + if let Some(ref user) = user + && target_uid != nix::unistd::geteuid() + { let user_cstr = CString::new(user.name.clone()).map_err(|_| miette::miette!("Invalid user name"))?; #[cfg(any( @@ -953,31 +1028,35 @@ pub fn drop_privileges(policy: &SandboxPolicy) -> Result<()> { target_os = "redox" )))] { - nix::unistd::initgroups(user_cstr.as_c_str(), group.gid).into_diagnostic()?; + nix::unistd::initgroups(user_cstr.as_c_str(), target_gid).into_diagnostic()?; } } - nix::unistd::setgid(group.gid).into_diagnostic()?; + if target_gid != nix::unistd::getegid() { + nix::unistd::setgid(target_gid).into_diagnostic()?; + } // Verify effective GID actually changed (defense-in-depth, CWE-250 / CERT POS37-C) let effective_gid = nix::unistd::getegid(); - if effective_gid != group.gid { + if effective_gid != target_gid { return Err(miette::miette!( "Privilege drop verification failed: expected effective GID {}, got {}", - group.gid, + target_gid, effective_gid )); } - if user_name.is_some() { - nix::unistd::setuid(user.uid).into_diagnostic()?; + if let Some(_user) = user { + if target_uid != nix::unistd::geteuid() { + nix::unistd::setuid(target_uid).into_diagnostic()?; + } // Verify effective UID actually changed (defense-in-depth, CWE-250 / CERT POS37-C) let effective_uid = nix::unistd::geteuid(); - if effective_uid != user.uid { + if effective_uid != target_uid { return Err(miette::miette!( "Privilege drop verification failed: expected effective UID {}, got {}", - user.uid, + target_uid, effective_uid )); } @@ -985,11 +1064,11 @@ pub fn drop_privileges(policy: &SandboxPolicy) -> Result<()> { // Verify root cannot be re-acquired (CERT POS37-C hardening). // If we dropped from root, setuid(0) must fail; success means privileges // were not fully relinquished. - if nix::unistd::setuid(nix::unistd::Uid::from_raw(0)).is_ok() && user.uid.as_raw() != 0 { + if nix::unistd::setuid(Uid::from_raw(0)).is_ok() && target_uid.as_raw() != 0 { return Err(miette::miette!( "Privilege drop verification failed: process can still re-acquire root (UID 0) \ after switching to UID {}", - user.uid + target_uid )); } } @@ -1403,7 +1482,7 @@ mod tests { let current_user = User::from_uid(nix::unistd::geteuid()) .unwrap() .expect("current user entry"); - let restricted_group = Group::from_gid(nix::unistd::Gid::from_raw(0)) + let restricted_group = Group::from_gid(Gid::from_raw(0)) .unwrap() .expect("gid 0 group entry"); if restricted_group.gid == nix::unistd::getegid() { @@ -1538,4 +1617,54 @@ mod tests { Some(PathBuf::from("/run/spire")) ); } + + // ---- Numeric UID tests (Phase 2) ---- + + #[test] + fn drop_privileges_accepts_numeric_uid() { + // When running as non-root, a numeric UID/GID that matches the + // current process should succeed without any passwd lookup. + if nix::unistd::geteuid().is_root() { + return; + } + + let uid_raw = nix::unistd::geteuid().as_raw(); + let gid_raw = nix::unistd::getegid().as_raw(); + + let policy = policy_with_process(ProcessPolicy { + run_as_user: Some(uid_raw.to_string()), + run_as_group: Some(gid_raw.to_string()), + }); + + assert!( + drop_privileges(&policy).is_ok(), + "should accept current process UID/GID as numeric strings" + ); + } + + #[test] + fn drop_privileges_numeric_uid_skips_initgroups() { + // When running as non-root with a numeric user but group matches, + // initgroups should not be called (guard: target_uid != geteuid()). + if nix::unistd::geteuid().is_root() { + return; + } + + let current_uid = nix::unistd::geteuid().as_raw(); + + // Use a different group name that exists (the current one). + let current_group = Group::from_gid(nix::unistd::getegid()) + .expect("should resolve current group") + .expect("current group should exist"); + + let policy = policy_with_process(ProcessPolicy { + run_as_user: Some(current_uid.to_string()), // numeric UID, no passwd entry needed + run_as_group: Some(current_group.name), // name-based group + }); + + assert!( + drop_privileges(&policy).is_ok(), + "should accept numeric UID with name-based group (initgroups guarded)" + ); + } } diff --git a/crates/openshell-supervisor-process/src/ssh.rs b/crates/openshell-supervisor-process/src/ssh.rs index 465604407..e180471e5 100644 --- a/crates/openshell-supervisor-process/src/ssh.rs +++ b/crates/openshell-supervisor-process/src/ssh.rs @@ -661,12 +661,20 @@ impl Default for PtyRequest { /// Derive the session USER and HOME from the policy's `run_as_user`. /// -/// Falls back to `("sandbox", "/sandbox")` when the policy has no explicit user, -/// preserving backward compatibility with images that use the default layout. +/// For name-based identities, looks up the home directory via `/etc/passwd` +/// (or defaults to `/home/{user}`). +/// +/// For numeric UIDs, there is no passwd entry — falls back to +/// `("{uid}", "/sandbox")` so the agent session still has a meaningful +/// USER identifier. fn session_user_and_home(policy: &SandboxPolicy) -> (String, String) { match policy.process.run_as_user.as_deref() { Some(user) if !user.is_empty() => { - // Look up the user's home directory from /etc/passwd. + // Numeric UID — no passwd entry expected; use default HOME. + if user.parse::().is_ok() { + return (user.to_string(), "/sandbox".to_string()); + } + // Name-based identity — look up home from /etc/passwd. let home = nix::unistd::User::from_name(user) .ok() .flatten() @@ -1527,6 +1535,112 @@ mod tests { assert_eq!(rx_b.recv().unwrap(), b"still-alive"); } + // ----------------------------------------------------------------------- + // session_user_and_home tests (Phase 2: numeric UID support) + // ----------------------------------------------------------------------- + + #[test] + fn session_user_and_home_returns_numeric_uid_as_user() { + use openshell_core::policy::{ + FilesystemPolicy, LandlockPolicy, NetworkPolicy, ProcessPolicy, + }; + let policy = SandboxPolicy { + version: 1, + filesystem: FilesystemPolicy::default(), + network: NetworkPolicy::default(), + landlock: LandlockPolicy::default(), + process: ProcessPolicy { + run_as_user: Some("1000".into()), + run_as_group: None, + }, + }; + let (user, home) = session_user_and_home(&policy); + assert_eq!(user, "1000"); + // Numeric UID has no passwd entry — defaults to /sandbox. + assert_eq!(home, "/sandbox"); + } + + #[test] + fn session_user_and_home_returns_name_from_passwd() { + use openshell_core::policy::{ + FilesystemPolicy, LandlockPolicy, NetworkPolicy, ProcessPolicy, + }; + let policy = SandboxPolicy { + version: 1, + filesystem: FilesystemPolicy::default(), + network: NetworkPolicy::default(), + landlock: LandlockPolicy::default(), + process: ProcessPolicy { + run_as_user: Some("sandbox".into()), + run_as_group: None, + }, + }; + let (user, home) = session_user_and_home(&policy); + assert_eq!(user, "sandbox"); + // Name-based — should resolve via passwd (or /home/{user}). + assert!(!home.is_empty()); + } + + #[test] + fn session_user_and_home_defaults_to_sandbox_when_empty() { + use openshell_core::policy::{ + FilesystemPolicy, LandlockPolicy, NetworkPolicy, ProcessPolicy, + }; + let policy = SandboxPolicy { + version: 1, + filesystem: FilesystemPolicy::default(), + network: NetworkPolicy::default(), + landlock: LandlockPolicy::default(), + process: ProcessPolicy { + run_as_user: Some(String::new()), + run_as_group: None, + }, + }; + let (user, home) = session_user_and_home(&policy); + assert_eq!(user, "sandbox"); + assert_eq!(home, "/sandbox"); + } + + #[test] + fn session_user_and_home_defaults_to_sandbox_when_none() { + use openshell_core::policy::{ + FilesystemPolicy, LandlockPolicy, NetworkPolicy, ProcessPolicy, + }; + let policy = SandboxPolicy { + version: 1, + filesystem: FilesystemPolicy::default(), + network: NetworkPolicy::default(), + landlock: LandlockPolicy::default(), + process: ProcessPolicy { + run_as_user: None, + run_as_group: None, + }, + }; + let (user, home) = session_user_and_home(&policy); + assert_eq!(user, "sandbox"); + assert_eq!(home, "/sandbox"); + } + + #[test] + fn session_user_and_home_handles_large_numeric_uid() { + use openshell_core::policy::{ + FilesystemPolicy, LandlockPolicy, NetworkPolicy, ProcessPolicy, + }; + let policy = SandboxPolicy { + version: 1, + filesystem: FilesystemPolicy::default(), + network: NetworkPolicy::default(), + landlock: LandlockPolicy::default(), + process: ProcessPolicy { + run_as_user: Some("1000660000".into()), + run_as_group: None, + }, + }; + let (user, home) = session_user_and_home(&policy); + assert_eq!(user, "1000660000"); + assert_eq!(home, "/sandbox"); + } + /// `install_pre_exec_no_pty` runs drop_privileges and succeeds when the /// current user/group is already the configured one (no actual uid change). /// diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index ff4542136..cd110f68e 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -194,6 +194,12 @@ sa_token_ttl_secs = 3600 # shared roots such as /run, /var, /tmp, and /etc are rejected. # Supervisor-to-gateway auth still uses gateway JWTs. provider_spiffe_workload_api_socket_path = "/spiffe-workload-api/spire-agent.sock" +# Explicit sandbox UID/GID for the supervisor container securityContext and +# PVC init container. When unset, the driver auto-detects from OpenShift SCC +# namespace annotations (openshift.io/sa.scc.uid-range) if present, falling +# back to 1000 on non-OpenShift clusters. +# sandbox_uid = 1500 +# sandbox_gid = 1500 ``` ### Docker @@ -305,4 +311,7 @@ overlay_disk_mib = 4096 guest_tls_ca = "/var/lib/openshell/guest-tls/ca.pem" guest_tls_cert = "/var/lib/openshell/guest-tls/client.pem" guest_tls_key = "/var/lib/openshell/guest-tls/client-key.pem" +# Resolved sandbox UID/GID for the rootfs /etc/passwd entry. +# Defaults to 10001 when unset; matching GID is used if sandbox_gid is empty. +# sandbox_uid = 20001 ``` diff --git a/docs/reference/sandbox-compute-drivers.mdx b/docs/reference/sandbox-compute-drivers.mdx index 95a319c37..8b49ca421 100644 --- a/docs/reference/sandbox-compute-drivers.mdx +++ b/docs/reference/sandbox-compute-drivers.mdx @@ -273,3 +273,29 @@ The Kubernetes driver creates namespaced `agents.x-k8s.io/v1alpha1` `Sandbox` re `Sandbox.spec.volumeClaimTemplates` is immutable after creation. To change storage configuration, delete the sandbox and create a new one with the updated spec. + +## Sandbox User Identity + +OpenShell accepts both the hardcoded username `"sandbox"` and numeric UIDs in `[1000, 2_000_000_000]` for the supervisor's process identity (the policy's `run_as_user` field). The driver resolves the UID at sandbox creation time and passes it to the supervisor via environment variables. + +### Kubernetes / OpenShift + +The Kubernetes driver auto-detects the sandbox UID from OpenShift SCC namespace annotations: + +- `openshift.io/sa.scc.uid-range` (format: `/`, e.g. `1000000000/10000`) provides the UID. +- `openshift.io/sa.scc.supplemental-groups` provides the GID when present; otherwise the resolved UID is used as the GID. +- On non-OpenShift clusters, or when annotations are absent, the driver falls back to `1000`. + +You can override autodetection with explicit `sandbox_uid` / `sandbox_gid` config in `[openshell.drivers.kubernetes]`. When set, the driver skips namespace annotation lookup entirely. + +The resolved UID/GID appear in: +- Supervisor container environment variables (`OPENSHELL_SANDBOX_UID`, `OPENSHELL_SANDBOX_GID`) for direct kernel-level privilege dropping without `/etc/passwd` lookups. +- PVC init container `securityContext.runAsUser/runAsGroup/fsGroup` for workspace ownership operations. + +### VM Driver + +The VM driver injects the sandbox UID into the rootfs guest's `/etc/passwd`, `/etc/group`, and `/etc/gshadow` during rootfs preparation. Default UID is `10001`; configure `sandbox_uid` in `[openshell.drivers.vm]` to use a different value. + +### Custom Images + +Custom sandbox images no longer need a baked-in `"sandbox"` user. If your image requires a passwd entry for tools like `sudo` or `ssh`, add one manually (e.g. `RUN useradd -m -u 1500 deploy`). The supervisor resolves the numeric UID directly via `setuid()` without needing `/etc/passwd`. diff --git a/examples/bring-your-own-container/Dockerfile b/examples/bring-your-own-container/Dockerfile index 61f283970..fc65bd695 100644 --- a/examples/bring-your-own-container/Dockerfile +++ b/examples/bring-your-own-container/Dockerfile @@ -14,15 +14,19 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl iproute2 nftables \ && rm -rf /var/lib/apt/lists/* -# Create the sandbox user for non-root execution. -# Use a high UID range to avoid conflicts with host users when running without -# user namespace remapping (UID in container = UID on host). -RUN groupadd -g 1000660000 sandbox && \ - useradd -m -u 1000660000 -g sandbox sandbox +# The sandbox user is injected at runtime by the compute driver. +# Kubernetes: resolved from OpenShift SCC namespace annotations or explicit +# sandbox_uid config. VM: resolves to 10001 by default, configurable in +# gateway TOML. +# +# Images no longer need a baked-in "sandbox" user — numeric UIDs are accepted +# and the driver passes them directly to setuid()/chown() at sandbox start. +# If your image requires a passwd entry for tools like ssh or sudo, add one +# manually (e.g. RUN useradd -m -u 1500 deploy). -RUN install -d -o sandbox -g sandbox /sandbox +RUN install -d /sandbox WORKDIR /sandbox -COPY --chown=sandbox:sandbox app.py . +COPY app.py . EXPOSE 8080