From 9506f162427e16dd2c262815f429aac2e2c71e9d Mon Sep 17 00:00:00 2001
From: Evan Lezar <elezar@nvidia.com>
Date: Mon, 8 Jun 2026 18:22:20 +0200
Subject: [PATCH 1/2] feat(gpu)!: add resource requirements

BREAKING CHANGE: SandboxSpec.gpu and DriverSandboxSpec.gpu were replaced with
resource_requirements.gpu. This intentionally reuses protobuf field 9, changing
it from a bool to a message for both public and driver APIs.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
---
 architecture/compute-runtimes.md              |  4 +-
 crates/openshell-cli/src/main.rs              | 37 +++++++-
 crates/openshell-cli/src/run.rs               | 51 ++++++----
 .../sandbox_create_lifecycle_integration.rs   | 93 +++++++++++++++----
 crates/openshell-core/src/gpu.rs              | 60 +++++++++---
 crates/openshell-driver-docker/README.md      |  2 +-
 crates/openshell-driver-docker/src/lib.rs     | 28 +++---
 crates/openshell-driver-docker/src/tests.rs   | 26 ++++--
 crates/openshell-driver-kubernetes/README.md  | 11 ++-
 .../openshell-driver-kubernetes/src/driver.rs | 88 +++++++++++++-----
 crates/openshell-driver-podman/README.md      |  2 +-
 .../openshell-driver-podman/src/container.rs  | 22 +++--
 crates/openshell-driver-podman/src/driver.rs  | 31 ++++++-
 crates/openshell-driver-vm/src/driver.rs      | 44 ++++++---
 crates/openshell-server/src/compute/mod.rs    | 62 +++++++++----
 crates/openshell-server/src/grpc/sandbox.rs   |  4 +-
 .../openshell-server/src/grpc/validation.rs   |  4 +-
 docs/reference/sandbox-compute-drivers.mdx    |  2 +-
 proto/compute_driver.proto                    | 13 ++-
 proto/openshell.proto                         | 13 ++-
 20 files changed, 444 insertions(+), 153 deletions(-)

diff --git a/architecture/compute-runtimes.md b/architecture/compute-runtimes.md
index 092f2c049..24e0d78ff 100644
--- a/architecture/compute-runtimes.md
+++ b/architecture/compute-runtimes.md
@@ -98,7 +98,9 @@ users.
 Custom sandbox images must include the agent runtime and any system
 dependencies, but they should not need to include the gateway. GPU-capable
 images must include the user-space libraries required by the workload. The
-runtime still owns GPU device injection.
+runtime still owns GPU device injection. GPU requests are explicit, and can be
+refined with a driver-native device identifier; the gateway validates the
+request shape and each runtime enforces the GPU allocation modes it supports.
 
 ## Deployment Shape
 
diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs
index a21431e81..9d0044886 100644
--- a/crates/openshell-cli/src/main.rs
+++ b/crates/openshell-cli/src/main.rs
@@ -19,6 +19,7 @@ use openshell_bootstrap::{
 use openshell_cli::completers;
 use openshell_cli::run;
 use openshell_cli::tls::TlsOptions;
+use openshell_core::proto::GpuResourceRequirements;
 
 /// Resolved gateway context: name + gateway endpoint.
 struct GatewayContext {
@@ -2625,6 +2626,7 @@ async fn main() -> Result<()> {
                         .map(|s| openshell_core::forward::ForwardSpec::parse(&s))
                         .transpose()?;
                     let keep = keep || !no_keep || editor.is_some() || forward.is_some();
+                    let gpu_requirements = gpu.then_some(GpuResourceRequirements {});
 
                     let ctx = resolve_gateway(&cli.gateway, &cli.gateway_endpoint)?;
                     let endpoint = &ctx.endpoint;
@@ -2637,7 +2639,7 @@ async fn main() -> Result<()> {
                         &ctx.name,
                         &upload_specs,
                         keep,
-                        gpu,
+                        gpu_requirements,
                         cpu.as_deref(),
                         memory.as_deref(),
                         driver_config_json.as_deref(),
@@ -4495,6 +4497,39 @@ mod tests {
         }
     }
 
+    #[test]
+    fn sandbox_create_gpu_parses_driver_default() {
+        let cli = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu"])
+            .expect("sandbox create --gpu should parse");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { gpu, .. }),
+                ..
+            }) => {
+                assert!(gpu);
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_driver_default_allows_trailing_command() {
+        let cli = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu", "--", "claude"])
+            .expect("sandbox create --gpu -- claude should parse");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { gpu, command, .. }),
+                ..
+            }) => {
+                assert!(gpu);
+                assert_eq!(command, vec!["claude".to_string()]);
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
     #[test]
     fn service_expose_accepts_positional_target_port_and_service() {
         let cli = Cli::try_parse_from([
diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs
index 6f5520755..418b200ec 100644
--- a/crates/openshell-cli/src/run.rs
+++ b/crates/openshell-cli/src/run.rs
@@ -42,12 +42,13 @@ use openshell_core::proto::{
     GetClusterInferenceRequest, GetDraftHistoryRequest, GetDraftPolicyRequest,
     GetGatewayConfigRequest, GetProviderProfileRequest, GetProviderRefreshStatusRequest,
     GetProviderRequest, GetSandboxConfigRequest, GetSandboxLogsRequest,
-    GetSandboxPolicyStatusRequest, GetSandboxRequest, GetServiceRequest, HealthRequest,
-    ImportProviderProfilesRequest, LintProviderProfilesRequest, ListProviderProfilesRequest,
-    ListProvidersRequest, ListSandboxPoliciesRequest, ListSandboxProvidersRequest,
-    ListSandboxesRequest, ListServicesRequest, PlatformEvent, PolicySource, PolicyStatus, Provider,
-    ProviderCredentialRefreshStatus, ProviderCredentialRefreshStrategy, ProviderProfile,
-    ProviderProfileDiagnostic, ProviderProfileImportItem, RejectDraftChunkRequest,
+    GetSandboxPolicyStatusRequest, GetSandboxRequest, GetServiceRequest, GpuResourceRequirements,
+    HealthRequest, ImportProviderProfilesRequest, LintProviderProfilesRequest,
+    ListProviderProfilesRequest, ListProvidersRequest, ListSandboxPoliciesRequest,
+    ListSandboxProvidersRequest, ListSandboxesRequest, ListServicesRequest, PlatformEvent,
+    PolicySource, PolicyStatus, Provider, ProviderCredentialRefreshStatus,
+    ProviderCredentialRefreshStrategy, ProviderProfile, ProviderProfileDiagnostic,
+    ProviderProfileImportItem, RejectDraftChunkRequest, ResourceRequirements,
     RevokeSshSessionRequest, RotateProviderCredentialRequest, Sandbox, SandboxPhase, SandboxPolicy,
     SandboxSpec, SandboxTemplate, ServiceEndpointResponse, SetClusterInferenceRequest,
     SettingScope, SettingValue, TcpForwardFrame, TcpForwardInit, TcpRelayTarget,
@@ -123,7 +124,7 @@ fn ready_false_condition_message(
 
 fn provisioning_timeout_message(
     timeout_secs: u64,
-    requested_gpu: bool,
+    resource_requirements: Option<&ResourceRequirements>,
     condition_message: Option<&str>,
 ) -> String {
     let mut message = format!("sandbox provisioning timed out after {timeout_secs}s");
@@ -133,7 +134,7 @@ fn provisioning_timeout_message(
         message.push_str(condition_message);
     }
 
-    if requested_gpu {
+    if resource_requirements.is_some_and(|requirements| requirements.gpu.is_some()) {
         message.push_str(
             ". Hint: this may be because the available GPU is already in use by another sandbox.",
         );
@@ -1753,7 +1754,7 @@ pub async fn sandbox_create(
     gateway_name: &str,
     uploads: &[(String, Option<String>, bool)],
     keep: bool,
-    gpu: bool,
+    gpu_requirements: Option<GpuResourceRequirements>,
     cpu: Option<&str>,
     memory: Option<&str>,
     driver_config_json: Option<&str>,
@@ -1809,8 +1810,6 @@ pub async fn sandbox_create(
         }
         None => None,
     };
-    let requested_gpu = gpu;
-
     let providers_v2_enabled = gateway_providers_v2_enabled(&mut client).await?;
     let inferred_types: Vec<String> = if providers_v2_enabled {
         Vec::new()
@@ -1842,9 +1841,11 @@ pub async fn sandbox_create(
         None
     };
 
+    let resource_requirements = gpu_requirements.map(|gpu| ResourceRequirements { gpu: Some(gpu) });
+
     let request = CreateSandboxRequest {
         spec: Some(SandboxSpec {
-            gpu: requested_gpu,
+            resource_requirements,
             environment: environment.clone(),
             policy,
             providers: configured_providers,
@@ -1989,7 +1990,7 @@ pub async fn sandbox_create(
         if remaining.is_zero() {
             let timeout_message = provisioning_timeout_message(
                 provision_timeout.as_secs(),
-                requested_gpu,
+                resource_requirements.as_ref(),
                 last_condition_message.as_deref(),
             );
             if let Some(d) = display.as_mut() {
@@ -2008,7 +2009,7 @@ pub async fn sandbox_create(
                 // Timeout fired — the stream was idle for too long.
                 let timeout_message = provisioning_timeout_message(
                     provision_timeout.as_secs(),
-                    requested_gpu,
+                    resource_requirements.as_ref(),
                     last_condition_message.as_deref(),
                 );
                 if let Some(d) = display.as_mut() {
@@ -7686,9 +7687,10 @@ mod tests {
         PROGRESS_STEP_STARTING_SANDBOX,
     };
     use openshell_core::proto::{
-        Provider, ProviderCredentialRefresh, ProviderCredentialRefreshStatus,
-        ProviderCredentialRefreshStrategy, ProviderCredentialTokenGrant, ProviderProfile,
-        ProviderProfileCredential, SandboxCondition, SandboxStatus, datamodel::v1::ObjectMeta,
+        GpuResourceRequirements, Provider, ProviderCredentialRefresh,
+        ProviderCredentialRefreshStatus, ProviderCredentialRefreshStrategy,
+        ProviderCredentialTokenGrant, ProviderProfile, ProviderProfileCredential,
+        ResourceRequirements, SandboxCondition, SandboxStatus, datamodel::v1::ObjectMeta,
     };
 
     struct EnvVarGuard {
@@ -8392,9 +8394,12 @@ mod tests {
 
     #[test]
     fn provisioning_timeout_message_includes_condition_and_gpu_hint() {
+        let resource_requirements = ResourceRequirements {
+            gpu: Some(GpuResourceRequirements {}),
+        };
         let message = provisioning_timeout_message(
             120,
-            true,
+            Some(&resource_requirements),
             Some("DependenciesNotReady: Pod exists with phase: Pending; Service Exists"),
         );
 
@@ -8405,7 +8410,15 @@ mod tests {
 
     #[test]
     fn provisioning_timeout_message_omits_gpu_hint_for_non_gpu_requests() {
-        let message = provisioning_timeout_message(120, false, None);
+        let message = provisioning_timeout_message(120, None, None);
+
+        assert_eq!(message, "sandbox provisioning timed out after 120s");
+    }
+
+    #[test]
+    fn provisioning_timeout_message_omits_gpu_hint_without_gpu_requirements() {
+        let resource_requirements = ResourceRequirements { gpu: None };
+        let message = provisioning_timeout_message(120, Some(&resource_requirements), None);
 
         assert_eq!(message, "sandbox provisioning timed out after 120s");
     }
diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
index ef89cdf1e..3565c3b2c 100644
--- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
+++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
@@ -18,13 +18,14 @@ use openshell_core::proto::{
     ExecSandboxInput, ExecSandboxRequest, GatewayMessage, GetGatewayConfigRequest,
     GetGatewayConfigResponse, GetProviderRequest, GetSandboxConfigRequest,
     GetSandboxConfigResponse, GetSandboxProviderEnvironmentRequest,
-    GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse,
-    ListProvidersRequest, ListProvidersResponse, ListSandboxProvidersRequest,
-    ListSandboxProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, PlatformEvent,
-    ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, Sandbox, SandboxCondition,
-    SandboxLogLine, SandboxPhase, SandboxResponse, SandboxStatus, SandboxStreamEvent,
-    ServiceStatus, SettingValue, SupervisorMessage, UpdateProviderRequest, WatchSandboxRequest,
-    sandbox_stream_event, setting_value,
+    GetSandboxProviderEnvironmentResponse, GetSandboxRequest, GpuResourceRequirements,
+    HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse,
+    ListSandboxProvidersRequest, ListSandboxProvidersResponse, ListSandboxesRequest,
+    ListSandboxesResponse, PlatformEvent, ProviderResponse, RevokeSshSessionRequest,
+    RevokeSshSessionResponse, Sandbox, SandboxCondition, SandboxLogLine, SandboxPhase,
+    SandboxResponse, SandboxStatus, SandboxStreamEvent, ServiceStatus, SettingValue,
+    SupervisorMessage, UpdateProviderRequest, WatchSandboxRequest, sandbox_stream_event,
+    setting_value,
 };
 use std::collections::HashMap;
 use std::fs;
@@ -1087,6 +1088,10 @@ fn test_tls(server: &TestServer) -> TlsOptions {
     server.tls.with_gateway_name("openshell")
 }
 
+fn gpu_requirements() -> GpuResourceRequirements {
+    GpuResourceRequirements {}
+}
+
 #[tokio::test]
 async fn sandbox_create_keeps_command_sessions_by_default() {
     let server = run_server().await;
@@ -1103,7 +1108,7 @@ async fn sandbox_create_keeps_command_sessions_by_default() {
         "openshell",
         &[],
         true,
-        false,
+        None,
         None,
         None,
         None,
@@ -1146,7 +1151,7 @@ async fn sandbox_create_sends_cpu_and_memory_limits_only() {
         "openshell",
         &[],
         true,
-        false,
+        None,
         Some("500m"),
         Some("2Gi"),
         None,
@@ -1223,7 +1228,7 @@ async fn sandbox_create_sends_driver_config_json() {
         "openshell",
         &[],
         true,
-        false,
+        None,
         None,
         None,
         Some(r#"{"kubernetes":{"pod":{"priority_class_name":"batch-low"}}}"#),
@@ -1280,6 +1285,56 @@ async fn sandbox_create_sends_driver_config_json() {
     );
 }
 
+#[tokio::test]
+async fn sandbox_create_sends_gpu_default_request() {
+    let server = run_server().await;
+    let fake_ssh_dir = tempfile::tempdir().unwrap();
+    let xdg_dir = tempfile::tempdir().unwrap();
+    let _env = test_env(&fake_ssh_dir, &xdg_dir);
+    let tls = test_tls(&server);
+    install_fake_ssh(&fake_ssh_dir);
+
+    run::sandbox_create(
+        &server.endpoint,
+        Some("gpu-default"),
+        None,
+        "openshell",
+        &[],
+        true,
+        Some(gpu_requirements()),
+        None,
+        None,
+        None,
+        None,
+        &[],
+        None,
+        None,
+        &["echo".to_string(), "OK".to_string()],
+        Some(false),
+        Some(false),
+        &HashMap::new(),
+        &HashMap::new(),
+        "manual",
+        &tls,
+    )
+    .await
+    .expect("sandbox create should succeed");
+
+    let requests = create_requests(&server).await;
+    let gpu = requests[0]
+        .spec
+        .as_ref()
+        .and_then(|spec| spec.resource_requirements.as_ref())
+        .and_then(|requirements| requirements.gpu.as_ref())
+        .expect("GPU requirement should be sent");
+
+    assert!(requests[0]
+        .spec
+        .as_ref()
+        .and_then(|spec| spec.resource_requirements.as_ref())
+        .is_some_and(|requirements| requirements.gpu.is_some()));
+}
+
 #[tokio::test]
 async fn sandbox_create_does_not_infer_command_providers_when_v2_enabled() {
     let server = run_server().await;
@@ -1297,7 +1352,7 @@ async fn sandbox_create_does_not_infer_command_providers_when_v2_enabled() {
         "openshell",
         &[],
         true,
-        false,
+        None,
         None,
         None,
         None,
@@ -1355,7 +1410,7 @@ async fn sandbox_create_returns_vm_error_without_waiting_for_timeout() {
         "openshell",
         &[],
         true,
-        false,
+        None,
         None,
         None,
         None,
@@ -1409,7 +1464,7 @@ async fn sandbox_create_keeps_waiting_while_vm_progress_arrives() {
         "openshell",
         &[],
         true,
-        false,
+        None,
         None,
         None,
         None,
@@ -1455,7 +1510,7 @@ async fn sandbox_create_times_out_when_only_logs_arrive() {
         "openshell",
         &[],
         true,
-        false,
+        None,
         None,
         None,
         None,
@@ -1497,7 +1552,7 @@ async fn sandbox_create_deletes_command_sessions_with_no_keep() {
         "openshell",
         &[],
         false,
-        false,
+        None,
         None,
         None,
         None,
@@ -1543,7 +1598,7 @@ async fn sandbox_create_deletes_shell_sessions_with_no_keep() {
         "openshell",
         &[],
         false,
-        false,
+        None,
         None,
         None,
         None,
@@ -1589,7 +1644,7 @@ async fn sandbox_create_keeps_sandbox_with_hidden_keep_flag() {
         "openshell",
         &[],
         true,
-        false,
+        None,
         None,
         None,
         None,
@@ -1635,7 +1690,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() {
         "openshell",
         &[],
         false,
-        false,
+        None,
         None,
         None,
         None,
@@ -1787,7 +1842,7 @@ async fn sandbox_create_sends_environment_variables() {
         "openshell",
         &[],
         true,
-        false,
+        None,
         None,
         None,
         None,
diff --git a/crates/openshell-core/src/gpu.rs b/crates/openshell-core/src/gpu.rs
index 3e5b38d7a..fe6744a28 100644
--- a/crates/openshell-core/src/gpu.rs
+++ b/crates/openshell-core/src/gpu.rs
@@ -1,12 +1,39 @@
 // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-//! Shared GPU request helpers.
+//! Shared GPU resource requirement helpers.
 
 use std::fmt;
 use std::sync::atomic::{AtomicUsize, Ordering};
 
 use crate::config::CDI_GPU_DEVICE_ALL;
+use crate::proto::ResourceRequirements as SandboxResourceRequirements;
+use crate::proto::compute::v1::{
+    GpuResourceRequirements as DriverGpuResourceRequirements,
+    ResourceRequirements as DriverResourceRequirements,
+};
+
+/// Return whether sandbox resource requirements request a GPU.
+#[must_use]
+pub fn sandbox_gpu_requested(resources: Option<&SandboxResourceRequirements>) -> bool {
+    resources
+        .and_then(|resources| resources.gpu.as_ref())
+        .is_some()
+}
+
+/// Return whether compute-driver resource requirements request a GPU.
+#[must_use]
+pub fn driver_gpu_requested(resources: Option<&DriverResourceRequirements>) -> bool {
+    driver_gpu_requirements(resources).is_some()
+}
+
+/// Return the requested compute-driver GPU requirements, if present.
+#[must_use]
+pub fn driver_gpu_requirements(
+    resources: Option<&DriverResourceRequirements>,
+) -> Option<&DriverGpuResourceRequirements> {
+    resources.and_then(|resources| resources.gpu.as_ref())
+}
 
 const CDI_NVIDIA_GPU_PREFIX: &str = "nvidia.com/gpu=";
 const CDI_NVIDIA_GPU_ALL_SUFFIX: &str = "all";
@@ -158,17 +185,17 @@ impl fmt::Display for CdiGpuSelectionError {
 
 impl std::error::Error for CdiGpuSelectionError {}
 
-/// Resolve a local runtime GPU request into CDI device identifiers.
+/// Resolve a compute-driver GPU request into CDI device identifiers.
 ///
 /// `None` means no GPU was requested. Explicit driver-configured CDI devices
 /// pass through unchanged. A default GPU request uses the driver-selected
 /// default CDI ID.
 pub fn cdi_gpu_device_ids(
-    gpu: bool,
+    gpu: Option<&DriverGpuResourceRequirements>,
     cdi_devices: &[String],
     selected_default_device: Option<&str>,
 ) -> Result<Option<Vec<String>>, CdiGpuSelectionError> {
-    if !gpu {
+    if gpu.is_none() {
         return Ok(None);
     }
     if !cdi_devices.is_empty() {
@@ -180,8 +207,11 @@ pub fn cdi_gpu_device_ids(
 
 /// Resolve a GPU request with the legacy all-GPU default.
 #[must_use]
-pub fn cdi_gpu_device_ids_or_all(gpu: bool, cdi_devices: &[String]) -> Option<Vec<String>> {
-    gpu.then(|| {
+pub fn cdi_gpu_device_ids_or_all(
+    gpu: Option<&DriverGpuResourceRequirements>,
+    cdi_devices: &[String],
+) -> Option<Vec<String>> {
+    gpu.map(|_| {
         if cdi_devices.is_empty() {
             vec![CDI_GPU_DEVICE_ALL.to_string()]
         } else {
@@ -200,30 +230,36 @@ mod tests {
 
     #[test]
     fn cdi_gpu_device_ids_returns_none_when_absent() {
-        assert_eq!(cdi_gpu_device_ids(false, &[], None), Ok(None));
+        assert_eq!(cdi_gpu_device_ids(None, &[], None), Ok(None));
     }
 
     #[test]
     fn cdi_gpu_device_ids_uses_selected_default_device() {
+        let gpu = DriverGpuResourceRequirements {};
+
         assert_eq!(
-            cdi_gpu_device_ids(true, &[], Some("nvidia.com/gpu=0")),
+            cdi_gpu_device_ids(Some(&gpu), &[], Some("nvidia.com/gpu=0")),
             Ok(Some(vec!["nvidia.com/gpu=0".to_string()]))
         );
     }
 
     #[test]
     fn cdi_gpu_device_ids_rejects_missing_default_device() {
+        let gpu = DriverGpuResourceRequirements {};
+
         assert_eq!(
-            cdi_gpu_device_ids(true, &[], None),
+            cdi_gpu_device_ids(Some(&gpu), &[], None),
             Err(CdiGpuSelectionError::MissingDefaultDevice)
         );
     }
 
     #[test]
     fn cdi_gpu_device_ids_passes_explicit_device_ids_through() {
+        let gpu = DriverGpuResourceRequirements {};
+
         assert_eq!(
             cdi_gpu_device_ids(
-                true,
+                Some(&gpu),
                 &[
                     "nvidia.com/gpu=0".to_string(),
                     "nvidia.com/gpu=1".to_string()
@@ -239,8 +275,10 @@ mod tests {
 
     #[test]
     fn cdi_gpu_device_ids_or_all_uses_all_when_no_devices_are_configured() {
+        let gpu = DriverGpuResourceRequirements {};
+
         assert_eq!(
-            cdi_gpu_device_ids_or_all(true, &[]),
+            cdi_gpu_device_ids_or_all(Some(&gpu), &[]),
             Some(vec![CDI_GPU_DEVICE_ALL.to_string()])
         );
     }
diff --git a/crates/openshell-driver-docker/README.md b/crates/openshell-driver-docker/README.md
index 71159fe66..468d6d8cd 100644
--- a/crates/openshell-driver-docker/README.md
+++ b/crates/openshell-driver-docker/README.md
@@ -32,7 +32,7 @@ contract:
 | `apparmor=unconfined` | Avoids Docker's default profile blocking required mount operations. |
 | `restart_policy = unless-stopped` | Keeps managed sandboxes resumable across daemon or gateway restarts. |
 | `PidsLimit` | Enforces the sandbox PID budget at the Docker cgroup layer. Set `[openshell.drivers.docker].sandbox_pids_limit = 0` to inherit the Docker/runtime default. |
-| CDI GPU request | Uses `driver_config.cdi_devices` when set; otherwise selects one concrete NVIDIA CDI GPU when the sandbox spec asks for GPU support and daemon CDI support is detected. Docker daemon `/info` can permit `nvidia.com/gpu=all` as a WSL2 all-only compatibility fallback. |
+| CDI GPU request | Uses `driver_config.cdi_devices` when set; otherwise selects one concrete NVIDIA CDI GPU when the sandbox resource requirements ask for GPU support and daemon CDI support is detected. Docker daemon `/info` can permit `nvidia.com/gpu=all` as a WSL2 all-only compatibility fallback. Counted GPU requests are rejected. |
 
 The agent child process does not retain these supervisor privileges.
 
diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs
index 963e7a0f7..ae3da7798 100644
--- a/crates/openshell-driver-docker/src/lib.rs
+++ b/crates/openshell-driver-docker/src/lib.rs
@@ -29,6 +29,7 @@ use openshell_core::driver_utils::{
 };
 use openshell_core::gpu::{
     CdiGpuInventory, CdiGpuRoundRobin, CdiGpuSelectionError, cdi_gpu_device_ids,
+    driver_gpu_requirements,
 };
 use openshell_core::progress::{
     PROGRESS_STEP_PULLING_IMAGE, PROGRESS_STEP_REQUESTING_SANDBOX, PROGRESS_STEP_STARTING_SANDBOX,
@@ -38,11 +39,11 @@ use openshell_core::proto::compute::v1::{
     CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse,
     DriverCondition, DriverPlatformEvent, DriverSandbox, DriverSandboxStatus,
     DriverSandboxTemplate, GetCapabilitiesRequest, GetCapabilitiesResponse, GetSandboxRequest,
-    GetSandboxResponse, ListSandboxesRequest, ListSandboxesResponse, StopSandboxRequest,
-    StopSandboxResponse, ValidateSandboxCreateRequest, ValidateSandboxCreateResponse,
-    WatchSandboxesDeletedEvent, WatchSandboxesEvent, WatchSandboxesPlatformEvent,
-    WatchSandboxesRequest, WatchSandboxesSandboxEvent, compute_driver_server::ComputeDriver,
-    watch_sandboxes_event,
+    GetSandboxResponse, GpuResourceRequirements, ListSandboxesRequest, ListSandboxesResponse,
+    StopSandboxRequest, StopSandboxResponse, ValidateSandboxCreateRequest,
+    ValidateSandboxCreateResponse, WatchSandboxesDeletedEvent, WatchSandboxesEvent,
+    WatchSandboxesPlatformEvent, WatchSandboxesRequest, WatchSandboxesSandboxEvent,
+    compute_driver_server::ComputeDriver, watch_sandboxes_event,
 };
 use openshell_core::proto_struct::{
     deserialize_optional_non_empty_string_list, struct_to_json_value,
@@ -471,7 +472,8 @@ impl DockerComputeDriver {
 
         let driver_config =
             DockerSandboxDriverConfig::from_template(template).map_err(Status::invalid_argument)?;
-        Self::validate_gpu_request(spec.gpu, config.supports_gpu, &driver_config)?;
+        let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
+        Self::validate_gpu_request(gpu_requirements, config.supports_gpu, &driver_config)?;
         Ok(())
     }
 
@@ -519,17 +521,17 @@ impl DockerComputeDriver {
     }
 
     fn validate_gpu_request(
-        gpu: bool,
+        gpu_requirements: Option<&GpuResourceRequirements>,
         supports_gpu: bool,
         driver_config: &DockerSandboxDriverConfig,
     ) -> Result<(), Status> {
-        if !gpu && driver_config.cdi_devices.is_some() {
+        if gpu_requirements.is_none() && driver_config.cdi_devices.is_some() {
             return Err(Status::invalid_argument(
                 "driver_config.cdi_devices requires gpu=true",
             ));
         }
 
-        if gpu && !supports_gpu {
+        if gpu_requirements.is_some() && !supports_gpu {
             return Err(Status::failed_precondition(
                 "docker GPU sandboxes require Docker CDI support. Enable CDI on the Docker daemon, then restart the OpenShell gateway/server so GPU capability is detected.",
             ));
@@ -592,7 +594,8 @@ impl DockerComputeDriver {
         };
         let driver_config =
             DockerSandboxDriverConfig::from_sandbox(sandbox).map_err(Status::invalid_argument)?;
-        if !spec.gpu || driver_config.cdi_devices.is_some() {
+        let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
+        if gpu_requirements.is_none() || driver_config.cdi_devices.is_some() {
             return Ok(None);
         }
 
@@ -2219,13 +2222,14 @@ fn build_device_requests(
         .map_err(Status::invalid_argument)?
         .cdi_devices
         .unwrap_or_default();
-    if !spec.gpu && !cdi_devices.is_empty() {
+    let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
+    if gpu_requirements.is_none() && !cdi_devices.is_empty() {
         return Err(Status::invalid_argument(
             "driver_config.cdi_devices requires gpu=true",
         ));
     }
 
-    cdi_gpu_device_ids(spec.gpu, &cdi_devices, selected_default_device)
+    cdi_gpu_device_ids(gpu_requirements, &cdi_devices, selected_default_device)
         .map(|device_ids| {
             device_ids.map(|device_ids| {
                 vec![DeviceRequest {
diff --git a/crates/openshell-driver-docker/src/tests.rs b/crates/openshell-driver-docker/src/tests.rs
index d5132fe1a..a48b82f74 100644
--- a/crates/openshell-driver-docker/src/tests.rs
+++ b/crates/openshell-driver-docker/src/tests.rs
@@ -13,7 +13,8 @@ use openshell_core::progress::{
     PROGRESS_STEP_STARTING_SANDBOX,
 };
 use openshell_core::proto::compute::v1::{
-    DriverResourceRequirements, DriverSandboxSpec, DriverSandboxTemplate,
+    DriverResourceRequirements, DriverSandboxSpec, DriverSandboxTemplate, GpuResourceRequirements,
+    ResourceRequirements,
 };
 use std::fs;
 use std::net::{IpAddr, Ipv4Addr, SocketAddr};
@@ -41,7 +42,7 @@ fn test_sandbox() -> DriverSandbox {
                 environment: HashMap::from([("TEMPLATE_ENV".to_string(), "template".to_string())]),
                 ..Default::default()
             }),
-            gpu: false,
+            resource_requirements: None,
             sandbox_token: String::new(),
         }),
         status: None,
@@ -79,6 +80,12 @@ fn list_string_driver_config(field: &str, values: &[&str]) -> prost_types::Struc
     }
 }
 
+fn gpu_resources() -> ResourceRequirements {
+    ResourceRequirements {
+        gpu: Some(GpuResourceRequirements {}),
+    }
+}
+
 fn runtime_config() -> DockerDriverRuntimeConfig {
     DockerDriverRuntimeConfig {
         default_image: "image:latest".to_string(),
@@ -1045,7 +1052,7 @@ fn build_container_create_body_clears_inherited_cmd() {
 fn validate_sandbox_rejects_gpu_when_cdi_unavailable() {
     let config = runtime_config();
     let mut sandbox = test_sandbox();
-    sandbox.spec.as_mut().unwrap().gpu = true;
+    sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resources());
 
     let err = DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap_err();
 
@@ -1058,7 +1065,7 @@ fn validate_sandbox_rejects_invalid_cdi_devices_before_gpu_capability() {
     let config = runtime_config();
     let mut sandbox = test_sandbox();
     let spec = sandbox.spec.as_mut().unwrap();
-    spec.gpu = true;
+    spec.resource_requirements = Some(gpu_resources());
     spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&[]));
 
     let err = DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap_err();
@@ -1073,7 +1080,7 @@ fn validate_sandbox_rejects_unknown_driver_config_fields() {
     let config = runtime_config();
     let mut sandbox = test_sandbox();
     let spec = sandbox.spec.as_mut().unwrap();
-    spec.gpu = true;
+    spec.resource_requirements = Some(gpu_resources());
     spec.template.as_mut().unwrap().driver_config =
         Some(cdi_device_typo_config(&["nvidia.com/gpu=0"]));
 
@@ -1083,12 +1090,11 @@ fn validate_sandbox_rejects_unknown_driver_config_fields() {
     assert!(err.message().contains("unknown field"));
 }
 
-#[test]
 fn validate_sandbox_rejects_template_errors_before_device_config() {
     let config = runtime_config();
     let mut sandbox = test_sandbox();
     let spec = sandbox.spec.as_mut().unwrap();
-    spec.gpu = true;
+    spec.resource_requirements = Some(gpu_resources());
     let template = spec.template.as_mut().unwrap();
     template.agent_socket_path = "/tmp/agent.sock".to_string();
     template.driver_config = Some(cdi_devices_config(&[]));
@@ -1126,7 +1132,7 @@ fn build_container_create_body_maps_default_gpu_to_selected_cdi_device() {
     let mut config = runtime_config();
     config.supports_gpu = true;
     let mut sandbox = test_sandbox();
-    sandbox.spec.as_mut().unwrap().gpu = true;
+    sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resources());
 
     let create_body =
         build_container_create_body_with_default(&sandbox, &config, Some("nvidia.com/gpu=1"))
@@ -1168,7 +1174,7 @@ fn build_container_create_body_passes_explicit_cdi_device_id_through() {
     config.supports_gpu = true;
     let mut sandbox = test_sandbox();
     let spec = sandbox.spec.as_mut().unwrap();
-    spec.gpu = true;
+    spec.resource_requirements = Some(gpu_resources());
     spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&["nvidia.com/gpu=0"]));
 
     let create_body = build_container_create_body(&sandbox, &config).unwrap();
@@ -1207,7 +1213,7 @@ fn build_container_create_body_rejects_cdi_devices_without_gpu() {
 fn build_container_create_body_rejects_empty_cdi_devices() {
     let mut sandbox = test_sandbox();
     let spec = sandbox.spec.as_mut().unwrap();
-    spec.gpu = true;
+    spec.resource_requirements = Some(gpu_resources());
     spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&[]));
 
     let err = build_container_create_body(&sandbox, &runtime_config()).unwrap_err();
diff --git a/crates/openshell-driver-kubernetes/README.md b/crates/openshell-driver-kubernetes/README.md
index 6ad0b27c8..3cdb9fa57 100644
--- a/crates/openshell-driver-kubernetes/README.md
+++ b/crates/openshell-driver-kubernetes/README.md
@@ -62,9 +62,9 @@ the supervisor's network namespace mount setup on AppArmor-enabled nodes.
 ## GPU Support
 
 When a sandbox requests GPU support, the driver checks node allocatable capacity
-for `nvidia.com/gpu` and requests one GPU resource in the workload spec. The
-sandbox image must provide the user-space libraries needed by the agent
-workload.
+for `nvidia.com/gpu` and requests the configured GPU count in the workload spec.
+When no count is set, the driver requests one GPU resource. The sandbox image
+must provide the user-space libraries needed by the agent workload.
 
 ## Driver Config POC
 
@@ -97,5 +97,6 @@ POC parser renders the keys listed above and rejects unknown fields.
 `pod.runtime_class_name` maps to PodSpec `runtimeClassName` and overrides the
 driver's configured `default_runtime_class_name`; the typed public
 `SandboxTemplate.runtime_class_name` still takes precedence when set. Use the
-public `gpu` flag for the default GPU request and `driver_config` only for
-additional driver-owned resource details.
+public `--gpu` flag for the default GPU request, pass a count to `--gpu` for
+counted GPU requests, and use `driver_config` only for additional driver-owned
+resource details.
diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs
index dc636efc3..d634cea9a 100644
--- a/crates/openshell-driver-kubernetes/src/driver.rs
+++ b/crates/openshell-driver-kubernetes/src/driver.rs
@@ -17,6 +17,7 @@ use kube::{Client, Error as KubeError};
 use openshell_core::driver_utils::{
     LABEL_MANAGED_BY, LABEL_MANAGED_BY_VALUE, LABEL_SANDBOX_ID, SUPERVISOR_IMAGE_BINARY_PATH,
 };
+use openshell_core::gpu::driver_gpu_requirements;
 use openshell_core::progress::{
     PROGRESS_STEP_PULLING_IMAGE, PROGRESS_STEP_REQUESTING_SANDBOX, PROGRESS_STEP_STARTING_SANDBOX,
     format_bytes, mark_progress_active, mark_progress_complete, mark_progress_detail,
@@ -25,8 +26,9 @@ use openshell_core::proto::compute::v1::{
     DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent,
     DriverSandbox as Sandbox, DriverSandboxSpec as SandboxSpec,
     DriverSandboxStatus as SandboxStatus, DriverSandboxTemplate as SandboxTemplate,
-    GetCapabilitiesResponse, WatchSandboxesDeletedEvent, WatchSandboxesEvent,
-    WatchSandboxesPlatformEvent, WatchSandboxesSandboxEvent, watch_sandboxes_event,
+    GetCapabilitiesResponse, GpuResourceRequirements, WatchSandboxesDeletedEvent,
+    WatchSandboxesEvent, WatchSandboxesPlatformEvent, WatchSandboxesSandboxEvent,
+    watch_sandboxes_event,
 };
 use openshell_core::proto_struct::{struct_to_json_object, value_to_json};
 use serde::Deserialize;
@@ -281,8 +283,12 @@ impl KubernetesComputeDriver {
     pub async fn validate_sandbox_create(&self, sandbox: &Sandbox) -> Result<(), tonic::Status> {
         let _ = KubernetesSandboxDriverConfig::from_sandbox(sandbox)
             .map_err(tonic::Status::invalid_argument)?;
-        let gpu_requested = sandbox.spec.as_ref().is_some_and(|spec| spec.gpu);
-        if gpu_requested
+        let gpu_requirements = sandbox
+            .spec
+            .as_ref()
+            .and_then(|spec| driver_gpu_requirements(spec.resource_requirements.as_ref()));
+        validate_gpu_request(gpu_requirements)?;
+        if gpu_requirements.is_some()
             && !self.has_gpu_capacity().await.map_err(|err| {
                 tonic::Status::internal(format!("check GPU node capacity failed: {err}"))
             })?
@@ -376,6 +382,13 @@ impl KubernetesComputeDriver {
     pub async fn create_sandbox(&self, sandbox: &Sandbox) -> Result<(), KubernetesDriverError> {
         let _ = KubernetesSandboxDriverConfig::from_sandbox(sandbox)
             .map_err(KubernetesDriverError::InvalidArgument)?;
+        let gpu_requirements = sandbox
+            .spec
+            .as_ref()
+            .and_then(|spec| driver_gpu_requirements(spec.resource_requirements.as_ref()));
+        validate_gpu_request(gpu_requirements).map_err(|status| {
+            KubernetesDriverError::InvalidArgument(status.message().to_string())
+        })?;
         let name = sandbox.name.as_str();
         info!(
             sandbox_id = %sandbox.id,
@@ -638,6 +651,12 @@ impl KubernetesComputeDriver {
     }
 }
 
+fn validate_gpu_request(
+    _gpu_requirements: Option<&GpuResourceRequirements>,
+) -> Result<(), tonic::Status> {
+    Ok(())
+}
+
 fn sandbox_labels(sandbox: &Sandbox) -> BTreeMap<String, String> {
     let mut labels = BTreeMap::new();
     labels.insert(LABEL_SANDBOX_ID.to_string(), sandbox.id.clone());
@@ -1201,7 +1220,13 @@ fn sandbox_to_k8s_spec(
         if let Some(template) = spec.template.as_ref() {
             root.insert(
                 "podTemplate".to_string(),
-                sandbox_template_to_k8s(template, spec.gpu, &pod_env, inject_workspace, params),
+                sandbox_template_to_k8s_with_gpu_requirements(
+                    template,
+                    driver_gpu_requirements(spec.resource_requirements.as_ref()),
+                    &pod_env,
+                    inject_workspace,
+                    params,
+                ),
             );
             if !template.agent_socket_path.is_empty() {
                 root.insert(
@@ -1231,9 +1256,9 @@ fn sandbox_to_k8s_spec(
         let pod_env = spec_pod_env(spec);
         root.insert(
             "podTemplate".to_string(),
-            sandbox_template_to_k8s(
+            sandbox_template_to_k8s_with_gpu_requirements(
                 &SandboxTemplate::default(),
-                spec.is_some_and(|s| s.gpu),
+                driver_gpu_requirements(spec.and_then(|s| s.resource_requirements.as_ref())),
                 &pod_env,
                 inject_workspace,
                 params,
@@ -1246,12 +1271,30 @@ fn sandbox_to_k8s_spec(
     )
 }
 
+#[cfg(test)]
 fn sandbox_template_to_k8s(
     template: &SandboxTemplate,
     gpu: bool,
     spec_environment: &std::collections::HashMap<String, String>,
     inject_workspace: bool,
     params: &SandboxPodParams<'_>,
+) -> serde_json::Value {
+    let gpu_requirements = gpu.then_some(GpuResourceRequirements {});
+    sandbox_template_to_k8s_with_gpu_requirements(
+        template,
+        gpu_requirements.as_ref(),
+        spec_environment,
+        inject_workspace,
+        params,
+    )
+}
+
+fn sandbox_template_to_k8s_with_gpu_requirements(
+    template: &SandboxTemplate,
+    gpu_requirements: Option<&GpuResourceRequirements>,
+    spec_environment: &std::collections::HashMap<String, String>,
+    inject_workspace: bool,
+    params: &SandboxPodParams<'_>,
 ) -> serde_json::Value {
     let driver_config = kubernetes_driver_config(template);
 
@@ -1331,7 +1374,7 @@ fn sandbox_template_to_k8s(
 
     if use_user_namespaces {
         spec.insert("hostUsers".to_string(), serde_json::json!(false));
-        if gpu {
+        if gpu_requirements.is_some() {
             warn!(
                 "GPU sandbox with user namespaces enabled — \
                  NVIDIA device plugin compatibility is unverified"
@@ -1440,7 +1483,7 @@ fn sandbox_template_to_k8s(
         serde_json::Value::Array(volume_mounts),
     );
 
-    if let Some(resources) = container_resources(template, gpu) {
+    if let Some(resources) = container_resources(template, gpu_requirements) {
         container.insert("resources".to_string(), resources);
     }
     apply_agent_driver_resources(&mut container, &driver_config.containers.agent.resources);
@@ -1618,7 +1661,10 @@ fn app_armor_profile_to_k8s(profile: &AppArmorProfile) -> serde_json::Value {
     value
 }
 
-fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option<serde_json::Value> {
+fn container_resources(
+    template: &SandboxTemplate,
+    gpu_requirements: Option<&GpuResourceRequirements>,
+) -> Option<serde_json::Value> {
     // Start from the raw resources passthrough in platform_config (preserves
     // custom resource types like GPU limits that users set via the public API
     // Struct), then overlay the typed DriverResourceRequirements on top.
@@ -1651,8 +1697,8 @@ fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option<serde_js
         apply("requests", "memory", memory_request);
     }
 
-    if gpu {
-        apply_gpu_limit(&mut resources);
+    if gpu_requirements.is_some() {
+        apply_gpu_limit(&mut resources, GPU_RESOURCE_QUANTITY);
     }
     if resources.as_object().is_some_and(serde_json::Map::is_empty) {
         None
@@ -1661,10 +1707,10 @@ fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option<serde_js
     }
 }
 
-fn apply_gpu_limit(resources: &mut serde_json::Value) {
+fn apply_gpu_limit(resources: &mut serde_json::Value, quantity: &str) {
     let Some(resources_obj) = resources.as_object_mut() else {
         *resources = serde_json::json!({});
-        return apply_gpu_limit(resources);
+        return apply_gpu_limit(resources, quantity);
     };
 
     let limits = resources_obj
@@ -1672,13 +1718,10 @@ fn apply_gpu_limit(resources: &mut serde_json::Value) {
         .or_insert_with(|| serde_json::json!({}));
     let Some(limits_obj) = limits.as_object_mut() else {
         *limits = serde_json::json!({});
-        return apply_gpu_limit(resources);
+        return apply_gpu_limit(resources, quantity);
     };
 
-    limits_obj.insert(
-        GPU_RESOURCE_NAME.to_string(),
-        serde_json::json!(GPU_RESOURCE_QUANTITY),
-    );
+    limits_obj.insert(GPU_RESOURCE_NAME.to_string(), serde_json::json!(quantity));
 }
 
 #[allow(clippy::too_many_arguments)]
@@ -1927,6 +1970,7 @@ mod tests {
         PROGRESS_ACTIVE_DETAIL_KEY, PROGRESS_ACTIVE_STEP_KEY, PROGRESS_COMPLETE_LABEL_KEY,
         PROGRESS_COMPLETE_STEP_KEY,
     };
+    use openshell_core::proto::compute::v1::GpuResourceRequirements;
     use prost_types::{Struct, Value, value::Kind};
 
     static ENV_LOCK: std::sync::LazyLock<std::sync::Mutex<()>> =
@@ -2000,10 +2044,9 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                gpu: true,
                 template: Some(SandboxTemplate {
                     driver_config: Some(json_struct(serde_json::json!({
-                        "cdi_devices": ["nvidia.com/gpu=0"]
+                        "gpu_device_ids": ["0000:2d:00.0"]
                     }))),
                     ..Default::default()
                 }),
@@ -2014,9 +2057,9 @@ mod tests {
 
         let err = KubernetesSandboxDriverConfig::from_sandbox(&sandbox).unwrap_err();
         assert!(err.contains("unknown field"));
+        assert!(err.contains("gpu_device_ids"));
     }
 
-    #[test]
     fn kube_pulling_event_adds_image_progress_metadata() {
         let mut metadata = std::collections::HashMap::new();
 
@@ -2345,7 +2388,6 @@ mod tests {
         );
     }
 
-    #[test]
     fn gpu_sandbox_uses_template_runtime_class_name_when_set() {
         let template = SandboxTemplate {
             platform_config: Some(Struct {
diff --git a/crates/openshell-driver-podman/README.md b/crates/openshell-driver-podman/README.md
index 6342c760e..1291dbc29 100644
--- a/crates/openshell-driver-podman/README.md
+++ b/crates/openshell-driver-podman/README.md
@@ -46,7 +46,7 @@ The container spec in `container.rs` sets these security-critical fields:
 | `no_new_privileges` | `true` | Prevents privilege escalation after exec. |
 | `seccomp_profile_path` | `unconfined` | The supervisor installs its own policy-aware BPF filter. A container-level profile can block Landlock/seccomp syscalls during setup. |
 | `mounts` | Private tmpfs at `/run/netns` | Lets the supervisor create named network namespaces in rootless Podman. |
-| CDI GPU devices | `driver_config.cdi_devices` when set, otherwise one concrete NVIDIA CDI GPU. Local `/dev/dxg` permits `nvidia.com/gpu=all` as a WSL2 all-only compatibility fallback. | Exposes requested GPUs to GPU-enabled sandbox containers. |
+| CDI GPU devices | `driver_config.cdi_devices` when set, otherwise one concrete NVIDIA CDI GPU. Local `/dev/dxg` permits `nvidia.com/gpu=all` as a WSL2 all-only compatibility fallback. | Exposes requested GPUs to GPU-enabled sandbox containers. Counted GPU requests are rejected. |
 
 The restricted agent child does not retain these supervisor privileges.
 
diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs
index e1a511819..348ffe265 100644
--- a/crates/openshell-driver-podman/src/container.rs
+++ b/crates/openshell-driver-podman/src/container.rs
@@ -5,7 +5,7 @@
 
 use crate::config::PodmanComputeConfig;
 use openshell_core::ComputeDriverError;
-use openshell_core::gpu::cdi_gpu_device_ids;
+use openshell_core::gpu::{cdi_gpu_device_ids, driver_gpu_requirements};
 use openshell_core::proto::compute::v1::{DriverSandbox, DriverSandboxTemplate};
 use openshell_core::proto_struct::deserialize_optional_non_empty_string_list;
 use openshell_core::{driver_mounts, proto_struct};
@@ -487,13 +487,14 @@ fn build_devices(
     let cdi_devices = PodmanSandboxDriverConfig::from_sandbox(sandbox)?
         .cdi_devices
         .unwrap_or_default();
-    if !spec.gpu && !cdi_devices.is_empty() {
+    let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
+    if gpu_requirements.is_none() && !cdi_devices.is_empty() {
         return Err(ComputeDriverError::InvalidArgument(
             "driver_config.cdi_devices requires gpu=true".to_string(),
         ));
     }
 
-    cdi_gpu_device_ids(spec.gpu, &cdi_devices, selected_default_device)
+    cdi_gpu_device_ids(gpu_requirements, &cdi_devices, selected_default_device)
         .map(|device_ids| {
             device_ids.map(|device_ids| {
                 device_ids
@@ -1107,6 +1108,7 @@ fn parse_memory_to_bytes(quantity: &str) -> Option<u64> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use openshell_core::proto::compute::v1::{GpuResourceRequirements, ResourceRequirements};
 
     static ENV_LOCK: std::sync::LazyLock<std::sync::Mutex<()>> =
         std::sync::LazyLock::new(|| std::sync::Mutex::new(()));
@@ -1148,6 +1150,12 @@ mod tests {
         }
     }
 
+    fn gpu_resources() -> ResourceRequirements {
+        ResourceRequirements {
+            gpu: Some(GpuResourceRequirements {}),
+        }
+    }
+
     #[test]
     fn parse_cpu_millicore() {
         assert_eq!(parse_cpu_to_microseconds("500m"), Some(50_000));
@@ -1260,7 +1268,7 @@ mod tests {
 
         let mut sandbox = test_sandbox("test-id", "test-name");
         sandbox.spec = Some(DriverSandboxSpec {
-            gpu: true,
+            resource_requirements: Some(gpu_resources()),
             ..Default::default()
         });
         let config = test_config();
@@ -1302,7 +1310,7 @@ mod tests {
 
         let mut sandbox = test_sandbox("test-id", "test-name");
         sandbox.spec = Some(DriverSandboxSpec {
-            gpu: true,
+            resource_requirements: Some(gpu_resources()),
             template: Some(DriverSandboxTemplate {
                 driver_config: Some(cdi_devices_config(&["nvidia.com/gpu=0"])),
                 ..Default::default()
@@ -1343,7 +1351,7 @@ mod tests {
 
         let mut sandbox = test_sandbox("test-id", "test-name");
         sandbox.spec = Some(DriverSandboxSpec {
-            gpu: true,
+            resource_requirements: Some(gpu_resources()),
             template: Some(DriverSandboxTemplate {
                 driver_config: Some(cdi_devices_config(&[])),
                 ..Default::default()
@@ -1363,7 +1371,7 @@ mod tests {
 
         let mut sandbox = test_sandbox("test-id", "test-name");
         sandbox.spec = Some(DriverSandboxSpec {
-            gpu: true,
+            resource_requirements: Some(gpu_resources()),
             template: Some(DriverSandboxTemplate {
                 driver_config: Some(cdi_device_typo_config(&["nvidia.com/gpu=0"])),
                 ..Default::default()
diff --git a/crates/openshell-driver-podman/src/driver.rs b/crates/openshell-driver-podman/src/driver.rs
index f6d266ee4..927c02c28 100644
--- a/crates/openshell-driver-podman/src/driver.rs
+++ b/crates/openshell-driver-podman/src/driver.rs
@@ -12,8 +12,12 @@ use crate::watcher::{
 use openshell_core::ComputeDriverError;
 use openshell_core::config::CDI_GPU_DEVICE_ALL;
 use openshell_core::driver_utils::supervisor_image_should_refresh;
-use openshell_core::gpu::{CdiGpuInventory, CdiGpuRoundRobin, CdiGpuSelectionError};
-use openshell_core::proto::compute::v1::{DriverSandbox, GetCapabilitiesResponse};
+use openshell_core::gpu::{
+    CdiGpuInventory, CdiGpuRoundRobin, CdiGpuSelectionError, driver_gpu_requirements,
+};
+use openshell_core::proto::compute::v1::{
+    DriverSandbox, GetCapabilitiesResponse, GpuResourceRequirements,
+};
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 use std::time::Duration;
@@ -338,18 +342,34 @@ impl PodmanComputeDriver {
         &self,
         sandbox: &DriverSandbox,
     ) -> Result<(), ComputeDriverError> {
-        let gpu_requested = sandbox.spec.as_ref().is_some_and(|s| s.gpu);
+        let gpu_requirements = sandbox
+            .spec
+            .as_ref()
+            .and_then(|spec| spec.resource_requirements.as_ref())
+            .and_then(|requirements| driver_gpu_requirements(Some(requirements)));
         let driver_config = PodmanSandboxDriverConfig::from_sandbox(sandbox)?;
-        if !gpu_requested && driver_config.cdi_devices.is_some() {
+        if gpu_requirements.is_none() && driver_config.cdi_devices.is_some() {
             return Err(ComputeDriverError::InvalidArgument(
                 "driver_config.cdi_devices requires gpu=true".to_string(),
             ));
         }
+        Self::validate_gpu_request(gpu_requirements)?;
         self.validate_user_volume_mounts_available(sandbox).await?;
         let _ = self.peek_default_gpu_device(sandbox)?;
         Ok(())
     }
 
+    fn validate_gpu_request(
+        gpu_requirements: Option<&GpuResourceRequirements>,
+    ) -> Result<(), ComputeDriverError> {
+        if gpu_requirements.is_some() && !Self::has_gpu_capacity() {
+            return Err(ComputeDriverError::Precondition(
+                "GPU sandbox requested, but no NVIDIA GPU devices are available.".to_string(),
+            ));
+        }
+        Ok(())
+    }
+
     fn peek_default_gpu_device(
         &self,
         sandbox: &DriverSandbox,
@@ -373,7 +393,8 @@ impl PodmanComputeDriver {
             return Ok(None);
         };
         let driver_config = PodmanSandboxDriverConfig::from_sandbox(sandbox)?;
-        if !spec.gpu || driver_config.cdi_devices.is_some() {
+        let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
+        if gpu_requirements.is_none() || driver_config.cdi_devices.is_some() {
             return Ok(None);
         }
 
diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs
index 30fecd8be..f0c669b87 100644
--- a/crates/openshell-driver-vm/src/driver.rs
+++ b/crates/openshell-driver-vm/src/driver.rs
@@ -29,6 +29,7 @@ use oci_client::manifest::{
 };
 use oci_client::secrets::RegistryAuth;
 use oci_client::{Reference, RegistryOperation};
+use openshell_core::gpu::driver_gpu_requirements;
 use openshell_core::progress::{
     PROGRESS_STEP_PULLING_IMAGE, PROGRESS_STEP_REQUESTING_SANDBOX, PROGRESS_STEP_STARTING_SANDBOX,
     format_bytes, mark_progress_active, mark_progress_complete, mark_progress_detail,
@@ -627,7 +628,12 @@ impl VmDriver {
         overlay_preparation: OverlayPreparation,
     ) -> Result<(), Status> {
         self.ensure_provisioning_active(&sandbox.id).await?;
-        let is_gpu = sandbox.spec.as_ref().is_some_and(|spec| spec.gpu);
+        let is_gpu = sandbox
+            .spec
+            .as_ref()
+            .and_then(|spec| spec.resource_requirements.as_ref())
+            .and_then(|requirements| driver_gpu_requirements(Some(requirements)))
+            .is_some();
         self.publish_platform_event(
             sandbox.id.clone(),
             platform_event(
@@ -3079,7 +3085,7 @@ fn validate_vm_sandbox(sandbox: &Sandbox, gpu_enabled: bool) -> Result<(), Statu
     if let Some(template) = spec.template.as_ref() {
         validate_vm_sandbox_template(template)?;
     }
-    validate_vm_gpu_request(sandbox, gpu_enabled)?;
+    validate_gpu_request(sandbox, gpu_enabled)?;
 
     Ok(())
 }
@@ -3100,14 +3106,16 @@ fn validate_vm_sandbox_template(template: &SandboxTemplate) -> Result<(), Status
 }
 
 #[allow(clippy::result_large_err)]
-fn validate_vm_gpu_request(sandbox: &Sandbox, gpu_enabled: bool) -> Result<(), Status> {
+fn validate_gpu_request(sandbox: &Sandbox, gpu_enabled: bool) -> Result<(), Status> {
     let spec = sandbox
         .spec
         .as_ref()
         .ok_or_else(|| Status::invalid_argument("sandbox spec is required"))?;
 
+    let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
+
     let _ = vm_gpu_device_id(sandbox)?;
-    if spec.gpu && !gpu_enabled {
+    if gpu_requirements.is_some() && !gpu_enabled {
         return Err(Status::failed_precondition(
             "GPU support is not enabled on this driver; start with --gpu",
         ));
@@ -3124,7 +3132,8 @@ fn vm_gpu_device_id(sandbox: &Sandbox) -> Result<Option<String>, Status> {
         .map_err(Status::invalid_argument)?
         .gpu_device_ids
         .unwrap_or_default();
-    if !spec.gpu && !gpu_device_ids.is_empty() {
+    let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
+    if gpu_requirements.is_none() && !gpu_device_ids.is_empty() {
         return Err(Status::invalid_argument(
             "driver_config.gpu_device_ids requires gpu=true",
         ));
@@ -3135,8 +3144,8 @@ fn vm_gpu_device_id(sandbox: &Sandbox) -> Result<Option<String>, Status> {
         ));
     }
 
-    Ok(spec
-        .gpu
+    Ok(gpu_requirements
+        .is_some()
         .then(|| gpu_device_ids.into_iter().next().unwrap_or_default()))
 }
 
@@ -5064,6 +5073,7 @@ mod tests {
     };
     use openshell_core::proto::compute::v1::{
         DriverSandboxSpec as SandboxSpec, DriverSandboxTemplate as SandboxTemplate,
+        GpuResourceRequirements, ResourceRequirements,
     };
     use prost_types::{Struct, Value, value::Kind};
     use std::fs;
@@ -5102,6 +5112,12 @@ mod tests {
         }
     }
 
+    fn gpu_resources() -> ResourceRequirements {
+        ResourceRequirements {
+            gpu: Some(GpuResourceRequirements {}),
+        }
+    }
+
     #[test]
     fn vm_pulling_layer_event_adds_progress_detail_metadata() {
         let mut event = platform_event(
@@ -5169,7 +5185,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources()),
                 ..Default::default()
             }),
             ..Default::default()
@@ -5185,7 +5201,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources()),
                 ..Default::default()
             }),
             ..Default::default()
@@ -5193,12 +5209,10 @@ mod tests {
         validate_vm_sandbox(&sandbox, true).expect("gpu should be accepted when enabled");
     }
 
-    #[test]
     fn validate_vm_sandbox_rejects_gpu_device_without_gpu() {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                gpu: false,
                 template: Some(SandboxTemplate {
                     driver_config: Some(gpu_device_ids_config(&["0000:2d:00.0"])),
                     ..Default::default()
@@ -5218,7 +5232,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources()),
                 template: Some(SandboxTemplate {
                     driver_config: Some(gpu_device_ids_config(&["0000:2d:00.0", "0000:31:00.0"])),
                     ..Default::default()
@@ -5238,7 +5252,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources()),
                 template: Some(SandboxTemplate {
                     driver_config: Some(gpu_device_ids_config(&[])),
                     ..Default::default()
@@ -5258,7 +5272,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources()),
                 template: Some(SandboxTemplate {
                     driver_config: Some(gpu_device_id_typo_config(&["0000:2d:00.0"])),
                     ..Default::default()
@@ -5278,7 +5292,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources()),
                 template: Some(SandboxTemplate {
                     agent_socket_path: "/tmp/agent.sock".to_string(),
                     driver_config: Some(gpu_device_ids_config(&[])),
diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs
index 272c3907f..3dc28943f 100644
--- a/crates/openshell-server/src/compute/mod.rs
+++ b/crates/openshell-server/src/compute/mod.rs
@@ -20,10 +20,11 @@ use openshell_core::ComputeDriverKind;
 use openshell_core::proto::compute::v1::{
     CreateSandboxRequest, DeleteSandboxRequest, DriverCondition, DriverPlatformEvent,
     DriverResourceRequirements, DriverSandbox, DriverSandboxSpec, DriverSandboxStatus,
-    DriverSandboxTemplate, GetCapabilitiesRequest, GetSandboxRequest, ListSandboxesRequest,
-    ValidateSandboxCreateRequest, WatchSandboxesEvent, WatchSandboxesRequest,
-    compute_driver_client::ComputeDriverClient, compute_driver_server::ComputeDriver,
-    watch_sandboxes_event,
+    DriverSandboxTemplate, GetCapabilitiesRequest, GetSandboxRequest,
+    GpuResourceRequirements as DriverGpuResourceRequirements, ListSandboxesRequest,
+    ResourceRequirements as DriverSandboxResourceRequirements, ValidateSandboxCreateRequest,
+    WatchSandboxesEvent, WatchSandboxesRequest, compute_driver_client::ComputeDriverClient,
+    compute_driver_server::ComputeDriver, watch_sandboxes_event,
 };
 use openshell_core::proto::{
     PlatformEvent, Sandbox, SandboxCondition, SandboxPhase, SandboxSpec, SandboxStatus,
@@ -1500,7 +1501,14 @@ fn driver_sandbox_spec_from_public(
             .as_ref()
             .map(|template| driver_sandbox_template_from_public(template, driver_kind))
             .transpose()?,
-        gpu: spec.gpu,
+        resource_requirements: spec.resource_requirements.as_ref().map(|requirements| {
+            DriverSandboxResourceRequirements {
+                gpu: requirements
+                    .gpu
+                    .as_ref()
+                    .map(|_| DriverGpuResourceRequirements {}),
+            }
+        }),
         sandbox_token: String::new(),
     })
 }
@@ -1881,7 +1889,9 @@ fn derive_phase(status: Option<&DriverSandboxStatus>) -> SandboxPhase {
 }
 
 fn rewrite_user_facing_conditions(status: &mut Option<SandboxStatus>, spec: Option<&SandboxSpec>) {
-    let gpu_requested = spec.is_some_and(|sandbox_spec| sandbox_spec.gpu);
+    let gpu_requested = spec
+        .and_then(|sandbox_spec| sandbox_spec.resource_requirements.as_ref())
+        .is_some_and(|requirements| openshell_core::gpu::sandbox_gpu_requested(Some(requirements)));
     if !gpu_requested {
         return;
     }
@@ -2078,6 +2088,26 @@ mod tests {
         }
     }
 
+    #[test]
+    fn driver_sandbox_spec_from_public_preserves_gpu_requirement() {
+        let public = SandboxSpec {
+            resource_requirements: Some(openshell_core::proto::ResourceRequirements {
+                gpu: Some(openshell_core::proto::GpuResourceRequirements {}),
+            }),
+            ..Default::default()
+        };
+
+        let driver =
+            driver_sandbox_spec_from_public(&public, None).expect("driver spec should map");
+
+        let gpu = driver
+            .resource_requirements
+            .as_ref()
+            .and_then(|requirements| requirements.gpu.as_ref())
+            .expect("driver GPU requirement should be set");
+        assert_eq!(gpu, &DriverGpuResourceRequirements {});
+    }
+
     #[test]
     fn select_driver_config_forwards_only_matching_driver_block() {
         let config = prost_types::Struct {
@@ -2584,7 +2614,9 @@ mod tests {
         rewrite_user_facing_conditions(
             &mut status,
             Some(&SandboxSpec {
-                gpu: true,
+                resource_requirements: Some(openshell_core::proto::ResourceRequirements {
+                    gpu: Some(openshell_core::proto::GpuResourceRequirements {}),
+                }),
                 ..Default::default()
             }),
         );
@@ -2612,13 +2644,7 @@ mod tests {
             ..Default::default()
         });
 
-        rewrite_user_facing_conditions(
-            &mut status,
-            Some(&SandboxSpec {
-                gpu: false,
-                ..Default::default()
-            }),
-        );
+        rewrite_user_facing_conditions(&mut status, Some(&SandboxSpec::default()));
 
         assert_eq!(status.unwrap().conditions[0].message, original);
     }
@@ -2949,7 +2975,9 @@ mod tests {
 
         let sandbox = Sandbox {
             spec: Some(SandboxSpec {
-                gpu: true,
+                resource_requirements: Some(openshell_core::proto::ResourceRequirements {
+                    gpu: Some(openshell_core::proto::GpuResourceRequirements {}),
+                }),
                 ..Default::default()
             }),
             ..sandbox_record("sb-1", "sandbox-a", SandboxPhase::Provisioning)
@@ -2972,7 +3000,9 @@ mod tests {
             SandboxPhase::try_from(stored.phase()).unwrap(),
             SandboxPhase::Ready
         );
-        assert!(stored.spec.as_ref().is_some_and(|spec| spec.gpu));
+        assert!(stored.spec.as_ref().is_some_and(|spec| {
+            openshell_core::gpu::sandbox_gpu_requested(spec.resource_requirements.as_ref())
+        }));
     }
 
     #[tokio::test]
diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs
index e60ce3995..b32901bb2 100644
--- a/crates/openshell-server/src/grpc/sandbox.rs
+++ b/crates/openshell-server/src/grpc/sandbox.rs
@@ -98,9 +98,11 @@ fn emit_sandbox_create_telemetry(
     } else {
         SandboxTemplateSource::Default
     };
+    let gpu_requested =
+        openshell_core::gpu::sandbox_gpu_requested(spec.resource_requirements.as_ref());
     openshell_core::telemetry::emit_sandbox_create(
         outcome,
-        spec.gpu,
+        gpu_requested,
         spec.providers.len() as u64,
         spec.policy.is_some(),
         template_source,
diff --git a/crates/openshell-server/src/grpc/validation.rs b/crates/openshell-server/src/grpc/validation.rs
index 03a69d6e9..2ca71f7b3 100644
--- a/crates/openshell-server/src/grpc/validation.rs
+++ b/crates/openshell-server/src/grpc/validation.rs
@@ -760,7 +760,9 @@ mod tests {
     #[test]
     fn validate_sandbox_spec_accepts_gpu_flag() {
         let spec = SandboxSpec {
-            gpu: true,
+            resource_requirements: Some(openshell_core::proto::ResourceRequirements {
+                gpu: Some(openshell_core::proto::GpuResourceRequirements {}),
+            }),
             ..Default::default()
         };
         assert!(validate_sandbox_spec("gpu-sandbox", &spec).is_ok());
diff --git a/docs/reference/sandbox-compute-drivers.mdx b/docs/reference/sandbox-compute-drivers.mdx
index 95a319c37..e76a4b1ca 100644
--- a/docs/reference/sandbox-compute-drivers.mdx
+++ b/docs/reference/sandbox-compute-drivers.mdx
@@ -53,7 +53,7 @@ openshell sandbox create \
 ```
 
 Driver config is for fields without a stable public flag. Prefer `--cpu`,
-`--memory`, and `--gpu` for portable resource intent.
+`--memory`, and `--gpu` for supported resource intent.
 
 Exact GPU device selection remains driver-owned and requires `--gpu`. Docker
 and Podman accept `cdi_devices`; replace the top-level `docker` key with
diff --git a/proto/compute_driver.proto b/proto/compute_driver.proto
index dbcb9e818..bfff3d42d 100644
--- a/proto/compute_driver.proto
+++ b/proto/compute_driver.proto
@@ -83,8 +83,9 @@ message DriverSandboxSpec {
   map<string, string> environment = 5;
   // Runtime template consumed by the driver during provisioning.
   DriverSandboxTemplate template = 6;
-  // Request NVIDIA GPU resources for this sandbox.
-  bool gpu = 9;
+  // Portable resource requirements used by the gateway for driver selection
+  // and by drivers for provisioning.
+  ResourceRequirements resource_requirements = 9;
   reserved 10;
   reserved "gpu_device";
   // Gateway-minted JWT identifying this sandbox to the gateway. Set by
@@ -96,6 +97,14 @@ message DriverSandboxSpec {
   string sandbox_token = 11;
 }
 
+message ResourceRequirements {
+  // GPU requirements for the sandbox. Presence indicates a GPU request.
+  GpuResourceRequirements gpu = 1;
+}
+
+// Driver GPU resource requirements.
+message GpuResourceRequirements {}
+
 // Driver-owned runtime template consumed by the compute platform.
 //
 // This message describes the sandbox workload in backend-neutral terms.
diff --git a/proto/openshell.proto b/proto/openshell.proto
index d701956d3..b7884f6e7 100644
--- a/proto/openshell.proto
+++ b/proto/openshell.proto
@@ -317,8 +317,9 @@ message SandboxSpec {
   openshell.sandbox.v1.SandboxPolicy policy = 7;
   // Provider names to attach to this sandbox.
   repeated string providers = 8;
-  // Request NVIDIA GPU resources for this sandbox.
-  bool gpu = 9;
+  // Portable resource requirements used by the gateway for driver selection
+  // and by drivers for provisioning.
+  ResourceRequirements resource_requirements = 9;
   reserved 10;
   reserved "gpu_device";
   // Field 11 was `proposal_approval_mode`. The approval mode is now a
@@ -329,6 +330,14 @@ message SandboxSpec {
   reserved "proposal_approval_mode";
 }
 
+message ResourceRequirements {
+  // GPU requirements for the sandbox. Presence indicates a GPU request.
+  GpuResourceRequirements gpu = 1;
+}
+
+// Public GPU resource requirements.
+message GpuResourceRequirements {}
+
 // Public sandbox template mapped onto compute-driver template inputs.
 message SandboxTemplate {
   // Fully-qualified OCI image reference used to boot the sandbox.

From 3a244beff8d7fc84689b19fec8d744893b48cacb Mon Sep 17 00:00:00 2001
From: Evan Lezar <elezar@nvidia.com>
Date: Wed, 17 Jun 2026 11:19:16 +0200
Subject: [PATCH 2/2] feat(gpu): add GPU resource count

Signed-off-by: Evan Lezar <elezar@nvidia.com>
---
 architecture/compute-runtimes.md              |  13 +-
 crates/openshell-cli/src/main.rs              | 138 ++++-
 crates/openshell-cli/src/run.rs               |   2 +-
 .../sandbox_create_lifecycle_integration.rs   |  52 +-
 crates/openshell-core/src/gpu.rs              | 538 +++++++++++++-----
 crates/openshell-driver-docker/README.md      |   2 +-
 crates/openshell-driver-docker/src/lib.rs     | 294 +++++-----
 crates/openshell-driver-docker/src/tests.rs   | 222 ++++++--
 .../openshell-driver-kubernetes/src/driver.rs |  66 ++-
 crates/openshell-driver-podman/README.md      |   2 +-
 .../openshell-driver-podman/src/container.rs  | 148 +++--
 crates/openshell-driver-podman/src/driver.rs  | 262 ++++++---
 crates/openshell-driver-vm/src/driver.rs      | 183 +++++-
 crates/openshell-server/src/compute/mod.rs    |  10 +-
 .../openshell-server/src/grpc/validation.rs   |  37 +-
 docs/reference/sandbox-compute-drivers.mdx    |  27 +-
 docs/sandboxes/manage-sandboxes.mdx           |  24 +-
 proto/compute_driver.proto                    |   6 +-
 proto/openshell.proto                         |   6 +-
 .../README.md                                 | 228 ++++++--
 20 files changed, 1693 insertions(+), 567 deletions(-)

diff --git a/architecture/compute-runtimes.md b/architecture/compute-runtimes.md
index 24e0d78ff..f5c795c52 100644
--- a/architecture/compute-runtimes.md
+++ b/architecture/compute-runtimes.md
@@ -55,11 +55,9 @@ through the driver configuration. The Helm chart defaults sandbox agents to
 `Unconfined` so runtime/default AppArmor profiles do not block supervisor
 network namespace setup on AppArmor-enabled nodes.
 
-GPU requests enter the driver layer through `SandboxSpec.gpu` and
-`SandboxSpec.gpu_device`. Docker and Podman map default GPU requests to one
-concrete NVIDIA CDI device when individual CDI devices are available, use
-`nvidia.com/gpu=all` only for WSL2/all-only compatibility, and pass explicit
-driver-native device IDs through.
+Resource requirements enter the driver layer through `SandboxSpec.resource_requirements`. This includes a set of GPU requirements, where a user
+can request a specific number of GPUs or the driver-specific default behaviour.
+For all in-tree drivers, this is equivalent to selecting a single GPU.
 
 VM runtime state paths are derived only from driver-validated sandbox IDs
 matching `[A-Za-z0-9._-]{1,128}`. The gateway-owned VM driver socket uses a
@@ -99,8 +97,9 @@ Custom sandbox images must include the agent runtime and any system
 dependencies, but they should not need to include the gateway. GPU-capable
 images must include the user-space libraries required by the workload. The
 runtime still owns GPU device injection. GPU requests are explicit, and can be
-refined with a driver-native device identifier; the gateway validates the
-request shape and each runtime enforces the GPU allocation modes it supports.
+refined with a driver-native device identifier or requested count; the gateway
+validates the request shape and each runtime enforces the GPU allocation modes it
+supports.
 
 ## Deployment Shape
 
diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs
index 9d0044886..ee138b430 100644
--- a/crates/openshell-cli/src/main.rs
+++ b/crates/openshell-cli/src/main.rs
@@ -29,6 +29,21 @@ struct GatewayContext {
     endpoint: String,
 }
 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum GpuCliRequest {
+    DriverDefault,
+    Count(u32),
+}
+
+impl From<GpuCliRequest> for GpuResourceRequirements {
+    fn from(gpu: GpuCliRequest) -> Self {
+        match gpu {
+            GpuCliRequest::Count(count) => Self { count: Some(count) },
+            GpuCliRequest::DriverDefault => Self { count: None },
+        }
+    }
+}
+
 /// Resolve the gateway name to a [`GatewayContext`] with the gateway endpoint.
 ///
 /// Resolution priority:
@@ -110,6 +125,21 @@ fn resolve_gateway(
     })
 }
 
+fn parse_gpu_request(value: &str) -> std::result::Result<GpuCliRequest, String> {
+    if value.is_empty() {
+        return Ok(GpuCliRequest::DriverDefault);
+    }
+
+    let count = value
+        .parse::<u32>()
+        .map_err(|_| "GPU count must be a positive integer".to_string())?;
+    if count == 0 {
+        return Err("GPU count must be greater than 0".to_string());
+    }
+
+    Ok(GpuCliRequest::Count(count))
+}
+
 fn resolve_gateway_name(gateway_flag: &Option<String>) -> Option<String> {
     gateway_flag
         .clone()
@@ -1217,8 +1247,11 @@ enum SandboxCommands {
         editor: Option<CliEditor>,
 
         /// Request GPU resources for the sandbox.
-        #[arg(long)]
-        gpu: bool,
+        ///
+        /// Omit COUNT for the driver's default GPU selection, or pass COUNT
+        /// to request a specific number of GPUs.
+        #[arg(long, num_args = 0..=1, value_name = "COUNT", default_missing_value = "", value_parser = parse_gpu_request)]
+        gpu: Option<GpuCliRequest>,
 
         /// CPU limit for the sandbox (for example: 500m, 1, 2.5).
         #[arg(long)]
@@ -2626,7 +2659,7 @@ async fn main() -> Result<()> {
                         .map(|s| openshell_core::forward::ForwardSpec::parse(&s))
                         .transpose()?;
                     let keep = keep || !no_keep || editor.is_some() || forward.is_some();
-                    let gpu_requirements = gpu.then_some(GpuResourceRequirements {});
+                    let gpu_requirements: Option<GpuResourceRequirements> = gpu.map(Into::into);
 
                     let ctx = resolve_gateway(&cli.gateway, &cli.gateway_endpoint)?;
                     let endpoint = &ctx.endpoint;
@@ -3636,6 +3669,27 @@ mod tests {
         });
     }
 
+    #[test]
+    fn gpu_cli_request_option_maps_absent_gpu_to_no_requirements() {
+        let gpu: Option<GpuResourceRequirements> = Option::<GpuCliRequest>::None.map(Into::into);
+
+        assert_eq!(gpu, None);
+    }
+
+    #[test]
+    fn gpu_cli_request_driver_default_converts_to_requirements() {
+        let gpu = GpuResourceRequirements::from(GpuCliRequest::DriverDefault);
+
+        assert_eq!(gpu.count, None);
+    }
+
+    #[test]
+    fn gpu_cli_request_count_converts_to_requirements() {
+        let gpu = GpuResourceRequirements::from(GpuCliRequest::Count(2));
+
+        assert_eq!(gpu.count, Some(2));
+    }
+
     #[test]
     fn apply_auth_uses_stored_token() {
         let tmp = tempfile::tempdir().unwrap();
@@ -4507,7 +4561,23 @@ mod tests {
                 command: Some(SandboxCommands::Create { gpu, .. }),
                 ..
             }) => {
-                assert!(gpu);
+                assert_eq!(gpu, Some(GpuCliRequest::DriverDefault));
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_parses_from_gpu_flag() {
+        let cli = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu", "2"])
+            .expect("sandbox create --gpu 2 should parse");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { gpu, .. }),
+                ..
+            }) => {
+                assert_eq!(gpu, Some(GpuCliRequest::Count(2)));
             }
             other => panic!("expected SandboxCommands::Create, got: {other:?}"),
         }
@@ -4523,13 +4593,71 @@ mod tests {
                 command: Some(SandboxCommands::Create { gpu, command, .. }),
                 ..
             }) => {
-                assert!(gpu);
+                assert_eq!(gpu, Some(GpuCliRequest::DriverDefault));
+                assert_eq!(command, vec!["claude".to_string()]);
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_allows_trailing_command() {
+        let cli = Cli::try_parse_from([
+            "openshell",
+            "sandbox",
+            "create",
+            "--gpu",
+            "2",
+            "--",
+            "claude",
+        ])
+        .expect("sandbox create --gpu 2 -- claude should parse");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { gpu, command, .. }),
+                ..
+            }) => {
+                assert_eq!(gpu, Some(GpuCliRequest::Count(2)));
                 assert_eq!(command, vec!["claude".to_string()]);
             }
             other => panic!("expected SandboxCommands::Create, got: {other:?}"),
         }
     }
 
+    #[test]
+    fn sandbox_create_gpu_count_rejects_zero() {
+        let result = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu", "0"]);
+
+        assert!(result.is_err(), "sandbox create --gpu 0 should be rejected");
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_accepts_equals_syntax() {
+        let cli = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu=2"])
+            .expect("sandbox create --gpu=2 should parse");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { gpu, .. }),
+                ..
+            }) => {
+                assert_eq!(gpu, Some(GpuCliRequest::Count(2)));
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_rejects_non_integer() {
+        let result = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu", "many"]);
+
+        assert!(
+            result.is_err(),
+            "sandbox create --gpu many should be rejected"
+        );
+    }
+
     #[test]
     fn service_expose_accepts_positional_target_port_and_service() {
         let cli = Cli::try_parse_from([
diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs
index 418b200ec..a77e986d4 100644
--- a/crates/openshell-cli/src/run.rs
+++ b/crates/openshell-cli/src/run.rs
@@ -8395,7 +8395,7 @@ mod tests {
     #[test]
     fn provisioning_timeout_message_includes_condition_and_gpu_hint() {
         let resource_requirements = ResourceRequirements {
-            gpu: Some(GpuResourceRequirements {}),
+            gpu: Some(GpuResourceRequirements { count: None }),
         };
         let message = provisioning_timeout_message(
             120,
diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
index 3565c3b2c..4d01f33a9 100644
--- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
+++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
@@ -1088,8 +1088,8 @@ fn test_tls(server: &TestServer) -> TlsOptions {
     server.tls.with_gateway_name("openshell")
 }
 
-fn gpu_requirements() -> GpuResourceRequirements {
-    GpuResourceRequirements {}
+fn gpu_requirements(count: Option<u32>) -> GpuResourceRequirements {
+    GpuResourceRequirements { count }
 }
 
 #[tokio::test]
@@ -1301,7 +1301,7 @@ async fn sandbox_create_sends_gpu_default_request() {
         "openshell",
         &[],
         true,
-        Some(gpu_requirements()),
+        Some(gpu_requirements(None)),
         None,
         None,
         None,
@@ -1328,11 +1328,53 @@ async fn sandbox_create_sends_gpu_default_request() {
         .and_then(|requirements| requirements.gpu.as_ref())
         .expect("GPU requirement should be sent");
 
-    assert!(requests[0]
+    assert_eq!(gpu.count, None);
+}
+
+#[tokio::test]
+async fn sandbox_create_sends_gpu_count_request() {
+    let server = run_server().await;
+    let fake_ssh_dir = tempfile::tempdir().unwrap();
+    let xdg_dir = tempfile::tempdir().unwrap();
+    let _env = test_env(&fake_ssh_dir, &xdg_dir);
+    let tls = test_tls(&server);
+    install_fake_ssh(&fake_ssh_dir);
+
+    run::sandbox_create(
+        &server.endpoint,
+        Some("gpu-two"),
+        None,
+        "openshell",
+        &[],
+        true,
+        Some(gpu_requirements(Some(2))),
+        None,
+        None,
+        None,
+        None,
+        &[],
+        None,
+        None,
+        &["echo".to_string(), "OK".to_string()],
+        Some(false),
+        Some(false),
+        &HashMap::new(),
+        &HashMap::new(),
+        "manual",
+        &tls,
+    )
+    .await
+    .expect("sandbox create should succeed");
+
+    let requests = create_requests(&server).await;
+    let gpu = requests[0]
         .spec
         .as_ref()
         .and_then(|spec| spec.resource_requirements.as_ref())
-        .is_some_and(|requirements| requirements.gpu.is_some()));
+        .and_then(|requirements| requirements.gpu.as_ref())
+        .expect("GPU requirement should be sent");
+
+    assert_eq!(gpu.count, Some(2));
 }
 
 #[tokio::test]
diff --git a/crates/openshell-core/src/gpu.rs b/crates/openshell-core/src/gpu.rs
index fe6744a28..f5ff67cd3 100644
--- a/crates/openshell-core/src/gpu.rs
+++ b/crates/openshell-core/src/gpu.rs
@@ -3,7 +3,9 @@
 
 //! Shared GPU resource requirement helpers.
 
+use std::collections::HashSet;
 use std::fmt;
+use std::sync::RwLock;
 use std::sync::atomic::{AtomicUsize, Ordering};
 
 use crate::config::CDI_GPU_DEVICE_ALL;
@@ -21,10 +23,31 @@ pub fn sandbox_gpu_requested(resources: Option<&SandboxResourceRequirements>) ->
         .is_some()
 }
 
-/// Return whether compute-driver resource requirements request a GPU.
+/// Return the requested sandbox GPU count, if one was specified.
 #[must_use]
-pub fn driver_gpu_requested(resources: Option<&DriverResourceRequirements>) -> bool {
-    driver_gpu_requirements(resources).is_some()
+pub fn sandbox_gpu_count(resources: Option<&SandboxResourceRequirements>) -> Option<u32> {
+    resources
+        .and_then(|resources| resources.gpu.as_ref())
+        .and_then(|gpu| gpu.count)
+}
+
+/// Return the effective compute-driver GPU count.
+///
+/// A present GPU requirement with an omitted count requests one GPU.
+///
+/// # Errors
+/// Returns an error when a GPU requirement explicitly requests zero GPUs.
+pub fn effective_driver_gpu_count(
+    gpu: Option<&DriverGpuResourceRequirements>,
+) -> Result<Option<u32>, String> {
+    let Some(gpu) = gpu else {
+        return Ok(None);
+    };
+    let count = gpu.count.unwrap_or(1);
+    if count == 0 {
+        return Err("gpu count must be greater than 0".to_string());
+    }
+    Ok(Some(count))
 }
 
 /// Return the requested compute-driver GPU requirements, if present.
@@ -113,9 +136,84 @@ impl CdiGpuInventory {
     }
 }
 
+#[derive(Debug)]
+struct CdiGpuSelectorState {
+    inventory: CdiGpuInventory,
+    allow_all_devices: bool,
+}
+
+/// Concurrency-safe default CDI GPU selector used by local container drivers.
+#[derive(Debug)]
+pub struct CdiGpuDefaultSelector {
+    state: RwLock<CdiGpuSelectorState>,
+    round_robin: CdiGpuRoundRobin,
+}
+
+impl CdiGpuDefaultSelector {
+    /// Create a selector with an initial discovered CDI GPU inventory snapshot.
+    #[must_use]
+    pub fn new(inventory: CdiGpuInventory, allow_all_devices: bool) -> Self {
+        Self {
+            state: RwLock::new(CdiGpuSelectorState {
+                inventory,
+                allow_all_devices,
+            }),
+            round_robin: CdiGpuRoundRobin::new(),
+        }
+    }
+
+    /// Replace the cached inventory snapshot without resetting the cursor.
+    pub fn refresh(&self, inventory: CdiGpuInventory, allow_all_devices: bool) {
+        let mut state = self
+            .state
+            .write()
+            .expect("CDI GPU selector state lock poisoned");
+        state.inventory = inventory;
+        state.allow_all_devices = allow_all_devices;
+    }
+
+    /// Return the cached normalized inventory snapshot.
+    #[must_use]
+    pub fn device_ids(&self) -> Vec<String> {
+        self.state
+            .read()
+            .expect("CDI GPU selector state lock poisoned")
+            .inventory
+            .as_slice()
+            .to_vec()
+    }
+
+    /// Return the current default device IDs without advancing the cursor.
+    pub fn peek_device_ids(&self, count: u32) -> Result<Vec<String>, CdiGpuSelectionError> {
+        self.selected_device_ids(count, false)
+    }
+
+    /// Return the next default device IDs and advance the cursor.
+    pub fn next_device_ids(&self, count: u32) -> Result<Vec<String>, CdiGpuSelectionError> {
+        self.selected_device_ids(count, true)
+    }
+
+    fn selected_device_ids(
+        &self,
+        count: u32,
+        consume: bool,
+    ) -> Result<Vec<String>, CdiGpuSelectionError> {
+        let state = self
+            .state
+            .read()
+            .expect("CDI GPU selector state lock poisoned");
+        self.round_robin.selected_default_device_ids(
+            &state.inventory,
+            count,
+            consume,
+            state.allow_all_devices,
+        )
+    }
+}
+
 /// Concurrency-safe round-robin cursor for default CDI GPU selection.
 #[derive(Debug, Default)]
-pub struct CdiGpuRoundRobin {
+struct CdiGpuRoundRobin {
     next: AtomicUsize,
 }
 
@@ -127,37 +225,34 @@ impl CdiGpuRoundRobin {
         }
     }
 
-    /// Return the next default device ID and advance the cursor.
-    pub fn next_default_device_id(
-        &self,
-        inventory: &CdiGpuInventory,
-        allow_all_devices: bool,
-    ) -> Result<String, CdiGpuSelectionError> {
-        self.selected_default_device_id(inventory, true, allow_all_devices)
-    }
-
-    /// Return the current default device ID without advancing the cursor.
-    pub fn peek_default_device_id(
-        &self,
-        inventory: &CdiGpuInventory,
-        allow_all_devices: bool,
-    ) -> Result<String, CdiGpuSelectionError> {
-        self.selected_default_device_id(inventory, false, allow_all_devices)
-    }
-
-    fn selected_default_device_id(
+    fn selected_default_device_ids(
         &self,
         inventory: &CdiGpuInventory,
+        count: u32,
         consume: bool,
         allow_all_devices: bool,
-    ) -> Result<String, CdiGpuSelectionError> {
+    ) -> Result<Vec<String>, CdiGpuSelectionError> {
         let devices = inventory.default_device_family(allow_all_devices)?;
+        let count =
+            usize::try_from(count).map_err(|_| CdiGpuSelectionError::InsufficientDevices {
+                requested: count,
+                available: u32::try_from(devices.len()).unwrap_or(u32::MAX),
+            })?;
+        let available = devices.len();
+        if count > available {
+            return Err(CdiGpuSelectionError::InsufficientDevices {
+                requested: u32::try_from(count).unwrap_or(u32::MAX),
+                available: u32::try_from(available).unwrap_or(u32::MAX),
+            });
+        }
         let base = if consume {
-            self.next.fetch_add(1, Ordering::Relaxed)
+            self.next.fetch_add(count, Ordering::Relaxed)
         } else {
             self.next.load(Ordering::Relaxed)
         };
-        Ok(devices[base % devices.len()].clone())
+        Ok((0..count)
+            .map(|offset| devices[(base + offset) % available].clone())
+            .collect())
     }
 }
 
@@ -165,121 +260,108 @@ impl CdiGpuRoundRobin {
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum CdiGpuSelectionError {
     NoAvailableDevices,
-    MissingDefaultDevice,
     AllDevicesDefaultUnsupported,
+    InsufficientDevices { requested: u32, available: u32 },
 }
 
 impl fmt::Display for CdiGpuSelectionError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
             Self::NoAvailableDevices => f.write_str("no NVIDIA CDI GPU devices were discovered"),
-            Self::MissingDefaultDevice => {
-                f.write_str("GPU request requires a selected default CDI GPU device")
-            }
             Self::AllDevicesDefaultUnsupported => f.write_str(
                 "default GPU request resolved only to nvidia.com/gpu=all, which is not allowed on this platform; set driver_config.cdi_devices to [\"nvidia.com/gpu=all\"] explicitly to request all GPUs",
             ),
+            Self::InsufficientDevices {
+                requested,
+                available: 0,
+            } => write!(
+                f,
+                "GPU sandbox requested {requested} GPUs, but no selectable NVIDIA CDI GPU devices are available"
+            ),
+            Self::InsufficientDevices {
+                requested,
+                available: 1,
+            } => write!(
+                f,
+                "GPU sandbox requested {requested} GPUs, but only 1 selectable NVIDIA CDI GPU device is available"
+            ),
+            Self::InsufficientDevices {
+                requested,
+                available,
+            } => write!(
+                f,
+                "GPU sandbox requested {requested} GPUs, but only {available} selectable NVIDIA CDI GPU devices are available"
+            ),
         }
     }
 }
 
 impl std::error::Error for CdiGpuSelectionError {}
 
-/// Resolve a compute-driver GPU request into CDI device identifiers.
-///
-/// `None` means no GPU was requested. Explicit driver-configured CDI devices
-/// pass through unchanged. A default GPU request uses the driver-selected
-/// default CDI ID.
-pub fn cdi_gpu_device_ids(
-    gpu: Option<&DriverGpuResourceRequirements>,
-    cdi_devices: &[String],
-    selected_default_device: Option<&str>,
-) -> Result<Option<Vec<String>>, CdiGpuSelectionError> {
-    if gpu.is_none() {
-        return Ok(None);
-    }
-    if !cdi_devices.is_empty() {
-        return Ok(Some(cdi_devices.to_vec()));
-    }
-    let device = selected_default_device.ok_or(CdiGpuSelectionError::MissingDefaultDevice)?;
-    Ok(Some(vec![device.to_string()]))
-}
-
-/// Resolve a GPU request with the legacy all-GPU default.
-#[must_use]
-pub fn cdi_gpu_device_ids_or_all(
-    gpu: Option<&DriverGpuResourceRequirements>,
-    cdi_devices: &[String],
-) -> Option<Vec<String>> {
-    gpu.map(|_| {
-        if cdi_devices.is_empty() {
-            vec![CDI_GPU_DEVICE_ALL.to_string()]
-        } else {
-            cdi_devices.to_vec()
-        }
-    })
-}
-
 fn cdi_nvidia_gpu_suffix(id: &str) -> Option<&str> {
     id.strip_prefix(CDI_NVIDIA_GPU_PREFIX)
 }
 
-#[cfg(test)]
-mod tests {
-    use super::*;
+/// Validate a compute-driver GPU request against driver-owned specific devices.
+///
+/// Drivers call this when a sandbox request combines portable GPU requirements
+/// with exact device identifiers in `driver_config`.
+///
+/// # Errors
+/// Returns an error when the sandbox GPU request is absent, when `gpu.count`
+/// is zero, when device IDs are duplicated, or when the effective GPU count
+/// does not equal the number of specific devices.
+pub fn validate_specific_gpu_device_request(
+    gpu: Option<&DriverGpuResourceRequirements>,
+    specific_devices: &[String],
+    driver_config_field: &str,
+) -> Result<(), String> {
+    let device_count = specific_devices.len();
+    if device_count == 0 {
+        return Ok(());
+    }
 
-    #[test]
-    fn cdi_gpu_device_ids_returns_none_when_absent() {
-        assert_eq!(cdi_gpu_device_ids(None, &[], None), Ok(None));
+    let mut seen = HashSet::with_capacity(device_count);
+    for device in specific_devices {
+        if !seen.insert(device.as_str()) {
+            return Err(format!(
+                "{driver_config_field} contains duplicate device ID '{device}'"
+            ));
+        }
     }
 
-    #[test]
-    fn cdi_gpu_device_ids_uses_selected_default_device() {
-        let gpu = DriverGpuResourceRequirements {};
+    let Some(count) = effective_driver_gpu_count(gpu)? else {
+        return Err(format!("{driver_config_field} requires a gpu request"));
+    };
 
-        assert_eq!(
-            cdi_gpu_device_ids(Some(&gpu), &[], Some("nvidia.com/gpu=0")),
-            Ok(Some(vec!["nvidia.com/gpu=0".to_string()]))
-        );
+    if usize::try_from(count).ok() != Some(device_count) {
+        return Err(format!(
+            "gpu count ({count}) must match {driver_config_field} length ({device_count})"
+        ));
     }
 
-    #[test]
-    fn cdi_gpu_device_ids_rejects_missing_default_device() {
-        let gpu = DriverGpuResourceRequirements {};
+    Ok(())
+}
 
-        assert_eq!(
-            cdi_gpu_device_ids(Some(&gpu), &[], None),
-            Err(CdiGpuSelectionError::MissingDefaultDevice)
-        );
-    }
+#[cfg(test)]
+mod tests {
+    use super::*;
 
     #[test]
-    fn cdi_gpu_device_ids_passes_explicit_device_ids_through() {
-        let gpu = DriverGpuResourceRequirements {};
+    fn effective_driver_gpu_count_normalizes_missing_count() {
+        let gpu = DriverGpuResourceRequirements { count: None };
 
-        assert_eq!(
-            cdi_gpu_device_ids(
-                Some(&gpu),
-                &[
-                    "nvidia.com/gpu=0".to_string(),
-                    "nvidia.com/gpu=1".to_string()
-                ],
-                None
-            ),
-            Ok(Some(vec![
-                "nvidia.com/gpu=0".to_string(),
-                "nvidia.com/gpu=1".to_string()
-            ]))
-        );
+        assert_eq!(effective_driver_gpu_count(Some(&gpu)), Ok(Some(1)));
+        assert_eq!(effective_driver_gpu_count(None), Ok(None));
     }
 
     #[test]
-    fn cdi_gpu_device_ids_or_all_uses_all_when_no_devices_are_configured() {
-        let gpu = DriverGpuResourceRequirements {};
+    fn effective_driver_gpu_count_rejects_zero_count() {
+        let gpu = DriverGpuResourceRequirements { count: Some(0) };
 
         assert_eq!(
-            cdi_gpu_device_ids_or_all(Some(&gpu), &[]),
-            Some(vec![CDI_GPU_DEVICE_ALL.to_string()])
+            effective_driver_gpu_count(Some(&gpu)),
+            Err("gpu count must be greater than 0".to_string())
         );
     }
 
@@ -302,69 +384,69 @@ mod tests {
     }
 
     #[test]
-    fn round_robin_prefers_indexed_family_and_sorts_numerically() {
+    fn selector_prefers_indexed_family_and_sorts_numerically() {
         let inventory = CdiGpuInventory::new([
             "nvidia.com/gpu=10",
             "nvidia.com/gpu=UUID-b",
             "nvidia.com/gpu=2",
             "nvidia.com/gpu=all",
         ]);
-        let selector = CdiGpuRoundRobin::new();
+        let selector = CdiGpuDefaultSelector::new(inventory, false);
 
         assert_eq!(
-            selector.next_default_device_id(&inventory, false),
-            Ok("nvidia.com/gpu=2".to_string())
+            selector.next_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=2".to_string()])
         );
         assert_eq!(
-            selector.next_default_device_id(&inventory, false),
-            Ok("nvidia.com/gpu=10".to_string())
+            selector.next_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=10".to_string()])
         );
         assert_eq!(
-            selector.next_default_device_id(&inventory, false),
-            Ok("nvidia.com/gpu=2".to_string())
+            selector.next_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=2".to_string()])
         );
     }
 
     #[test]
-    fn round_robin_uses_named_family_when_no_indexed_ids_exist() {
+    fn selector_uses_named_family_when_no_indexed_ids_exist() {
         let inventory = CdiGpuInventory::new(["nvidia.com/gpu=UUID-b", "nvidia.com/gpu=UUID-a"]);
-        let selector = CdiGpuRoundRobin::new();
+        let selector = CdiGpuDefaultSelector::new(inventory, false);
 
         assert_eq!(
-            selector.next_default_device_id(&inventory, false),
-            Ok("nvidia.com/gpu=UUID-a".to_string())
+            selector.next_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=UUID-a".to_string()])
         );
     }
 
     #[test]
-    fn round_robin_uses_all_only_inventory_when_allowed() {
+    fn selector_uses_all_only_inventory_when_allowed() {
         let inventory = CdiGpuInventory::new([CDI_GPU_DEVICE_ALL]);
-        let selector = CdiGpuRoundRobin::new();
+        let selector = CdiGpuDefaultSelector::new(inventory, true);
 
         assert_eq!(
-            selector.next_default_device_id(&inventory, true),
-            Ok(CDI_GPU_DEVICE_ALL.to_string())
+            selector.next_device_ids(1),
+            Ok(vec![CDI_GPU_DEVICE_ALL.to_string()])
         );
     }
 
     #[test]
-    fn round_robin_rejects_all_only_inventory_when_not_allowed() {
+    fn selector_rejects_all_only_inventory_when_not_allowed() {
         let inventory = CdiGpuInventory::new([CDI_GPU_DEVICE_ALL]);
-        let selector = CdiGpuRoundRobin::new();
+        let selector = CdiGpuDefaultSelector::new(inventory, false);
 
         assert_eq!(
-            selector.next_default_device_id(&inventory, false),
+            selector.next_device_ids(1),
             Err(CdiGpuSelectionError::AllDevicesDefaultUnsupported)
         );
     }
 
     #[test]
-    fn round_robin_rejects_empty_inventory() {
+    fn selector_rejects_empty_inventory() {
         let inventory = CdiGpuInventory::new(["vendor.example/device=0"]);
-        let selector = CdiGpuRoundRobin::new();
+        let selector = CdiGpuDefaultSelector::new(inventory, false);
 
         assert_eq!(
-            selector.next_default_device_id(&inventory, false),
+            selector.next_device_ids(1),
             Err(CdiGpuSelectionError::NoAvailableDevices)
         );
     }
@@ -372,23 +454,205 @@ mod tests {
     #[test]
     fn peek_does_not_advance_round_robin_cursor() {
         let inventory = CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1"]);
-        let selector = CdiGpuRoundRobin::new();
+        let selector = CdiGpuDefaultSelector::new(inventory, false);
+
+        assert_eq!(
+            selector.peek_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=0".to_string()])
+        );
+        assert_eq!(
+            selector.peek_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=0".to_string()])
+        );
+        assert_eq!(
+            selector.next_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=0".to_string()])
+        );
+        assert_eq!(
+            selector.next_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=1".to_string()])
+        );
+    }
+
+    #[test]
+    fn selector_selects_multiple_distinct_devices_in_cursor_order() {
+        let inventory =
+            CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1", "nvidia.com/gpu=2"]);
+        let selector = CdiGpuDefaultSelector::new(inventory, false);
+
+        assert_eq!(
+            selector.next_device_ids(2),
+            Ok(vec![
+                "nvidia.com/gpu=0".to_string(),
+                "nvidia.com/gpu=1".to_string()
+            ])
+        );
+        assert_eq!(
+            selector.next_device_ids(2),
+            Ok(vec![
+                "nvidia.com/gpu=2".to_string(),
+                "nvidia.com/gpu=0".to_string()
+            ])
+        );
+    }
+
+    #[test]
+    fn selector_rejects_count_larger_than_selectable_family_without_advancing() {
+        let inventory = CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1"]);
+        let selector = CdiGpuDefaultSelector::new(inventory, false);
 
         assert_eq!(
-            selector.peek_default_device_id(&inventory, false),
-            Ok("nvidia.com/gpu=0".to_string())
+            selector.next_device_ids(3),
+            Err(CdiGpuSelectionError::InsufficientDevices {
+                requested: 3,
+                available: 2
+            })
         );
         assert_eq!(
-            selector.peek_default_device_id(&inventory, false),
-            Ok("nvidia.com/gpu=0".to_string())
+            selector.next_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=0".to_string()])
         );
+    }
+
+    #[test]
+    fn selector_treats_all_only_inventory_as_one_selectable_device() {
+        let inventory = CdiGpuInventory::new([CDI_GPU_DEVICE_ALL]);
+        let selector = CdiGpuDefaultSelector::new(inventory, true);
+
         assert_eq!(
-            selector.next_default_device_id(&inventory, false),
-            Ok("nvidia.com/gpu=0".to_string())
+            selector.next_device_ids(2),
+            Err(CdiGpuSelectionError::InsufficientDevices {
+                requested: 2,
+                available: 1
+            })
         );
+    }
+
+    #[test]
+    fn selector_refreshes_inventory_without_resetting_cursor() {
+        let inventory = CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1"]);
+        let selector = CdiGpuDefaultSelector::new(inventory, false);
+
+        assert_eq!(
+            selector.next_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=0".to_string()])
+        );
+        selector.refresh(
+            CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1", "nvidia.com/gpu=2"]),
+            false,
+        );
+        assert_eq!(
+            selector.next_device_ids(1),
+            Ok(vec!["nvidia.com/gpu=1".to_string()])
+        );
+    }
+
+    #[test]
+    fn validate_specific_gpu_device_request_ignores_empty_devices() {
+        validate_specific_gpu_device_request(None, &[], "driver_config.cdi_devices")
+            .expect("empty exact device lists should not be validated");
+    }
+
+    #[test]
+    fn validate_specific_gpu_device_request_accepts_matching_count() {
+        let gpu = DriverGpuResourceRequirements { count: Some(2) };
+        let specific_devices = vec![
+            "nvidia.com/gpu=0".to_string(),
+            "nvidia.com/gpu=1".to_string(),
+        ];
+
+        validate_specific_gpu_device_request(
+            Some(&gpu),
+            &specific_devices,
+            "driver_config.cdi_devices",
+        )
+        .expect("matching count should be accepted");
+    }
+
+    #[test]
+    fn validate_specific_gpu_device_request_accepts_missing_count_for_one_device() {
+        let gpu = DriverGpuResourceRequirements { count: None };
+        let specific_devices = vec!["nvidia.com/gpu=0".to_string()];
+
+        validate_specific_gpu_device_request(
+            Some(&gpu),
+            &specific_devices,
+            "driver_config.cdi_devices",
+        )
+        .expect("single exact device should be compatible with a default GPU request");
+    }
+
+    #[test]
+    fn validate_specific_gpu_device_request_rejects_missing_gpu_request() {
+        let specific_devices = vec!["nvidia.com/gpu=0".to_string()];
+
+        let err = validate_specific_gpu_device_request(
+            None,
+            &specific_devices,
+            "driver_config.cdi_devices",
+        )
+        .expect_err("missing GPU request should be rejected");
+
+        assert_eq!(err, "driver_config.cdi_devices requires a gpu request");
+    }
+
+    #[test]
+    fn validate_specific_gpu_device_request_rejects_missing_count_for_multiple_devices() {
+        let gpu = DriverGpuResourceRequirements { count: None };
+        let specific_devices = vec![
+            "nvidia.com/gpu=0".to_string(),
+            "nvidia.com/gpu=1".to_string(),
+        ];
+
+        let err = validate_specific_gpu_device_request(
+            Some(&gpu),
+            &specific_devices,
+            "driver_config.cdi_devices",
+        )
+        .expect_err("missing count should be rejected for multiple devices");
+
+        assert_eq!(
+            err,
+            "gpu count (1) must match driver_config.cdi_devices length (2)"
+        );
+    }
+
+    #[test]
+    fn validate_specific_gpu_device_request_rejects_mismatch() {
+        let gpu = DriverGpuResourceRequirements { count: Some(2) };
+        let specific_devices = vec!["nvidia.com/gpu=0".to_string()];
+
+        let err = validate_specific_gpu_device_request(
+            Some(&gpu),
+            &specific_devices,
+            "driver_config.cdi_devices",
+        )
+        .expect_err("mismatched count should be rejected");
+
+        assert_eq!(
+            err,
+            "gpu count (2) must match driver_config.cdi_devices length (1)"
+        );
+    }
+
+    #[test]
+    fn validate_specific_gpu_device_request_rejects_duplicate_ids() {
+        let gpu = DriverGpuResourceRequirements { count: Some(2) };
+        let specific_devices = vec![
+            "nvidia.com/gpu=0".to_string(),
+            "nvidia.com/gpu=0".to_string(),
+        ];
+
+        let err = validate_specific_gpu_device_request(
+            Some(&gpu),
+            &specific_devices,
+            "driver_config.cdi_devices",
+        )
+        .expect_err("duplicates should be rejected");
+
         assert_eq!(
-            selector.next_default_device_id(&inventory, false),
-            Ok("nvidia.com/gpu=1".to_string())
+            err,
+            "driver_config.cdi_devices contains duplicate device ID 'nvidia.com/gpu=0'"
         );
     }
 }
diff --git a/crates/openshell-driver-docker/README.md b/crates/openshell-driver-docker/README.md
index 468d6d8cd..aedfa4e44 100644
--- a/crates/openshell-driver-docker/README.md
+++ b/crates/openshell-driver-docker/README.md
@@ -32,7 +32,7 @@ contract:
 | `apparmor=unconfined` | Avoids Docker's default profile blocking required mount operations. |
 | `restart_policy = unless-stopped` | Keeps managed sandboxes resumable across daemon or gateway restarts. |
 | `PidsLimit` | Enforces the sandbox PID budget at the Docker cgroup layer. Set `[openshell.drivers.docker].sandbox_pids_limit = 0` to inherit the Docker/runtime default. |
-| CDI GPU request | Uses `driver_config.cdi_devices` when set; otherwise selects one concrete NVIDIA CDI GPU when the sandbox resource requirements ask for GPU support and daemon CDI support is detected. Docker daemon `/info` can permit `nvidia.com/gpu=all` as a WSL2 all-only compatibility fallback. Counted GPU requests are rejected. |
+| CDI GPU request | Uses opaque `driver_config.cdi_devices` values when set; otherwise selects the requested count of NVIDIA CDI GPUs in round-robin order when daemon CDI support is detected. Docker daemon `/info` can permit `nvidia.com/gpu=all` as a WSL2 all-only compatibility fallback, where it counts as one selectable device. Exact CDI device lists must not contain duplicates and must match the effective GPU count. |
 
 The agent child process does not retain these supervisor privileges.
 
diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs
index ae3da7798..6ced54892 100644
--- a/crates/openshell-driver-docker/src/lib.rs
+++ b/crates/openshell-driver-docker/src/lib.rs
@@ -28,8 +28,8 @@ use openshell_core::driver_utils::{
     LABEL_SANDBOX_NAMESPACE, SUPERVISOR_IMAGE_BINARY_PATH, supervisor_image_should_refresh,
 };
 use openshell_core::gpu::{
-    CdiGpuInventory, CdiGpuRoundRobin, CdiGpuSelectionError, cdi_gpu_device_ids,
-    driver_gpu_requirements,
+    CdiGpuDefaultSelector, CdiGpuInventory, CdiGpuSelectionError, driver_gpu_requirements,
+    effective_driver_gpu_count, validate_specific_gpu_device_request,
 };
 use openshell_core::progress::{
     PROGRESS_STEP_PULLING_IMAGE, PROGRESS_STEP_REQUESTING_SANDBOX, PROGRESS_STEP_STARTING_SANDBOX,
@@ -234,7 +234,6 @@ struct DockerDriverRuntimeConfig {
     guest_tls: Option<DockerGuestTlsPaths>,
     daemon_version: String,
     supports_gpu: bool,
-    cdi_gpu_inventory: CdiGpuInventory,
     allow_all_default_gpu: bool,
     sandbox_pids_limit: i64,
     enable_bind_mounts: bool,
@@ -256,7 +255,7 @@ pub struct DockerComputeDriver {
     events: broadcast::Sender<WatchSandboxesEvent>,
     pending: Arc<Mutex<HashMap<String, PendingSandboxRecord>>>,
     supervisor_readiness: Arc<dyn SupervisorReadiness>,
-    gpu_selector: Arc<CdiGpuRoundRobin>,
+    gpu_selector: Arc<CdiGpuDefaultSelector>,
 }
 
 struct PendingSandboxRecord {
@@ -287,19 +286,13 @@ struct DockerSandboxDriverConfig {
     mounts: Vec<DockerDriverMountConfig>,
 }
 
-impl DockerSandboxDriverConfig {
-    fn from_sandbox(sandbox: &DriverSandbox) -> Result<Self, String> {
-        let Some(template) = sandbox
-            .spec
-            .as_ref()
-            .and_then(|spec| spec.template.as_ref())
-        else {
-            return Ok(Self::default());
-        };
-
-        Self::from_template(template)
-    }
+struct ValidatedDockerSandbox<'a> {
+    template: &'a DriverSandboxTemplate,
+    driver_config: DockerSandboxDriverConfig,
+    gpu_requirements: Option<&'a GpuResourceRequirements>,
+}
 
+impl DockerSandboxDriverConfig {
     fn from_template(template: &DriverSandboxTemplate) -> Result<Self, String> {
         let Some(config) = template.driver_config.as_ref() else {
             return Ok(Self::default());
@@ -420,7 +413,6 @@ impl DockerComputeDriver {
                 guest_tls,
                 daemon_version: version.version.unwrap_or_else(|| "unknown".to_string()),
                 supports_gpu,
-                cdi_gpu_inventory,
                 allow_all_default_gpu,
                 sandbox_pids_limit: docker_config.sandbox_pids_limit,
                 enable_bind_mounts: docker_config.enable_bind_mounts,
@@ -428,7 +420,10 @@ impl DockerComputeDriver {
             events: broadcast::channel(WATCH_BUFFER).0,
             pending: Arc::new(Mutex::new(HashMap::new())),
             supervisor_readiness,
-            gpu_selector: Arc::new(CdiGpuRoundRobin::new()),
+            gpu_selector: Arc::new(CdiGpuDefaultSelector::new(
+                cdi_gpu_inventory,
+                allow_all_default_gpu,
+            )),
         };
 
         let poll_driver = driver.clone();
@@ -455,10 +450,19 @@ impl DockerComputeDriver {
         )
     }
 
+    #[cfg(test)]
     fn validate_sandbox(
         sandbox: &DriverSandbox,
         config: &DockerDriverRuntimeConfig,
     ) -> Result<(), Status> {
+        let _ = Self::validated_sandbox(sandbox, config)?;
+        Ok(())
+    }
+
+    fn validated_sandbox<'a>(
+        sandbox: &'a DriverSandbox,
+        config: &DockerDriverRuntimeConfig,
+    ) -> Result<ValidatedDockerSandbox<'a>, Status> {
         let spec = sandbox
             .spec
             .as_ref()
@@ -468,19 +472,21 @@ impl DockerComputeDriver {
             .as_ref()
             .ok_or_else(|| Status::invalid_argument("sandbox.spec.template is required"))?;
 
-        Self::validate_sandbox_template(template, config)?;
-
+        Self::validate_sandbox_template_base(template)?;
+        let _ = docker_resource_limits(template)?;
         let driver_config =
             DockerSandboxDriverConfig::from_template(template).map_err(Status::invalid_argument)?;
+        validate_docker_driver_mounts(&driver_config.mounts, config.enable_bind_mounts)?;
         let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
         Self::validate_gpu_request(gpu_requirements, config.supports_gpu, &driver_config)?;
-        Ok(())
+        Ok(ValidatedDockerSandbox {
+            template,
+            driver_config,
+            gpu_requirements,
+        })
     }
 
-    fn validate_sandbox_template(
-        template: &DriverSandboxTemplate,
-        config: &DockerDriverRuntimeConfig,
-    ) -> Result<(), Status> {
+    fn validate_sandbox_template_base(template: &DriverSandboxTemplate) -> Result<(), Status> {
         if template.image.trim().is_empty() {
             return Err(Status::failed_precondition(
                 "docker sandboxes require a template image",
@@ -501,8 +507,6 @@ impl DockerComputeDriver {
             ));
         }
 
-        let _ = docker_driver_config(template, config.enable_bind_mounts)?;
-        let _ = docker_resource_limits(template)?;
         Ok(())
     }
 
@@ -525,31 +529,31 @@ impl DockerComputeDriver {
         supports_gpu: bool,
         driver_config: &DockerSandboxDriverConfig,
     ) -> Result<(), Status> {
-        if gpu_requirements.is_none() && driver_config.cdi_devices.is_some() {
-            return Err(Status::invalid_argument(
-                "driver_config.cdi_devices requires gpu=true",
-            ));
-        }
-
-        if gpu_requirements.is_some() && !supports_gpu {
+        let requested_count =
+            effective_driver_gpu_count(gpu_requirements).map_err(Status::invalid_argument)?;
+        if requested_count.is_some() && !supports_gpu {
             return Err(Status::failed_precondition(
                 "docker GPU sandboxes require Docker CDI support. Enable CDI on the Docker daemon, then restart the OpenShell gateway/server so GPU capability is detected.",
             ));
         }
+
+        if let Some(cdi_devices) = driver_config.cdi_devices.as_deref() {
+            validate_specific_gpu_device_request(
+                gpu_requirements,
+                cdi_devices,
+                "driver_config.cdi_devices",
+            )
+            .map_err(Status::invalid_argument)?;
+        }
+
         Ok(())
     }
 
     async fn validate_user_volume_mounts_available(
         &self,
-        sandbox: &DriverSandbox,
+        driver_config: &DockerSandboxDriverConfig,
     ) -> Result<(), Status> {
-        let template = sandbox
-            .spec
-            .as_ref()
-            .and_then(|spec| spec.template.as_ref())
-            .ok_or_else(|| Status::invalid_argument("sandbox.spec.template is required"))?;
-        let config = docker_driver_config(template, self.config.enable_bind_mounts)?;
-        for mount in config.mounts {
+        for mount in &driver_config.mounts {
             if let DockerDriverMountConfig::Volume { source, .. } = mount {
                 match self.docker.inspect_volume(source.trim()).await {
                     Ok(volume) => {
@@ -576,42 +580,48 @@ impl DockerComputeDriver {
         Ok(())
     }
 
-    fn peek_default_gpu_device(&self, sandbox: &DriverSandbox) -> Result<Option<String>, Status> {
-        self.selected_default_gpu_device(sandbox, false)
-    }
-
-    fn next_default_gpu_device(&self, sandbox: &DriverSandbox) -> Result<Option<String>, Status> {
-        self.selected_default_gpu_device(sandbox, true)
+    async fn refresh_gpu_inventory(&self) -> Result<(), Status> {
+        let info = self
+            .docker
+            .info()
+            .await
+            .map_err(|err| internal_status("query Docker daemon info", err))?;
+        self.gpu_selector.refresh(
+            docker_cdi_gpu_inventory(&info),
+            self.config.allow_all_default_gpu,
+        );
+        Ok(())
     }
 
-    fn selected_default_gpu_device(
+    async fn resolve_gpu_cdi_devices(
         &self,
-        sandbox: &DriverSandbox,
-        consume: bool,
-    ) -> Result<Option<String>, Status> {
-        let Some(spec) = sandbox.spec.as_ref() else {
+        gpu_requirements: Option<&GpuResourceRequirements>,
+        driver_config: &DockerSandboxDriverConfig,
+        select_default_devices: fn(
+            &CdiGpuDefaultSelector,
+            u32,
+        ) -> Result<Vec<String>, CdiGpuSelectionError>,
+    ) -> Result<Option<Vec<String>>, Status> {
+        if let Some(cdi_devices) = driver_config.cdi_devices.as_deref() {
+            validate_specific_gpu_device_request(
+                gpu_requirements,
+                cdi_devices,
+                "driver_config.cdi_devices",
+            )
+            .map_err(Status::invalid_argument)?;
+            return Ok(Some(cdi_devices.to_vec()));
+        }
+
+        let Some(count) =
+            effective_driver_gpu_count(gpu_requirements).map_err(Status::invalid_argument)?
+        else {
             return Ok(None);
         };
-        let driver_config =
-            DockerSandboxDriverConfig::from_sandbox(sandbox).map_err(Status::invalid_argument)?;
-        let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
-        if gpu_requirements.is_none() || driver_config.cdi_devices.is_some() {
-            return Ok(None);
-        }
 
-        let selected = if consume {
-            self.gpu_selector.next_default_device_id(
-                &self.config.cdi_gpu_inventory,
-                self.config.allow_all_default_gpu,
-            )
-        } else {
-            self.gpu_selector.peek_default_device_id(
-                &self.config.cdi_gpu_inventory,
-                self.config.allow_all_default_gpu,
-            )
-        }
-        .map_err(docker_gpu_selection_status)?;
-        Ok(Some(selected))
+        self.refresh_gpu_inventory().await?;
+        select_default_devices(&self.gpu_selector, count)
+            .map(Some)
+            .map_err(docker_gpu_selection_status)
     }
 
     async fn get_sandbox_snapshot(
@@ -649,14 +659,22 @@ impl DockerComputeDriver {
     }
 
     async fn create_sandbox_inner(&self, sandbox: &DriverSandbox) -> Result<(), Status> {
-        Self::validate_sandbox(sandbox, &self.config)?;
+        let validated = Self::validated_sandbox(sandbox, &self.config)?;
         Self::validate_sandbox_auth(sandbox)?;
-        self.validate_user_volume_mounts_available(sandbox).await?;
-        let selected_default_gpu = self.peek_default_gpu_device(sandbox)?;
-        let _ = build_container_create_body_with_default(
+        self.validate_user_volume_mounts_available(&validated.driver_config)
+            .await?;
+        let gpu_devices = self
+            .resolve_gpu_cdi_devices(
+                validated.gpu_requirements,
+                &validated.driver_config,
+                CdiGpuDefaultSelector::peek_device_ids,
+            )
+            .await?;
+        let _ = build_container_create_body_with_gpu_devices(
             sandbox,
             &self.config,
-            selected_default_gpu.as_deref(),
+            &validated.driver_config,
+            gpu_devices.as_deref(),
         )?;
 
         if self
@@ -714,11 +732,10 @@ impl DockerComputeDriver {
         &self,
         sandbox: &DriverSandbox,
     ) -> Result<(), DockerProvisioningFailure> {
-        let template = sandbox
-            .spec
-            .as_ref()
-            .and_then(|spec| spec.template.as_ref())
-            .expect("validated sandbox has template");
+        let validated = Self::validated_sandbox(sandbox, &self.config).map_err(|status| {
+            DockerProvisioningFailure::new("ContainerCreateFailed", status.message())
+        })?;
+        let template = validated.template;
         self.ensure_image_available(&sandbox.id, &template.image)
             .await
             .map_err(|status| {
@@ -731,16 +748,24 @@ impl DockerComputeDriver {
             })?;
 
         let container_name = container_name_for_sandbox(sandbox);
-        let selected_default_gpu = self.next_default_gpu_device(sandbox).map_err(|status| {
-            if token_file_created {
-                cleanup_sandbox_token_file(sandbox, &self.config);
-            }
-            DockerProvisioningFailure::new("ContainerCreateFailed", status.message())
-        })?;
-        let create_body = build_container_create_body_with_default(
+        let gpu_devices = self
+            .resolve_gpu_cdi_devices(
+                validated.gpu_requirements,
+                &validated.driver_config,
+                CdiGpuDefaultSelector::next_device_ids,
+            )
+            .await
+            .map_err(|status| {
+                if token_file_created {
+                    cleanup_sandbox_token_file(sandbox, &self.config);
+                }
+                DockerProvisioningFailure::new("ContainerCreateFailed", status.message())
+            })?;
+        let create_body = build_container_create_body_with_gpu_devices(
             sandbox,
             &self.config,
-            selected_default_gpu.as_deref(),
+            &validated.driver_config,
+            gpu_devices.as_deref(),
         )
         .map_err(|status| {
             if token_file_created {
@@ -1377,9 +1402,16 @@ impl ComputeDriver for DockerComputeDriver {
             .into_inner()
             .sandbox
             .ok_or_else(|| Status::invalid_argument("sandbox is required"))?;
-        Self::validate_sandbox(&sandbox, &self.config)?;
-        self.validate_user_volume_mounts_available(&sandbox).await?;
-        let _ = self.peek_default_gpu_device(&sandbox)?;
+        let validated = Self::validated_sandbox(&sandbox, &self.config)?;
+        self.validate_user_volume_mounts_available(&validated.driver_config)
+            .await?;
+        let _ = self
+            .resolve_gpu_cdi_devices(
+                validated.gpu_requirements,
+                &validated.driver_config,
+                CdiGpuDefaultSelector::peek_device_ids,
+            )
+            .await?;
         Ok(Response::new(ValidateSandboxCreateResponse {}))
     }
 
@@ -1720,6 +1752,7 @@ fn attach_docker_progress_metadata(
     }
 }
 
+#[cfg(test)]
 fn docker_driver_config(
     template: &DriverSandboxTemplate,
     enable_bind_mounts: bool,
@@ -1730,11 +1763,7 @@ fn docker_driver_config(
     Ok(config)
 }
 
-fn docker_driver_mounts(
-    template: &DriverSandboxTemplate,
-    enable_bind_mounts: bool,
-) -> Result<Vec<Mount>, Status> {
-    let config = docker_driver_config(template, enable_bind_mounts)?;
+fn docker_driver_mounts(config: &DockerSandboxDriverConfig) -> Result<Vec<Mount>, Status> {
     config.mounts.iter().map(docker_mount_from_config).collect()
 }
 
@@ -2211,49 +2240,40 @@ fn docker_gpu_selection_status(err: CdiGpuSelectionError) -> Status {
     Status::failed_precondition(err.to_string())
 }
 
-fn build_device_requests(
-    sandbox: &DriverSandbox,
-    selected_default_device: Option<&str>,
-) -> Result<Option<Vec<DeviceRequest>>, Status> {
-    let Some(spec) = sandbox.spec.as_ref() else {
-        return Ok(None);
-    };
-    let cdi_devices = DockerSandboxDriverConfig::from_sandbox(sandbox)
-        .map_err(Status::invalid_argument)?
-        .cdi_devices
-        .unwrap_or_default();
-    let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
-    if gpu_requirements.is_none() && !cdi_devices.is_empty() {
-        return Err(Status::invalid_argument(
-            "driver_config.cdi_devices requires gpu=true",
-        ));
-    }
-
-    cdi_gpu_device_ids(gpu_requirements, &cdi_devices, selected_default_device)
-        .map(|device_ids| {
-            device_ids.map(|device_ids| {
-                vec![DeviceRequest {
-                    driver: Some("cdi".to_string()),
-                    device_ids: Some(device_ids),
-                    ..Default::default()
-                }]
-            })
-        })
-        .map_err(docker_gpu_selection_status)
-}
-
 #[cfg(test)]
 fn build_container_create_body(
     sandbox: &DriverSandbox,
     config: &DockerDriverRuntimeConfig,
 ) -> Result<ContainerCreateBody, Status> {
-    build_container_create_body_with_default(sandbox, config, None)
+    let template = sandbox
+        .spec
+        .as_ref()
+        .and_then(|spec| spec.template.as_ref())
+        .ok_or_else(|| Status::invalid_argument("sandbox.spec.template is required"))?;
+    let driver_config = docker_driver_config(template, config.enable_bind_mounts)?;
+    let gpu_requirements = sandbox
+        .spec
+        .as_ref()
+        .and_then(|spec| driver_gpu_requirements(spec.resource_requirements.as_ref()));
+    let cdi_devices = if let Some(cdi_devices) = driver_config.cdi_devices.as_ref() {
+        validate_specific_gpu_device_request(
+            gpu_requirements,
+            cdi_devices,
+            "driver_config.cdi_devices",
+        )
+        .map_err(Status::invalid_argument)?;
+        Some(cdi_devices.as_slice())
+    } else {
+        None
+    };
+    build_container_create_body_with_gpu_devices(sandbox, config, &driver_config, cdi_devices)
 }
 
-fn build_container_create_body_with_default(
+fn build_container_create_body_with_gpu_devices(
     sandbox: &DriverSandbox,
     config: &DockerDriverRuntimeConfig,
-    selected_default_device: Option<&str>,
+    driver_config: &DockerSandboxDriverConfig,
+    gpu_device_ids: Option<&[String]>,
 ) -> Result<ContainerCreateBody, Status> {
     let spec = sandbox
         .spec
@@ -2264,8 +2284,14 @@ fn build_container_create_body_with_default(
         .as_ref()
         .ok_or_else(|| Status::invalid_argument("sandbox.spec.template is required"))?;
     let resource_limits = docker_resource_limits(template)?;
-    let user_mounts = docker_driver_mounts(template, config.enable_bind_mounts)?;
-    let device_requests = build_device_requests(sandbox, selected_default_device)?;
+    let user_mounts = docker_driver_mounts(driver_config)?;
+    let device_requests = gpu_device_ids.map(|device_ids| {
+        vec![DeviceRequest {
+            driver: Some("cdi".to_string()),
+            device_ids: Some(device_ids.to_vec()),
+            ..Default::default()
+        }]
+    });
     let mut labels = template.labels.clone();
     labels.insert(
         LABEL_MANAGED_BY.to_string(),
diff --git a/crates/openshell-driver-docker/src/tests.rs b/crates/openshell-driver-docker/src/tests.rs
index a48b82f74..4f5b26617 100644
--- a/crates/openshell-driver-docker/src/tests.rs
+++ b/crates/openshell-driver-docker/src/tests.rs
@@ -80,9 +80,9 @@ fn list_string_driver_config(field: &str, values: &[&str]) -> prost_types::Struc
     }
 }
 
-fn gpu_resources() -> ResourceRequirements {
+fn gpu_resources(count: Option<u32>) -> ResourceRequirements {
     ResourceRequirements {
-        gpu: Some(GpuResourceRequirements {}),
+        gpu: Some(GpuResourceRequirements { count }),
     }
 }
 
@@ -111,7 +111,6 @@ fn runtime_config() -> DockerDriverRuntimeConfig {
         }),
         daemon_version: "28.0.0".to_string(),
         supports_gpu: false,
-        cdi_gpu_inventory: CdiGpuInventory::default(),
         allow_all_default_gpu: false,
         sandbox_pids_limit: DEFAULT_SANDBOX_PIDS_LIMIT,
         enable_bind_mounts: false,
@@ -179,6 +178,7 @@ impl SupervisorReadiness for DisconnectedSupervisorReadiness {
 }
 
 fn test_driver_with_config(config: DockerDriverRuntimeConfig) -> DockerComputeDriver {
+    let allow_all_default_gpu = config.allow_all_default_gpu;
     DockerComputeDriver {
         docker: Arc::new(
             Docker::connect_with_http("http://127.0.0.1:2375", 1, bollard::API_DEFAULT_VERSION)
@@ -188,7 +188,10 @@ fn test_driver_with_config(config: DockerDriverRuntimeConfig) -> DockerComputeDr
         events: broadcast::channel(WATCH_BUFFER).0,
         pending: Arc::new(tokio::sync::Mutex::new(HashMap::new())),
         supervisor_readiness: Arc::new(DisconnectedSupervisorReadiness),
-        gpu_selector: Arc::new(CdiGpuRoundRobin::new()),
+        gpu_selector: Arc::new(CdiGpuDefaultSelector::new(
+            CdiGpuInventory::default(),
+            allow_all_default_gpu,
+        )),
     }
 }
 
@@ -1052,7 +1055,21 @@ fn build_container_create_body_clears_inherited_cmd() {
 fn validate_sandbox_rejects_gpu_when_cdi_unavailable() {
     let config = runtime_config();
     let mut sandbox = test_sandbox();
-    sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resources());
+    sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resources(None));
+
+    let err = DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap_err();
+
+    assert_eq!(err.code(), tonic::Code::FailedPrecondition);
+    assert!(err.message().contains("Docker CDI"));
+}
+
+#[test]
+fn validate_sandbox_rejects_missing_gpu_support_before_request_shape() {
+    let config = runtime_config();
+    let mut sandbox = test_sandbox();
+    let spec = sandbox.spec.as_mut().unwrap();
+    spec.resource_requirements = Some(gpu_resources(Some(2)));
+    spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&["nvidia.com/gpu=0"]));
 
     let err = DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap_err();
 
@@ -1065,7 +1082,7 @@ fn validate_sandbox_rejects_invalid_cdi_devices_before_gpu_capability() {
     let config = runtime_config();
     let mut sandbox = test_sandbox();
     let spec = sandbox.spec.as_mut().unwrap();
-    spec.resource_requirements = Some(gpu_resources());
+    spec.resource_requirements = Some(gpu_resources(None));
     spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&[]));
 
     let err = DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap_err();
@@ -1080,7 +1097,7 @@ fn validate_sandbox_rejects_unknown_driver_config_fields() {
     let config = runtime_config();
     let mut sandbox = test_sandbox();
     let spec = sandbox.spec.as_mut().unwrap();
-    spec.resource_requirements = Some(gpu_resources());
+    spec.resource_requirements = Some(gpu_resources(None));
     spec.template.as_mut().unwrap().driver_config =
         Some(cdi_device_typo_config(&["nvidia.com/gpu=0"]));
 
@@ -1090,11 +1107,111 @@ fn validate_sandbox_rejects_unknown_driver_config_fields() {
     assert!(err.message().contains("unknown field"));
 }
 
+#[test]
+fn validate_sandbox_accepts_gpu_count_request_shape() {
+    let mut config = runtime_config();
+    config.supports_gpu = true;
+    let mut sandbox = test_sandbox();
+    sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resources(Some(2)));
+
+    DockerComputeDriver::validate_sandbox(&sandbox, &config)
+        .expect("default GPU count shape should be accepted before inventory selection");
+}
+
+#[test]
+fn validate_sandbox_accepts_gpu_count_matching_cdi_devices() {
+    let mut config = runtime_config();
+    config.supports_gpu = true;
+    let mut sandbox = test_sandbox();
+    let spec = sandbox.spec.as_mut().unwrap();
+    spec.resource_requirements = Some(gpu_resources(Some(2)));
+    spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&[
+        "nvidia.com/gpu=0",
+        "nvidia.com/gpu=1",
+    ]));
+
+    DockerComputeDriver::validate_sandbox(&sandbox, &config)
+        .expect("matching explicit CDI device count should be accepted");
+}
+
+#[test]
+fn validate_sandbox_accepts_single_cdi_device_without_gpu_count() {
+    let mut config = runtime_config();
+    config.supports_gpu = true;
+    let mut sandbox = test_sandbox();
+    let spec = sandbox.spec.as_mut().unwrap();
+    spec.resource_requirements = Some(gpu_resources(None));
+    spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&["nvidia.com/gpu=0"]));
+
+    DockerComputeDriver::validate_sandbox(&sandbox, &config)
+        .expect("single exact CDI device should be compatible with a default GPU request");
+}
+
+#[test]
+fn validate_sandbox_rejects_multiple_cdi_devices_without_gpu_count() {
+    let mut config = runtime_config();
+    config.supports_gpu = true;
+    let mut sandbox = test_sandbox();
+    let spec = sandbox.spec.as_mut().unwrap();
+    spec.resource_requirements = Some(gpu_resources(None));
+    spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&[
+        "nvidia.com/gpu=0",
+        "nvidia.com/gpu=1",
+    ]));
+
+    let err = DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap_err();
+
+    assert_eq!(err.code(), tonic::Code::InvalidArgument);
+    assert!(
+        err.message()
+            .contains("gpu count (1) must match driver_config.cdi_devices length (2)")
+    );
+}
+
+#[test]
+fn validate_sandbox_rejects_cdi_devices_without_gpu_request() {
+    let mut config = runtime_config();
+    config.supports_gpu = true;
+    let mut sandbox = test_sandbox();
+    sandbox
+        .spec
+        .as_mut()
+        .unwrap()
+        .template
+        .as_mut()
+        .unwrap()
+        .driver_config = Some(cdi_devices_config(&["nvidia.com/gpu=0"]));
+
+    let err = DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap_err();
+
+    assert_eq!(err.code(), tonic::Code::InvalidArgument);
+    assert!(err.message().contains("requires a gpu request"));
+}
+
+#[test]
+fn validate_sandbox_rejects_gpu_count_mismatched_cdi_devices() {
+    let mut config = runtime_config();
+    config.supports_gpu = true;
+    let mut sandbox = test_sandbox();
+    let spec = sandbox.spec.as_mut().unwrap();
+    spec.resource_requirements = Some(gpu_resources(Some(2)));
+    spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&["nvidia.com/gpu=0"]));
+
+    let err = DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap_err();
+
+    assert_eq!(err.code(), tonic::Code::InvalidArgument);
+    assert!(
+        err.message()
+            .contains("gpu count (2) must match driver_config.cdi_devices length (1)")
+    );
+}
+
+#[test]
 fn validate_sandbox_rejects_template_errors_before_device_config() {
     let config = runtime_config();
     let mut sandbox = test_sandbox();
     let spec = sandbox.spec.as_mut().unwrap();
-    spec.resource_requirements = Some(gpu_resources());
+    spec.resource_requirements = Some(gpu_resources(None));
     let template = spec.template.as_mut().unwrap();
     template.agent_socket_path = "/tmp/agent.sock".to_string();
     template.driver_config = Some(cdi_devices_config(&[]));
@@ -1132,11 +1249,17 @@ fn build_container_create_body_maps_default_gpu_to_selected_cdi_device() {
     let mut config = runtime_config();
     config.supports_gpu = true;
     let mut sandbox = test_sandbox();
-    sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resources());
-
-    let create_body =
-        build_container_create_body_with_default(&sandbox, &config, Some("nvidia.com/gpu=1"))
-            .unwrap();
+    sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resources(None));
+
+    let driver_config = DockerSandboxDriverConfig::default();
+    let gpu_devices = vec!["nvidia.com/gpu=1".to_string()];
+    let create_body = build_container_create_body_with_gpu_devices(
+        &sandbox,
+        &config,
+        &driver_config,
+        Some(&gpu_devices),
+    )
+    .unwrap();
     let request = create_body
         .host_config
         .as_ref()
@@ -1152,19 +1275,20 @@ fn build_container_create_body_maps_default_gpu_to_selected_cdi_device() {
 }
 
 #[test]
-fn build_container_create_body_rejects_missing_default_cdi_device() {
+fn build_container_create_body_omits_devices_without_resolved_default_cdi_devices() {
     let mut config = runtime_config();
     config.supports_gpu = true;
     let mut sandbox = test_sandbox();
-    sandbox.spec.as_mut().unwrap().gpu = true;
+    sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resources(None));
 
-    let err = build_container_create_body(&sandbox, &config).unwrap_err();
+    let create_body = build_container_create_body(&sandbox, &config).unwrap();
 
-    assert_eq!(err.code(), tonic::Code::FailedPrecondition);
     assert!(
-        err.message().contains("selected default CDI GPU device"),
-        "unexpected error: {}",
-        err.message()
+        create_body
+            .host_config
+            .as_ref()
+            .and_then(|host_config| host_config.device_requests.as_ref())
+            .is_none()
     );
 }
 
@@ -1174,7 +1298,7 @@ fn build_container_create_body_passes_explicit_cdi_device_id_through() {
     config.supports_gpu = true;
     let mut sandbox = test_sandbox();
     let spec = sandbox.spec.as_mut().unwrap();
-    spec.resource_requirements = Some(gpu_resources());
+    spec.resource_requirements = Some(gpu_resources(None));
     spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&["nvidia.com/gpu=0"]));
 
     let create_body = build_container_create_body(&sandbox, &config).unwrap();
@@ -1193,7 +1317,25 @@ fn build_container_create_body_passes_explicit_cdi_device_id_through() {
 }
 
 #[test]
-fn build_container_create_body_rejects_cdi_devices_without_gpu() {
+fn build_container_create_body_rejects_gpu_count_mismatched_cdi_devices() {
+    let mut config = runtime_config();
+    config.supports_gpu = true;
+    let mut sandbox = test_sandbox();
+    let spec = sandbox.spec.as_mut().unwrap();
+    spec.resource_requirements = Some(gpu_resources(Some(2)));
+    spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&["nvidia.com/gpu=0"]));
+
+    let err = build_container_create_body(&sandbox, &config).unwrap_err();
+
+    assert_eq!(err.code(), tonic::Code::InvalidArgument);
+    assert!(
+        err.message()
+            .contains("gpu count (2) must match driver_config.cdi_devices length (1)")
+    );
+}
+
+#[test]
+fn build_container_create_body_rejects_cdi_devices_without_gpu_request() {
     let mut sandbox = test_sandbox();
     sandbox
         .spec
@@ -1206,14 +1348,14 @@ fn build_container_create_body_rejects_cdi_devices_without_gpu() {
 
     let err = build_container_create_body(&sandbox, &runtime_config()).unwrap_err();
     assert_eq!(err.code(), tonic::Code::InvalidArgument);
-    assert!(err.message().contains("requires gpu=true"));
+    assert!(err.message().contains("requires a gpu request"));
 }
 
 #[test]
 fn build_container_create_body_rejects_empty_cdi_devices() {
     let mut sandbox = test_sandbox();
     let spec = sandbox.spec.as_mut().unwrap();
-    spec.resource_requirements = Some(gpu_resources());
+    spec.resource_requirements = Some(gpu_resources(None));
     spec.template.as_mut().unwrap().driver_config = Some(cdi_devices_config(&[]));
 
     let err = build_container_create_body(&sandbox, &runtime_config()).unwrap_err();
@@ -1225,40 +1367,46 @@ fn build_container_create_body_rejects_empty_cdi_devices() {
 fn driver_default_gpu_selection_consumes_distinct_devices_for_creates() {
     let mut config = runtime_config();
     config.supports_gpu = true;
-    config.cdi_gpu_inventory = CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1"]);
     let driver = test_driver_with_config(config);
+    driver.gpu_selector.refresh(
+        CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1"]),
+        false,
+    );
     let mut first_sandbox = test_sandbox();
     first_sandbox.id = "sbx-first".to_string();
     first_sandbox.name = "first".to_string();
-    first_sandbox.spec.as_mut().unwrap().gpu = true;
+    first_sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resources(None));
     let mut second_sandbox = test_sandbox();
     second_sandbox.id = "sbx-second".to_string();
     second_sandbox.name = "second".to_string();
-    second_sandbox.spec.as_mut().unwrap().gpu = true;
+    second_sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resources(None));
 
     DockerComputeDriver::validate_sandbox(&first_sandbox, &driver.config).unwrap();
     assert_eq!(
-        driver.peek_default_gpu_device(&first_sandbox).unwrap(),
-        Some("nvidia.com/gpu=0".to_string())
+        driver.gpu_selector.peek_device_ids(1).unwrap(),
+        vec!["nvidia.com/gpu=0".to_string()]
     );
-    let first_device = driver.next_default_gpu_device(&first_sandbox).unwrap();
-    let first_create_body = build_container_create_body_with_default(
+    let first_devices = driver.gpu_selector.next_device_ids(1).unwrap();
+    let driver_config = DockerSandboxDriverConfig::default();
+    let first_create_body = build_container_create_body_with_gpu_devices(
         &first_sandbox,
         &driver.config,
-        first_device.as_deref(),
+        &driver_config,
+        Some(&first_devices),
     )
     .unwrap();
 
     DockerComputeDriver::validate_sandbox(&second_sandbox, &driver.config).unwrap();
     assert_eq!(
-        driver.peek_default_gpu_device(&second_sandbox).unwrap(),
-        Some("nvidia.com/gpu=1".to_string())
+        driver.gpu_selector.peek_device_ids(1).unwrap(),
+        vec!["nvidia.com/gpu=1".to_string()]
     );
-    let second_device = driver.next_default_gpu_device(&second_sandbox).unwrap();
-    let second_create_body = build_container_create_body_with_default(
+    let second_devices = driver.gpu_selector.next_device_ids(1).unwrap();
+    let second_create_body = build_container_create_body_with_gpu_devices(
         &second_sandbox,
         &driver.config,
-        second_device.as_deref(),
+        &driver_config,
+        Some(&second_devices),
     )
     .unwrap();
 
diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs
index d634cea9a..a32034f3a 100644
--- a/crates/openshell-driver-kubernetes/src/driver.rs
+++ b/crates/openshell-driver-kubernetes/src/driver.rs
@@ -17,7 +17,7 @@ use kube::{Client, Error as KubeError};
 use openshell_core::driver_utils::{
     LABEL_MANAGED_BY, LABEL_MANAGED_BY_VALUE, LABEL_SANDBOX_ID, SUPERVISOR_IMAGE_BINARY_PATH,
 };
-use openshell_core::gpu::driver_gpu_requirements;
+use openshell_core::gpu::{driver_gpu_requirements, effective_driver_gpu_count};
 use openshell_core::progress::{
     PROGRESS_STEP_PULLING_IMAGE, PROGRESS_STEP_REQUESTING_SANDBOX, PROGRESS_STEP_STARTING_SANDBOX,
     format_bytes, mark_progress_active, mark_progress_complete, mark_progress_detail,
@@ -84,7 +84,6 @@ const SANDBOX_VERSION: &str = "v1alpha1";
 pub const SANDBOX_KIND: &str = "Sandbox";
 
 const GPU_RESOURCE_NAME: &str = "nvidia.com/gpu";
-const GPU_RESOURCE_QUANTITY: &str = "1";
 const SPIFFE_WORKLOAD_API_VOLUME_NAME: &str = "spiffe-workload-api";
 
 // This POC treats the selected Struct as a driver-local typed schema. Once the
@@ -652,8 +651,10 @@ impl KubernetesComputeDriver {
 }
 
 fn validate_gpu_request(
-    _gpu_requirements: Option<&GpuResourceRequirements>,
+    gpu_requirements: Option<&GpuResourceRequirements>,
 ) -> Result<(), tonic::Status> {
+    let _ =
+        effective_driver_gpu_count(gpu_requirements).map_err(tonic::Status::invalid_argument)?;
     Ok(())
 }
 
@@ -1279,7 +1280,7 @@ fn sandbox_template_to_k8s(
     inject_workspace: bool,
     params: &SandboxPodParams<'_>,
 ) -> serde_json::Value {
-    let gpu_requirements = gpu.then_some(GpuResourceRequirements {});
+    let gpu_requirements = gpu.then_some(GpuResourceRequirements { count: None });
     sandbox_template_to_k8s_with_gpu_requirements(
         template,
         gpu_requirements.as_ref(),
@@ -1697,8 +1698,9 @@ fn container_resources(
         apply("requests", "memory", memory_request);
     }
 
-    if gpu_requirements.is_some() {
-        apply_gpu_limit(&mut resources, GPU_RESOURCE_QUANTITY);
+    if let Some(gpu) = gpu_requirements {
+        let quantity = gpu.count.unwrap_or(1).to_string();
+        apply_gpu_limit(&mut resources, &quantity);
     }
     if resources.as_object().is_some_and(serde_json::Map::is_empty) {
         None
@@ -1970,7 +1972,7 @@ mod tests {
         PROGRESS_ACTIVE_DETAIL_KEY, PROGRESS_ACTIVE_STEP_KEY, PROGRESS_COMPLETE_LABEL_KEY,
         PROGRESS_COMPLETE_STEP_KEY,
     };
-    use openshell_core::proto::compute::v1::GpuResourceRequirements;
+    use openshell_core::proto::compute::v1::{GpuResourceRequirements, ResourceRequirements};
     use prost_types::{Struct, Value, value::Kind};
 
     static ENV_LOCK: std::sync::LazyLock<std::sync::Mutex<()>> =
@@ -2060,6 +2062,28 @@ mod tests {
         assert!(err.contains("gpu_device_ids"));
     }
 
+    #[test]
+    fn validate_rejects_zero_gpu_count() {
+        let sandbox = Sandbox {
+            spec: Some(SandboxSpec {
+                resource_requirements: Some(ResourceRequirements {
+                    gpu: Some(GpuResourceRequirements { count: Some(0) }),
+                }),
+                ..SandboxSpec::default()
+            }),
+            ..Sandbox::default()
+        };
+
+        let gpu_requirements = sandbox
+            .spec
+            .as_ref()
+            .and_then(|spec| driver_gpu_requirements(spec.resource_requirements.as_ref()));
+        let err = validate_gpu_request(gpu_requirements).unwrap_err();
+        assert_eq!(err.code(), tonic::Code::InvalidArgument);
+        assert!(err.message().contains("gpu count must be greater than 0"));
+    }
+
+    #[test]
     fn kube_pulling_event_adds_image_progress_metadata() {
         let mut metadata = std::collections::HashMap::new();
 
@@ -2384,10 +2408,31 @@ mod tests {
         );
         assert_eq!(
             pod_template["spec"]["containers"][0]["resources"]["limits"][GPU_RESOURCE_NAME],
-            serde_json::json!(GPU_RESOURCE_QUANTITY)
+            serde_json::json!("1")
         );
     }
 
+    #[test]
+    fn gpu_count_sandbox_adds_requested_gpu_limit() {
+        let pod_template = {
+            let params = SandboxPodParams::default();
+            let gpu_requirements = GpuResourceRequirements { count: Some(2) };
+            sandbox_template_to_k8s_with_gpu_requirements(
+                &SandboxTemplate::default(),
+                Some(&gpu_requirements),
+                &std::collections::HashMap::new(),
+                true,
+                &params,
+            )
+        };
+
+        assert_eq!(
+            pod_template["spec"]["containers"][0]["resources"]["limits"][GPU_RESOURCE_NAME],
+            serde_json::json!("2")
+        );
+    }
+
+    #[test]
     fn gpu_sandbox_uses_template_runtime_class_name_when_set() {
         let template = SandboxTemplate {
             platform_config: Some(Struct {
@@ -2649,10 +2694,7 @@ mod tests {
 
         let limits = &pod_template["spec"]["containers"][0]["resources"]["limits"];
         assert_eq!(limits["cpu"], serde_json::json!("2"));
-        assert_eq!(
-            limits[GPU_RESOURCE_NAME],
-            serde_json::json!(GPU_RESOURCE_QUANTITY)
-        );
+        assert_eq!(limits[GPU_RESOURCE_NAME], serde_json::json!("1"));
     }
 
     #[test]
diff --git a/crates/openshell-driver-podman/README.md b/crates/openshell-driver-podman/README.md
index 1291dbc29..c484bf8a4 100644
--- a/crates/openshell-driver-podman/README.md
+++ b/crates/openshell-driver-podman/README.md
@@ -46,7 +46,7 @@ The container spec in `container.rs` sets these security-critical fields:
 | `no_new_privileges` | `true` | Prevents privilege escalation after exec. |
 | `seccomp_profile_path` | `unconfined` | The supervisor installs its own policy-aware BPF filter. A container-level profile can block Landlock/seccomp syscalls during setup. |
 | `mounts` | Private tmpfs at `/run/netns` | Lets the supervisor create named network namespaces in rootless Podman. |
-| CDI GPU devices | `driver_config.cdi_devices` when set, otherwise one concrete NVIDIA CDI GPU. Local `/dev/dxg` permits `nvidia.com/gpu=all` as a WSL2 all-only compatibility fallback. | Exposes requested GPUs to GPU-enabled sandbox containers. Counted GPU requests are rejected. |
+| CDI GPU devices | Opaque `driver_config.cdi_devices` values when set, otherwise the requested count of NVIDIA CDI GPUs selected in round-robin order. Local `/dev/dxg` permits `nvidia.com/gpu=all` as a WSL2 all-only compatibility fallback, where it counts as one selectable device. | Exposes requested GPUs to GPU-enabled sandbox containers. Exact CDI device lists must not contain duplicates and must match the effective GPU count. |
 
 The restricted agent child does not retain these supervisor privileges.
 
diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs
index 348ffe265..16814784a 100644
--- a/crates/openshell-driver-podman/src/container.rs
+++ b/crates/openshell-driver-podman/src/container.rs
@@ -5,7 +5,8 @@
 
 use crate::config::PodmanComputeConfig;
 use openshell_core::ComputeDriverError;
-use openshell_core::gpu::{cdi_gpu_device_ids, driver_gpu_requirements};
+#[cfg(test)]
+use openshell_core::gpu::{driver_gpu_requirements, validate_specific_gpu_device_request};
 use openshell_core::proto::compute::v1::{DriverSandbox, DriverSandboxTemplate};
 use openshell_core::proto_struct::deserialize_optional_non_empty_string_list;
 use openshell_core::{driver_mounts, proto_struct};
@@ -476,36 +477,6 @@ fn podman_pids_limit(value: i64) -> Option<i64> {
     if value > 0 { Some(value) } else { None }
 }
 
-/// Build CDI GPU device list if GPU is requested.
-fn build_devices(
-    sandbox: &DriverSandbox,
-    selected_default_device: Option<&str>,
-) -> Result<Option<Vec<LinuxDevice>>, ComputeDriverError> {
-    let Some(spec) = sandbox.spec.as_ref() else {
-        return Ok(None);
-    };
-    let cdi_devices = PodmanSandboxDriverConfig::from_sandbox(sandbox)?
-        .cdi_devices
-        .unwrap_or_default();
-    let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
-    if gpu_requirements.is_none() && !cdi_devices.is_empty() {
-        return Err(ComputeDriverError::InvalidArgument(
-            "driver_config.cdi_devices requires gpu=true".to_string(),
-        ));
-    }
-
-    cdi_gpu_device_ids(gpu_requirements, &cdi_devices, selected_default_device)
-        .map(|device_ids| {
-            device_ids.map(|device_ids| {
-                device_ids
-                    .into_iter()
-                    .map(|path| LinuxDevice { path })
-                    .collect()
-            })
-        })
-        .map_err(|err| ComputeDriverError::Precondition(err.to_string()))
-}
-
 pub fn podman_driver_volume_mount_sources(
     sandbox: &DriverSandbox,
     enable_bind_mounts: bool,
@@ -796,14 +767,30 @@ pub fn try_build_container_spec_with_token(
     config: &PodmanComputeConfig,
     token_host_path: Option<&Path>,
 ) -> Result<Value, ComputeDriverError> {
-    build_container_spec_with_token_and_gpu_default(sandbox, config, token_host_path, None)
+    let driver_config = PodmanSandboxDriverConfig::from_sandbox(sandbox)?;
+    let gpu_requirements = sandbox
+        .spec
+        .as_ref()
+        .and_then(|spec| driver_gpu_requirements(spec.resource_requirements.as_ref()));
+    let cdi_devices = if let Some(cdi_devices) = driver_config.cdi_devices.as_ref() {
+        validate_specific_gpu_device_request(
+            gpu_requirements,
+            cdi_devices,
+            "driver_config.cdi_devices",
+        )
+        .map_err(ComputeDriverError::InvalidArgument)?;
+        Some(cdi_devices.as_slice())
+    } else {
+        None
+    };
+    build_container_spec_with_token_and_gpu_devices(sandbox, config, token_host_path, cdi_devices)
 }
 
-pub fn build_container_spec_with_token_and_gpu_default(
+pub fn build_container_spec_with_token_and_gpu_devices(
     sandbox: &DriverSandbox,
     config: &PodmanComputeConfig,
     token_host_path: Option<&Path>,
-    selected_default_device: Option<&str>,
+    gpu_device_ids: Option<&[String]>,
 ) -> Result<Value, ComputeDriverError> {
     let image = resolve_image(sandbox, config);
     let name = container_name(&sandbox.name);
@@ -814,7 +801,13 @@ pub fn build_container_spec_with_token_and_gpu_default(
     let resource_limits = build_resource_limits(sandbox, config);
     let user_mounts = podman_user_mounts(sandbox, config.enable_bind_mounts)
         .map_err(ComputeDriverError::InvalidArgument)?;
-    let devices = build_devices(sandbox, selected_default_device)?;
+    let devices = gpu_device_ids.map(|device_ids| {
+        device_ids
+            .iter()
+            .cloned()
+            .map(|path| LinuxDevice { path })
+            .collect()
+    });
 
     // Network configuration -- always bridge mode.
     // Matches libpod's network spec format `{name: {opts}}`; the unit-struct
@@ -1150,9 +1143,9 @@ mod tests {
         }
     }
 
-    fn gpu_resources() -> ResourceRequirements {
+    fn gpu_resources(count: Option<u32>) -> ResourceRequirements {
         ResourceRequirements {
-            gpu: Some(GpuResourceRequirements {}),
+            gpu: Some(GpuResourceRequirements { count }),
         }
     }
 
@@ -1268,15 +1261,16 @@ mod tests {
 
         let mut sandbox = test_sandbox("test-id", "test-name");
         sandbox.spec = Some(DriverSandboxSpec {
-            resource_requirements: Some(gpu_resources()),
+            resource_requirements: Some(gpu_resources(None)),
             ..Default::default()
         });
         let config = test_config();
-        let spec = build_container_spec_with_token_and_gpu_default(
+        let gpu_devices = vec!["nvidia.com/gpu=1".to_string()];
+        let spec = build_container_spec_with_token_and_gpu_devices(
             &sandbox,
             &config,
             None,
-            Some("nvidia.com/gpu=1"),
+            Some(&gpu_devices),
         )
         .unwrap();
 
@@ -1287,21 +1281,20 @@ mod tests {
     }
 
     #[test]
-    fn container_spec_rejects_missing_default_cdi_device() {
+    fn container_spec_omits_devices_without_resolved_default_cdi_devices() {
         use openshell_core::proto::compute::v1::DriverSandboxSpec;
 
         let mut sandbox = test_sandbox("test-id", "test-name");
         sandbox.spec = Some(DriverSandboxSpec {
-            gpu: true,
+            resource_requirements: Some(gpu_resources(None)),
             ..Default::default()
         });
         let config = test_config();
 
-        let err = build_container_spec_with_token_and_gpu_default(&sandbox, &config, None, None)
-            .unwrap_err();
+        let spec =
+            build_container_spec_with_token_and_gpu_devices(&sandbox, &config, None, None).unwrap();
 
-        assert!(matches!(err, ComputeDriverError::Precondition(_)));
-        assert!(err.to_string().contains("selected default CDI GPU device"));
+        assert!(spec.get("devices").is_none());
     }
 
     #[test]
@@ -1310,7 +1303,7 @@ mod tests {
 
         let mut sandbox = test_sandbox("test-id", "test-name");
         sandbox.spec = Some(DriverSandboxSpec {
-            resource_requirements: Some(gpu_resources()),
+            resource_requirements: Some(gpu_resources(None)),
             template: Some(DriverSandboxTemplate {
                 driver_config: Some(cdi_devices_config(&["nvidia.com/gpu=0"])),
                 ..Default::default()
@@ -1327,7 +1320,60 @@ mod tests {
     }
 
     #[test]
-    fn container_spec_rejects_cdi_devices_without_gpu() {
+    fn container_spec_accepts_gpu_count_matching_cdi_devices() {
+        use openshell_core::proto::compute::v1::{DriverSandboxSpec, DriverSandboxTemplate};
+
+        let mut sandbox = test_sandbox("test-id", "test-name");
+        sandbox.spec = Some(DriverSandboxSpec {
+            resource_requirements: Some(gpu_resources(Some(2))),
+            template: Some(DriverSandboxTemplate {
+                driver_config: Some(cdi_devices_config(&[
+                    "nvidia.com/gpu=0",
+                    "nvidia.com/gpu=1",
+                ])),
+                ..Default::default()
+            }),
+            ..Default::default()
+        });
+        let config = test_config();
+        let spec = build_container_spec(&sandbox, &config);
+
+        assert_eq!(spec["devices"].as_array().map(Vec::len), Some(2));
+        assert_eq!(
+            spec["devices"][0]["path"].as_str(),
+            Some("nvidia.com/gpu=0")
+        );
+        assert_eq!(
+            spec["devices"][1]["path"].as_str(),
+            Some("nvidia.com/gpu=1")
+        );
+    }
+
+    #[test]
+    fn container_spec_rejects_gpu_count_mismatched_cdi_devices() {
+        use openshell_core::proto::compute::v1::{DriverSandboxSpec, DriverSandboxTemplate};
+
+        let mut sandbox = test_sandbox("test-id", "test-name");
+        sandbox.spec = Some(DriverSandboxSpec {
+            resource_requirements: Some(gpu_resources(Some(2))),
+            template: Some(DriverSandboxTemplate {
+                driver_config: Some(cdi_devices_config(&["nvidia.com/gpu=0"])),
+                ..Default::default()
+            }),
+            ..Default::default()
+        });
+        let config = test_config();
+
+        let err = try_build_container_spec_with_token(&sandbox, &config, None).unwrap_err();
+        assert!(matches!(err, ComputeDriverError::InvalidArgument(_)));
+        assert!(
+            err.to_string()
+                .contains("gpu count (2) must match driver_config.cdi_devices length (1)")
+        );
+    }
+
+    #[test]
+    fn container_spec_rejects_cdi_devices_without_gpu_request() {
         use openshell_core::proto::compute::v1::{DriverSandboxSpec, DriverSandboxTemplate};
 
         let mut sandbox = test_sandbox("test-id", "test-name");
@@ -1342,7 +1388,7 @@ mod tests {
 
         let err = try_build_container_spec_with_token(&sandbox, &config, None).unwrap_err();
         assert!(matches!(err, ComputeDriverError::InvalidArgument(_)));
-        assert!(err.to_string().contains("requires gpu=true"));
+        assert!(err.to_string().contains("requires a gpu request"));
     }
 
     #[test]
@@ -1351,7 +1397,7 @@ mod tests {
 
         let mut sandbox = test_sandbox("test-id", "test-name");
         sandbox.spec = Some(DriverSandboxSpec {
-            resource_requirements: Some(gpu_resources()),
+            resource_requirements: Some(gpu_resources(None)),
             template: Some(DriverSandboxTemplate {
                 driver_config: Some(cdi_devices_config(&[])),
                 ..Default::default()
@@ -1371,7 +1417,7 @@ mod tests {
 
         let mut sandbox = test_sandbox("test-id", "test-name");
         sandbox.spec = Some(DriverSandboxSpec {
-            resource_requirements: Some(gpu_resources()),
+            resource_requirements: Some(gpu_resources(None)),
             template: Some(DriverSandboxTemplate {
                 driver_config: Some(cdi_device_typo_config(&["nvidia.com/gpu=0"])),
                 ..Default::default()
diff --git a/crates/openshell-driver-podman/src/driver.rs b/crates/openshell-driver-podman/src/driver.rs
index 927c02c28..27d510281 100644
--- a/crates/openshell-driver-podman/src/driver.rs
+++ b/crates/openshell-driver-podman/src/driver.rs
@@ -13,7 +13,8 @@ use openshell_core::ComputeDriverError;
 use openshell_core::config::CDI_GPU_DEVICE_ALL;
 use openshell_core::driver_utils::supervisor_image_should_refresh;
 use openshell_core::gpu::{
-    CdiGpuInventory, CdiGpuRoundRobin, CdiGpuSelectionError, driver_gpu_requirements,
+    CdiGpuDefaultSelector, CdiGpuInventory, CdiGpuSelectionError, driver_gpu_requirements,
+    effective_driver_gpu_count, validate_specific_gpu_device_request,
 };
 use openshell_core::proto::compute::v1::{
     DriverSandbox, GetCapabilitiesResponse, GpuResourceRequirements,
@@ -41,9 +42,8 @@ pub struct PodmanComputeDriver {
     /// The host's IP on the bridge network. Sandbox containers use this to
     /// reach the gateway server when no explicit gRPC endpoint is configured.
     network_gateway_ip: Option<String>,
-    gpu_inventory: CdiGpuInventory,
-    allow_all_default_gpu: bool,
-    gpu_selector: Arc<CdiGpuRoundRobin>,
+    gpu_selector: Arc<CdiGpuDefaultSelector>,
+    gpu_inventory_refresh: Arc<dyn Fn() -> (CdiGpuInventory, bool) + Send + Sync>,
 }
 
 impl std::fmt::Debug for PodmanComputeDriver {
@@ -52,12 +52,16 @@ impl std::fmt::Debug for PodmanComputeDriver {
             .field("socket_path", &self.config.socket_path)
             .field("default_image", &self.config.default_image)
             .field("network_name", &self.config.network_name)
-            .field("gpu_inventory", &self.gpu_inventory)
-            .field("allow_all_default_gpu", &self.allow_all_default_gpu)
+            .field("gpu_inventory", &self.gpu_selector.device_ids())
             .finish()
     }
 }
 
+struct ValidatedPodmanSandbox<'a> {
+    driver_config: PodmanSandboxDriverConfig,
+    gpu_requirements: Option<&'a GpuResourceRequirements>,
+}
+
 /// Construct and validate a container name from a sandbox name.
 ///
 /// Combines the prefix with the sandbox name and validates the result
@@ -170,6 +174,13 @@ fn local_podman_all_gpu_default_supported() -> bool {
     local_podman_all_gpu_default_supported_from(Path::new("/dev"))
 }
 
+fn local_podman_gpu_selector_state() -> (CdiGpuInventory, bool) {
+    (
+        local_podman_cdi_gpu_inventory(),
+        local_podman_all_gpu_default_supported(),
+    )
+}
+
 fn podman_gpu_selection_error(err: CdiGpuSelectionError) -> ComputeDriverError {
     ComputeDriverError::Precondition(err.to_string())
 }
@@ -271,8 +282,7 @@ impl PodmanComputeDriver {
             "Bridge network ready"
         );
 
-        let gpu_inventory = local_podman_cdi_gpu_inventory();
-        let allow_all_default_gpu = local_podman_all_gpu_default_supported();
+        let (gpu_inventory, allow_all_default_gpu) = local_podman_gpu_selector_state();
         if !gpu_inventory.is_empty() {
             info!(
                 device_count = gpu_inventory.as_slice().len(),
@@ -308,9 +318,11 @@ impl PodmanComputeDriver {
             client,
             config,
             network_gateway_ip,
-            gpu_inventory,
-            allow_all_default_gpu,
-            gpu_selector: Arc::new(CdiGpuRoundRobin::new()),
+            gpu_selector: Arc::new(CdiGpuDefaultSelector::new(
+                gpu_inventory,
+                allow_all_default_gpu,
+            )),
+            gpu_inventory_refresh: Arc::new(local_podman_gpu_selector_state),
         })
     }
 
@@ -342,71 +354,85 @@ impl PodmanComputeDriver {
         &self,
         sandbox: &DriverSandbox,
     ) -> Result<(), ComputeDriverError> {
+        let _ = self.validated_sandbox_create(sandbox).await?;
+        Ok(())
+    }
+
+    async fn validated_sandbox_create<'a>(
+        &self,
+        sandbox: &'a DriverSandbox,
+    ) -> Result<ValidatedPodmanSandbox<'a>, ComputeDriverError> {
         let gpu_requirements = sandbox
             .spec
             .as_ref()
             .and_then(|spec| spec.resource_requirements.as_ref())
             .and_then(|requirements| driver_gpu_requirements(Some(requirements)));
         let driver_config = PodmanSandboxDriverConfig::from_sandbox(sandbox)?;
-        if gpu_requirements.is_none() && driver_config.cdi_devices.is_some() {
-            return Err(ComputeDriverError::InvalidArgument(
-                "driver_config.cdi_devices requires gpu=true".to_string(),
-            ));
-        }
-        Self::validate_gpu_request(gpu_requirements)?;
+        Self::validate_gpu_request(gpu_requirements, &driver_config)?;
         self.validate_user_volume_mounts_available(sandbox).await?;
-        let _ = self.peek_default_gpu_device(sandbox)?;
-        Ok(())
+        let _ = self.resolve_gpu_cdi_devices(
+            gpu_requirements,
+            &driver_config,
+            CdiGpuDefaultSelector::peek_device_ids,
+        )?;
+        Ok(ValidatedPodmanSandbox {
+            driver_config,
+            gpu_requirements,
+        })
     }
 
     fn validate_gpu_request(
         gpu_requirements: Option<&GpuResourceRequirements>,
+        driver_config: &PodmanSandboxDriverConfig,
     ) -> Result<(), ComputeDriverError> {
-        if gpu_requirements.is_some() && !Self::has_gpu_capacity() {
-            return Err(ComputeDriverError::Precondition(
-                "GPU sandbox requested, but no NVIDIA GPU devices are available.".to_string(),
-            ));
+        let _ = effective_driver_gpu_count(gpu_requirements)
+            .map_err(ComputeDriverError::InvalidArgument)?;
+        if let Some(cdi_devices) = driver_config.cdi_devices.as_deref() {
+            validate_specific_gpu_device_request(
+                gpu_requirements,
+                cdi_devices,
+                "driver_config.cdi_devices",
+            )
+            .map_err(ComputeDriverError::InvalidArgument)?;
         }
+
         Ok(())
     }
 
-    fn peek_default_gpu_device(
-        &self,
-        sandbox: &DriverSandbox,
-    ) -> Result<Option<String>, ComputeDriverError> {
-        self.selected_default_gpu_device(sandbox, false)
+    fn refresh_gpu_inventory(&self) {
+        let (inventory, allow_all_default_gpu) = (self.gpu_inventory_refresh)();
+        self.gpu_selector.refresh(inventory, allow_all_default_gpu);
     }
 
-    fn next_default_gpu_device(
+    fn resolve_gpu_cdi_devices(
         &self,
-        sandbox: &DriverSandbox,
-    ) -> Result<Option<String>, ComputeDriverError> {
-        self.selected_default_gpu_device(sandbox, true)
-    }
+        gpu_requirements: Option<&GpuResourceRequirements>,
+        driver_config: &PodmanSandboxDriverConfig,
+        select_default_devices: fn(
+            &CdiGpuDefaultSelector,
+            u32,
+        ) -> Result<Vec<String>, CdiGpuSelectionError>,
+    ) -> Result<Option<Vec<String>>, ComputeDriverError> {
+        if let Some(cdi_devices) = driver_config.cdi_devices.as_deref() {
+            validate_specific_gpu_device_request(
+                gpu_requirements,
+                cdi_devices,
+                "driver_config.cdi_devices",
+            )
+            .map_err(ComputeDriverError::InvalidArgument)?;
+            return Ok(Some(cdi_devices.to_vec()));
+        }
 
-    fn selected_default_gpu_device(
-        &self,
-        sandbox: &DriverSandbox,
-        consume: bool,
-    ) -> Result<Option<String>, ComputeDriverError> {
-        let Some(spec) = sandbox.spec.as_ref() else {
+        let Some(count) = effective_driver_gpu_count(gpu_requirements)
+            .map_err(ComputeDriverError::InvalidArgument)?
+        else {
             return Ok(None);
         };
-        let driver_config = PodmanSandboxDriverConfig::from_sandbox(sandbox)?;
-        let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
-        if gpu_requirements.is_none() || driver_config.cdi_devices.is_some() {
-            return Ok(None);
-        }
 
-        let selected = if consume {
-            self.gpu_selector
-                .next_default_device_id(&self.gpu_inventory, self.allow_all_default_gpu)
-        } else {
-            self.gpu_selector
-                .peek_default_device_id(&self.gpu_inventory, self.allow_all_default_gpu)
-        }
-        .map_err(podman_gpu_selection_error)?;
-        Ok(Some(selected))
+        self.refresh_gpu_inventory();
+        select_default_devices(&self.gpu_selector, count)
+            .map(Some)
+            .map_err(podman_gpu_selection_error)
     }
 
     async fn validate_user_volume_mounts_available(
@@ -454,7 +480,7 @@ impl PodmanComputeDriver {
         // resources (volume), so we don't leave orphans when the name is
         // invalid.
         let name = validated_container_name(&sandbox.name)?;
-        self.validate_sandbox_create(sandbox).await?;
+        let validated = self.validated_sandbox_create(sandbox).await?;
 
         let vol_name = container::volume_name(&sandbox.id);
 
@@ -520,19 +546,23 @@ impl PodmanComputeDriver {
         };
 
         // 3. Create container.
-        let selected_default_gpu = match self.next_default_gpu_device(sandbox) {
-            Ok(device) => device,
+        let gpu_devices = match self.resolve_gpu_cdi_devices(
+            validated.gpu_requirements,
+            &validated.driver_config,
+            CdiGpuDefaultSelector::next_device_ids,
+        ) {
+            Ok(devices) => devices,
             Err(e) => {
                 let _ = self.client.remove_volume(&vol_name).await;
                 cleanup_sandbox_token_file(&sandbox.id);
                 return Err(e);
             }
         };
-        let spec = match container::build_container_spec_with_token_and_gpu_default(
+        let spec = match container::build_container_spec_with_token_and_gpu_devices(
             sandbox,
             &self.config,
             token_host_path.as_deref(),
-            selected_default_gpu.as_deref(),
+            gpu_devices.as_deref(),
         ) {
             Ok(spec) => spec,
             Err(e) => {
@@ -763,13 +793,18 @@ impl PodmanComputeDriver {
         allow_all_default_gpu: bool,
     ) -> Self {
         let client = PodmanClient::new(config.socket_path.clone());
+        let refresh_inventory = gpu_inventory.clone();
         Self {
             client,
             config,
             network_gateway_ip: None,
-            gpu_inventory,
-            allow_all_default_gpu,
-            gpu_selector: Arc::new(CdiGpuRoundRobin::new()),
+            gpu_selector: Arc::new(CdiGpuDefaultSelector::new(
+                gpu_inventory,
+                allow_all_default_gpu,
+            )),
+            gpu_inventory_refresh: Arc::new(move || {
+                (refresh_inventory.clone(), allow_all_default_gpu)
+            }),
         }
     }
 }
@@ -826,7 +861,9 @@ mod tests {
     use super::*;
     use crate::test_utils::{StubResponse, spawn_podman_stub};
     use hyper::StatusCode;
-    use openshell_core::proto::compute::v1::{DriverSandboxSpec, DriverSandboxTemplate};
+    use openshell_core::proto::compute::v1::{
+        DriverSandboxSpec, DriverSandboxTemplate, ResourceRequirements,
+    };
     use std::collections::HashMap;
     use std::fs;
     use std::path::PathBuf;
@@ -854,6 +891,12 @@ mod tests {
         }
     }
 
+    fn gpu_resources(count: Option<u32>) -> ResourceRequirements {
+        ResourceRequirements {
+            gpu: Some(GpuResourceRequirements { count }),
+        }
+    }
+
     #[test]
     fn podman_driver_error_from_conflict() {
         let err = ComputeDriverError::from(PodmanApiError::Conflict("exists".into()));
@@ -866,6 +909,69 @@ mod tests {
         assert!(matches!(err, ComputeDriverError::Message(_)));
     }
 
+    #[test]
+    fn validate_gpu_request_accepts_gpu_count_request_shape() {
+        let gpu = GpuResourceRequirements { count: Some(2) };
+        let driver_config = PodmanSandboxDriverConfig::default();
+
+        PodmanComputeDriver::validate_gpu_request(Some(&gpu), &driver_config)
+            .expect("default GPU count shape should be accepted before inventory selection");
+    }
+
+    #[test]
+    fn validate_gpu_request_accepts_single_cdi_device_without_gpu_count() {
+        let gpu = GpuResourceRequirements { count: None };
+        let mut driver_config = PodmanSandboxDriverConfig::default();
+        driver_config.cdi_devices = Some(vec!["nvidia.com/gpu=0".to_string()]);
+
+        PodmanComputeDriver::validate_gpu_request(Some(&gpu), &driver_config)
+            .expect("single exact CDI device should pass count validation");
+    }
+
+    #[test]
+    fn validate_gpu_request_rejects_multiple_cdi_devices_without_gpu_count() {
+        let gpu = GpuResourceRequirements { count: None };
+        let mut driver_config = PodmanSandboxDriverConfig::default();
+        driver_config.cdi_devices = Some(vec![
+            "nvidia.com/gpu=0".to_string(),
+            "nvidia.com/gpu=1".to_string(),
+        ]);
+        let err = PodmanComputeDriver::validate_gpu_request(Some(&gpu), &driver_config)
+            .expect_err("missing CDI device count should be rejected for multiple devices");
+
+        assert!(matches!(err, ComputeDriverError::InvalidArgument(_)));
+        assert!(
+            err.to_string()
+                .contains("gpu count (1) must match driver_config.cdi_devices length (2)")
+        );
+    }
+
+    #[test]
+    fn validate_gpu_request_rejects_cdi_devices_without_gpu_request() {
+        let mut driver_config = PodmanSandboxDriverConfig::default();
+        driver_config.cdi_devices = Some(vec!["nvidia.com/gpu=0".to_string()]);
+        let err = PodmanComputeDriver::validate_gpu_request(None, &driver_config)
+            .expect_err("missing GPU request should be rejected");
+
+        assert!(matches!(err, ComputeDriverError::InvalidArgument(_)));
+        assert!(err.to_string().contains("requires a gpu request"));
+    }
+
+    #[test]
+    fn validate_gpu_request_rejects_mismatched_cdi_device_count() {
+        let gpu = GpuResourceRequirements { count: Some(2) };
+        let mut driver_config = PodmanSandboxDriverConfig::default();
+        driver_config.cdi_devices = Some(vec!["nvidia.com/gpu=0".to_string()]);
+        let err = PodmanComputeDriver::validate_gpu_request(Some(&gpu), &driver_config)
+            .expect_err("mismatched CDI device count should be rejected");
+
+        assert!(matches!(err, ComputeDriverError::InvalidArgument(_)));
+        assert!(
+            err.to_string()
+                .contains("gpu count (2) must match driver_config.cdi_devices length (1)")
+        );
+    }
+
     // ── grpc_endpoint auto-detection ───────────────────────────────────
     //
     // PodmanComputeDriver::new() fills grpc_endpoint when it is empty.
@@ -996,7 +1102,7 @@ mod tests {
         );
         let sandbox = DriverSandbox {
             spec: Some(DriverSandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources(None)),
                 ..Default::default()
             }),
             ..Default::default()
@@ -1016,7 +1122,7 @@ mod tests {
         );
         let sandbox = DriverSandbox {
             spec: Some(DriverSandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources(None)),
                 ..Default::default()
             }),
             ..Default::default()
@@ -1035,7 +1141,7 @@ mod tests {
         );
         let sandbox = DriverSandbox {
             spec: Some(DriverSandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources(None)),
                 ..Default::default()
             }),
             ..Default::default()
@@ -1053,7 +1159,7 @@ mod tests {
         let driver = PodmanComputeDriver::for_tests(PodmanComputeConfig::default());
         let sandbox = DriverSandbox {
             spec: Some(DriverSandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources(None)),
                 template: Some(DriverSandboxTemplate {
                     driver_config: Some(cdi_devices_config(&["nvidia.com/gpu=0"])),
                     ..Default::default()
@@ -1078,7 +1184,7 @@ mod tests {
             id: "sbx-first".to_string(),
             name: "first".to_string(),
             spec: Some(DriverSandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources(None)),
                 ..Default::default()
             }),
             ..Default::default()
@@ -1087,35 +1193,35 @@ mod tests {
             id: "sbx-second".to_string(),
             name: "second".to_string(),
             spec: Some(DriverSandboxSpec {
-                gpu: true,
+                resource_requirements: Some(gpu_resources(None)),
                 ..Default::default()
             }),
             ..Default::default()
         };
 
         assert_eq!(
-            driver.peek_default_gpu_device(&first_sandbox).unwrap(),
-            Some("nvidia.com/gpu=0".to_string())
+            driver.gpu_selector.peek_device_ids(1).unwrap(),
+            vec!["nvidia.com/gpu=0".to_string()]
         );
-        let first_device = driver.next_default_gpu_device(&first_sandbox).unwrap();
-        let first_spec = container::build_container_spec_with_token_and_gpu_default(
+        let first_devices = driver.gpu_selector.next_device_ids(1).unwrap();
+        let first_spec = container::build_container_spec_with_token_and_gpu_devices(
             &first_sandbox,
             &driver.config,
             None,
-            first_device.as_deref(),
+            Some(&first_devices),
         )
         .unwrap();
 
         assert_eq!(
-            driver.peek_default_gpu_device(&second_sandbox).unwrap(),
-            Some("nvidia.com/gpu=1".to_string())
+            driver.gpu_selector.peek_device_ids(1).unwrap(),
+            vec!["nvidia.com/gpu=1".to_string()]
         );
-        let second_device = driver.next_default_gpu_device(&second_sandbox).unwrap();
-        let second_spec = container::build_container_spec_with_token_and_gpu_default(
+        let second_devices = driver.gpu_selector.next_device_ids(1).unwrap();
+        let second_spec = container::build_container_spec_with_token_and_gpu_devices(
             &second_sandbox,
             &driver.config,
             None,
-            second_device.as_deref(),
+            Some(&second_devices),
         )
         .unwrap();
 
diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs
index f0c669b87..d5d9565e7 100644
--- a/crates/openshell-driver-vm/src/driver.rs
+++ b/crates/openshell-driver-vm/src/driver.rs
@@ -29,7 +29,9 @@ use oci_client::manifest::{
 };
 use oci_client::secrets::RegistryAuth;
 use oci_client::{Reference, RegistryOperation};
-use openshell_core::gpu::driver_gpu_requirements;
+use openshell_core::gpu::{
+    driver_gpu_requirements, effective_driver_gpu_count, validate_specific_gpu_device_request,
+};
 use openshell_core::progress::{
     PROGRESS_STEP_PULLING_IMAGE, PROGRESS_STEP_REQUESTING_SANDBOX, PROGRESS_STEP_STARTING_SANDBOX,
     format_bytes, mark_progress_active, mark_progress_complete, mark_progress_detail,
@@ -3113,13 +3115,23 @@ fn validate_gpu_request(sandbox: &Sandbox, gpu_enabled: bool) -> Result<(), Stat
         .ok_or_else(|| Status::invalid_argument("sandbox spec is required"))?;
 
     let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
+    let gpu_count =
+        effective_driver_gpu_count(gpu_requirements).map_err(Status::invalid_argument)?;
 
-    let _ = vm_gpu_device_id(sandbox)?;
     if gpu_requirements.is_some() && !gpu_enabled {
         return Err(Status::failed_precondition(
             "GPU support is not enabled on this driver; start with --gpu",
         ));
     }
+
+    let _ = vm_gpu_device_id(sandbox)?;
+
+    if gpu_count.is_some_and(|count| count > 1) {
+        return Err(Status::invalid_argument(
+            "VM GPU sandboxes support only one GPU",
+        ));
+    }
+
     Ok(())
 }
 
@@ -3133,11 +3145,12 @@ fn vm_gpu_device_id(sandbox: &Sandbox) -> Result<Option<String>, Status> {
         .gpu_device_ids
         .unwrap_or_default();
     let gpu_requirements = driver_gpu_requirements(spec.resource_requirements.as_ref());
-    if gpu_requirements.is_none() && !gpu_device_ids.is_empty() {
-        return Err(Status::invalid_argument(
-            "driver_config.gpu_device_ids requires gpu=true",
-        ));
-    }
+    validate_specific_gpu_device_request(
+        gpu_requirements,
+        &gpu_device_ids,
+        "driver_config.gpu_device_ids",
+    )
+    .map_err(Status::invalid_argument)?;
     if gpu_device_ids.len() > 1 {
         return Err(Status::invalid_argument(
             "vm driver currently supports at most one gpu_device_ids entry",
@@ -5112,9 +5125,9 @@ mod tests {
         }
     }
 
-    fn gpu_resources() -> ResourceRequirements {
+    fn gpu_resources(count: Option<u32>) -> ResourceRequirements {
         ResourceRequirements {
-            gpu: Some(GpuResourceRequirements {}),
+            gpu: Some(GpuResourceRequirements { count }),
         }
     }
 
@@ -5185,7 +5198,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                resource_requirements: Some(gpu_resources()),
+                resource_requirements: Some(gpu_resources(None)),
                 ..Default::default()
             }),
             ..Default::default()
@@ -5196,12 +5209,32 @@ mod tests {
         assert!(err.message().contains("GPU support is not enabled"));
     }
 
+    #[test]
+    fn validate_vm_sandbox_rejects_missing_gpu_support_before_request_shape() {
+        let sandbox = Sandbox {
+            id: "sandbox-123".to_string(),
+            spec: Some(SandboxSpec {
+                resource_requirements: Some(gpu_resources(Some(2))),
+                template: Some(SandboxTemplate {
+                    driver_config: Some(gpu_device_ids_config(&["0000:2d:00.0"])),
+                    ..Default::default()
+                }),
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+        let err = validate_vm_sandbox(&sandbox, false)
+            .expect_err("missing GPU support should be rejected before request shape");
+        assert_eq!(err.code(), Code::FailedPrecondition);
+        assert!(err.message().contains("GPU support is not enabled"));
+    }
+
     #[test]
     fn validate_vm_sandbox_accepts_gpu_when_enabled() {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                resource_requirements: Some(gpu_resources()),
+                resource_requirements: Some(gpu_resources(None)),
                 ..Default::default()
             }),
             ..Default::default()
@@ -5209,7 +5242,121 @@ mod tests {
         validate_vm_sandbox(&sandbox, true).expect("gpu should be accepted when enabled");
     }
 
-    fn validate_vm_sandbox_rejects_gpu_device_without_gpu() {
+    #[test]
+    fn validate_vm_sandbox_accepts_gpu_count_one() {
+        let sandbox = Sandbox {
+            id: "sandbox-123".to_string(),
+            spec: Some(SandboxSpec {
+                resource_requirements: Some(gpu_resources(Some(1))),
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+        validate_vm_sandbox(&sandbox, true).expect("one GPU should be accepted when enabled");
+    }
+
+    #[test]
+    fn validate_vm_sandbox_accepts_single_gpu_device_without_gpu_count() {
+        let sandbox = Sandbox {
+            id: "sandbox-123".to_string(),
+            spec: Some(SandboxSpec {
+                resource_requirements: Some(gpu_resources(None)),
+                template: Some(SandboxTemplate {
+                    driver_config: Some(gpu_device_ids_config(&["0000:2d:00.0"])),
+                    ..Default::default()
+                }),
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+        validate_vm_sandbox(&sandbox, true)
+            .expect("single exact GPU device should be compatible with a default GPU request");
+    }
+
+    #[test]
+    fn validate_vm_sandbox_rejects_multiple_gpu_device_ids_without_gpu_count() {
+        let sandbox = Sandbox {
+            id: "sandbox-123".to_string(),
+            spec: Some(SandboxSpec {
+                resource_requirements: Some(gpu_resources(None)),
+                template: Some(SandboxTemplate {
+                    driver_config: Some(gpu_device_ids_config(&["0000:2d:00.0", "0000:31:00.0"])),
+                    ..Default::default()
+                }),
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+        let err = validate_vm_sandbox(&sandbox, true)
+            .expect_err("multiple GPU device IDs without count should be rejected");
+
+        assert_eq!(err.code(), Code::InvalidArgument);
+        assert!(
+            err.message()
+                .contains("gpu count (1) must match driver_config.gpu_device_ids length (2)")
+        );
+    }
+
+    #[test]
+    fn validate_vm_sandbox_accepts_gpu_count_matching_device_id() {
+        let sandbox = Sandbox {
+            id: "sandbox-123".to_string(),
+            spec: Some(SandboxSpec {
+                resource_requirements: Some(gpu_resources(Some(1))),
+                template: Some(SandboxTemplate {
+                    driver_config: Some(gpu_device_ids_config(&["0000:2d:00.0"])),
+                    ..Default::default()
+                }),
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+        validate_vm_sandbox(&sandbox, true)
+            .expect("matching explicit GPU device count should be accepted");
+    }
+
+    #[test]
+    fn validate_vm_sandbox_rejects_gpu_count_above_one() {
+        let sandbox = Sandbox {
+            id: "sandbox-123".to_string(),
+            spec: Some(SandboxSpec {
+                resource_requirements: Some(gpu_resources(Some(2))),
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+        let err = validate_vm_sandbox(&sandbox, true)
+            .expect_err("multiple GPU VM request should be rejected");
+        assert_eq!(err.code(), Code::InvalidArgument);
+        assert!(err.message().contains("support only one GPU"));
+    }
+
+    #[test]
+    fn validate_vm_sandbox_rejects_gpu_count_mismatched_device_id() {
+        let sandbox = Sandbox {
+            id: "sandbox-123".to_string(),
+            spec: Some(SandboxSpec {
+                resource_requirements: Some(gpu_resources(Some(2))),
+                template: Some(SandboxTemplate {
+                    driver_config: Some(gpu_device_ids_config(&["0000:2d:00.0"])),
+                    ..Default::default()
+                }),
+                ..Default::default()
+            }),
+            ..Default::default()
+        };
+        let err = validate_vm_sandbox(&sandbox, true)
+            .expect_err("mismatched explicit GPU device count should be rejected");
+
+        assert_eq!(err.code(), Code::InvalidArgument);
+        assert!(
+            err.message()
+                .contains("gpu count (2) must match driver_config.gpu_device_ids length (1)")
+        );
+    }
+
+    #[test]
+    fn validate_vm_sandbox_rejects_gpu_device_without_gpu_request() {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
@@ -5222,9 +5369,9 @@ mod tests {
             ..Default::default()
         };
         let err = validate_vm_sandbox(&sandbox, true)
-            .expect_err("gpu_device_ids without gpu should be rejected");
+            .expect_err("gpu_device_ids without a GPU request should be rejected");
         assert_eq!(err.code(), Code::InvalidArgument);
-        assert!(err.message().contains("gpu_device_ids requires gpu=true"));
+        assert!(err.message().contains("requires a gpu request"));
     }
 
     #[test]
@@ -5232,7 +5379,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                resource_requirements: Some(gpu_resources()),
+                resource_requirements: Some(gpu_resources(Some(2))),
                 template: Some(SandboxTemplate {
                     driver_config: Some(gpu_device_ids_config(&["0000:2d:00.0", "0000:31:00.0"])),
                     ..Default::default()
@@ -5252,7 +5399,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                resource_requirements: Some(gpu_resources()),
+                resource_requirements: Some(gpu_resources(None)),
                 template: Some(SandboxTemplate {
                     driver_config: Some(gpu_device_ids_config(&[])),
                     ..Default::default()
@@ -5272,7 +5419,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                resource_requirements: Some(gpu_resources()),
+                resource_requirements: Some(gpu_resources(None)),
                 template: Some(SandboxTemplate {
                     driver_config: Some(gpu_device_id_typo_config(&["0000:2d:00.0"])),
                     ..Default::default()
@@ -5292,7 +5439,7 @@ mod tests {
         let sandbox = Sandbox {
             id: "sandbox-123".to_string(),
             spec: Some(SandboxSpec {
-                resource_requirements: Some(gpu_resources()),
+                resource_requirements: Some(gpu_resources(None)),
                 template: Some(SandboxTemplate {
                     agent_socket_path: "/tmp/agent.sock".to_string(),
                     driver_config: Some(gpu_device_ids_config(&[])),
diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs
index 3dc28943f..8576e51b9 100644
--- a/crates/openshell-server/src/compute/mod.rs
+++ b/crates/openshell-server/src/compute/mod.rs
@@ -1506,7 +1506,7 @@ fn driver_sandbox_spec_from_public(
                 gpu: requirements
                     .gpu
                     .as_ref()
-                    .map(|_| DriverGpuResourceRequirements {}),
+                    .map(|gpu| DriverGpuResourceRequirements { count: gpu.count }),
             }
         }),
         sandbox_token: String::new(),
@@ -2092,7 +2092,7 @@ mod tests {
     fn driver_sandbox_spec_from_public_preserves_gpu_requirement() {
         let public = SandboxSpec {
             resource_requirements: Some(openshell_core::proto::ResourceRequirements {
-                gpu: Some(openshell_core::proto::GpuResourceRequirements {}),
+                gpu: Some(openshell_core::proto::GpuResourceRequirements { count: Some(2) }),
             }),
             ..Default::default()
         };
@@ -2105,7 +2105,7 @@ mod tests {
             .as_ref()
             .and_then(|requirements| requirements.gpu.as_ref())
             .expect("driver GPU requirement should be set");
-        assert_eq!(gpu, &DriverGpuResourceRequirements {});
+        assert_eq!(gpu.count, Some(2));
     }
 
     #[test]
@@ -2615,7 +2615,7 @@ mod tests {
             &mut status,
             Some(&SandboxSpec {
                 resource_requirements: Some(openshell_core::proto::ResourceRequirements {
-                    gpu: Some(openshell_core::proto::GpuResourceRequirements {}),
+                    gpu: Some(openshell_core::proto::GpuResourceRequirements { count: None }),
                 }),
                 ..Default::default()
             }),
@@ -2976,7 +2976,7 @@ mod tests {
         let sandbox = Sandbox {
             spec: Some(SandboxSpec {
                 resource_requirements: Some(openshell_core::proto::ResourceRequirements {
-                    gpu: Some(openshell_core::proto::GpuResourceRequirements {}),
+                    gpu: Some(openshell_core::proto::GpuResourceRequirements { count: None }),
                 }),
                 ..Default::default()
             }),
diff --git a/crates/openshell-server/src/grpc/validation.rs b/crates/openshell-server/src/grpc/validation.rs
index 2ca71f7b3..b3680c6e7 100644
--- a/crates/openshell-server/src/grpc/validation.rs
+++ b/crates/openshell-server/src/grpc/validation.rs
@@ -134,6 +134,9 @@ pub(super) fn validate_sandbox_spec(
         validate_env_entries(&tmpl.environment, "spec.template.environment")?;
     }
 
+    // --- spec.resource_requirements.gpu ---
+    validate_gpu_request_fields(spec)?;
+
     // --- spec.policy serialized size ---
     if let Some(ref policy) = spec.policy {
         let size = policy.encoded_len();
@@ -147,6 +150,14 @@ pub(super) fn validate_sandbox_spec(
     Ok(())
 }
 
+fn validate_gpu_request_fields(spec: &openshell_core::proto::SandboxSpec) -> Result<(), Status> {
+    if openshell_core::gpu::sandbox_gpu_count(spec.resource_requirements.as_ref()) == Some(0) {
+        return Err(Status::invalid_argument("gpu count must be greater than 0"));
+    }
+
+    Ok(())
+}
+
 /// Validate template-level field sizes.
 fn validate_sandbox_template(tmpl: &SandboxTemplate) -> Result<(), Status> {
     // String fields.
@@ -761,13 +772,37 @@ mod tests {
     fn validate_sandbox_spec_accepts_gpu_flag() {
         let spec = SandboxSpec {
             resource_requirements: Some(openshell_core::proto::ResourceRequirements {
-                gpu: Some(openshell_core::proto::GpuResourceRequirements {}),
+                gpu: Some(openshell_core::proto::GpuResourceRequirements { count: None }),
+            }),
+            ..Default::default()
+        };
+        assert!(validate_sandbox_spec("gpu-sandbox", &spec).is_ok());
+    }
+
+    #[test]
+    fn validate_sandbox_spec_accepts_gpu_count() {
+        let spec = SandboxSpec {
+            resource_requirements: Some(openshell_core::proto::ResourceRequirements {
+                gpu: Some(openshell_core::proto::GpuResourceRequirements { count: Some(2) }),
             }),
             ..Default::default()
         };
         assert!(validate_sandbox_spec("gpu-sandbox", &spec).is_ok());
     }
 
+    #[test]
+    fn validate_sandbox_spec_rejects_zero_gpu_count() {
+        let spec = SandboxSpec {
+            resource_requirements: Some(openshell_core::proto::ResourceRequirements {
+                gpu: Some(openshell_core::proto::GpuResourceRequirements { count: Some(0) }),
+            }),
+            ..Default::default()
+        };
+        let err = validate_sandbox_spec("gpu-sandbox", &spec).unwrap_err();
+        assert_eq!(err.code(), Code::InvalidArgument);
+        assert!(err.message().contains("gpu count must be greater than 0"));
+    }
+
     #[test]
     fn validate_sandbox_spec_accepts_empty_defaults() {
         assert!(validate_sandbox_spec("", &default_spec()).is_ok());
diff --git a/docs/reference/sandbox-compute-drivers.mdx b/docs/reference/sandbox-compute-drivers.mdx
index e76a4b1ca..c81afd66d 100644
--- a/docs/reference/sandbox-compute-drivers.mdx
+++ b/docs/reference/sandbox-compute-drivers.mdx
@@ -53,14 +53,29 @@ openshell sandbox create \
 ```
 
 Driver config is for fields without a stable public flag. Prefer `--cpu`,
-`--memory`, and `--gpu` for supported resource intent.
+`--memory`, and `--gpu` for supported resource intent. When `--gpu` is present
+without a count, OpenShell treats it as a request for one GPU. Pass
+`--gpu COUNT` when requesting more than one GPU.
+
+Kubernetes maps the GPU count to the `nvidia.com/gpu` pod resource limit.
+Docker and Podman satisfy count-only GPU requests by selecting the requested
+number of NVIDIA CDI devices from the local CDI inventory in round-robin order.
+The drivers refresh the CDI inventory before validating or creating the
+sandbox, so CDI devices added or removed after gateway start can affect later
+creates. On WSL2 all-only runtimes, Docker or Podman can use
+`nvidia.com/gpu=all` as a compatibility fallback, where it counts as one
+selectable device.
 
 Exact GPU device selection remains driver-owned and requires `--gpu`. Docker
-and Podman accept `cdi_devices`; replace the top-level `docker` key with
-`podman` when using the Podman driver, for example
-`{"docker":{"cdi_devices":["nvidia.com/gpu=0"]}}`. The VM driver accepts
-`gpu_device_ids`, for example `{"vm":{"gpu_device_ids":["0000:2d:00.0"]}}`;
-the current VM implementation accepts at most one entry.
+and Podman accept `cdi_devices` as opaque CDI device names; replace the
+top-level `docker` key with `podman` when using the Podman driver, for example
+`{"docker":{"cdi_devices":["nvidia.com/gpu=0"]}}`. Explicit CDI device lists
+must not contain duplicates, and their length must match the effective GPU
+count. A single exact CDI device is compatible with the default `--gpu`
+request. The VM driver accepts `gpu_device_ids`, for example
+`{"vm":{"gpu_device_ids":["0000:2d:00.0"]}}`; the current VM implementation
+accepts at most one entry and allows either `--gpu` or `--gpu 1` when
+`gpu_device_ids` is set.
 
 For Kubernetes, `pod.runtime_class_name` maps to PodSpec `runtimeClassName`.
 It overrides the gateway's configured default runtime class for that sandbox,
diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx
index efb2a6b5a..fdbf497f5 100644
--- a/docs/sandboxes/manage-sandboxes.mdx
+++ b/docs/sandboxes/manage-sandboxes.mdx
@@ -70,17 +70,33 @@ To request GPU resources, add `--gpu`:
 openshell sandbox create --gpu -- claude
 ```
 
+Request a specific number of GPUs by passing a count to `--gpu`:
+
+```shell
+openshell sandbox create --gpu 2 -- claude
+```
+
+When you omit the count, OpenShell treats the request as `--gpu 1`.
+Kubernetes honors counted `--gpu` requests by setting the `nvidia.com/gpu`
+limit. Docker and Podman select the requested number of default NVIDIA CDI
+devices in round-robin order. VM gateways accept only one GPU, either through
+`--gpu` or `--gpu 1`; a single `gpu_device_ids` entry works with either form.
+
 For Docker-backed sandboxes, GPU injection uses Docker CDI. If you enable Docker
 CDI after the gateway starts, restart the gateway so OpenShell can detect the
 updated Docker daemon capability.
 
-Docker and Podman gateways map a default `--gpu` request to one concrete
-NVIDIA CDI device when individual CDI devices are available. On WSL2 all-only
-runtimes, the default can fall back to `nvidia.com/gpu=all`.
+Docker and Podman refresh the CDI inventory before validating or creating a
+default GPU request, so CDI devices added or removed after the driver starts can
+be reflected in later sandbox creates. On WSL2 all-only runtimes, the default
+can fall back to `nvidia.com/gpu=all`; that fallback counts as one selectable
+device.
 
 Exact GPU device selection is driver-specific and still requires `--gpu`. For
 Docker or Podman, pass CDI IDs through `cdi_devices`. The top-level key must
-match the active driver; replace `docker` with `podman` when using Podman:
+match the active driver; replace `docker` with `podman` when using Podman. CDI
+IDs are treated as opaque strings. The list must not contain duplicate IDs, and
+its length must match the effective GPU count:
 
 ```shell
 openshell sandbox create \
diff --git a/proto/compute_driver.proto b/proto/compute_driver.proto
index bfff3d42d..f471f575a 100644
--- a/proto/compute_driver.proto
+++ b/proto/compute_driver.proto
@@ -103,7 +103,11 @@ message ResourceRequirements {
 }
 
 // Driver GPU resource requirements.
-message GpuResourceRequirements {}
+message GpuResourceRequirements {
+  // Optional number of GPUs requested. When omitted, the request is for one
+  // GPU using the selected driver's default assignment behavior.
+  optional uint32 count = 1;
+}
 
 // Driver-owned runtime template consumed by the compute platform.
 //
diff --git a/proto/openshell.proto b/proto/openshell.proto
index b7884f6e7..c5dad4140 100644
--- a/proto/openshell.proto
+++ b/proto/openshell.proto
@@ -336,7 +336,11 @@ message ResourceRequirements {
 }
 
 // Public GPU resource requirements.
-message GpuResourceRequirements {}
+message GpuResourceRequirements {
+  // Optional number of GPUs requested. When omitted, the request is for one
+  // GPU using the selected driver's default assignment behavior.
+  optional uint32 count = 1;
+}
 
 // Public sandbox template mapped onto compute-driver template inputs.
 message SandboxTemplate {
diff --git a/rfc/0004-sandbox-resource-requirements/README.md b/rfc/0004-sandbox-resource-requirements/README.md
index 01b4319dd..c2ceae778 100644
--- a/rfc/0004-sandbox-resource-requirements/README.md
+++ b/rfc/0004-sandbox-resource-requirements/README.md
@@ -7,6 +7,8 @@ links:
   - https://github.com/NVIDIA/OpenShell/pull/1340
   - https://github.com/NVIDIA/OpenShell/pull/1360
   - https://github.com/NVIDIA/OpenShell/issues/1492
+  - https://github.com/NVIDIA/OpenShell/pull/1675
+  - https://github.com/NVIDIA/OpenShell/pull/1815
 ---
 
 # RFC 0004 - Sandbox Resource Requirements
@@ -26,6 +28,11 @@ then relies on the selected driver to validate and provision the request.
 `SandboxTemplate.resources` remains a platform-native realization layer and
 escape hatch. It is not the portable driver-selection interface.
 
+The current implementation lands the first GPU-focused phase of this model:
+`resource_requirements.gpu` replaces the legacy GPU-specific request fields,
+with an optional count. The broader generic compute/device/dataset model in
+this RFC remains the target direction for later expansion.
+
 ## Motivation
 
 OpenShell currently treats GPU placement as a special case. The public
@@ -59,6 +66,12 @@ configuration remains separate and is interpreted by the resource driver.
 Exposing a general-purpose driver-specific configuration surface is related, but
 tracked separately in issue #1492.
 
+Since this RFC was first drafted, `SandboxTemplate.driver_config` has been
+added as the driver-owned configuration surface. The gateway selects the block
+for the active driver and forwards that inner object unchanged. Exact GPU
+selection for Docker, Podman, and VM is now expressed there rather than in the
+portable resource requirement.
+
 ## Non-goals
 
 - Defining dataset allocation, mount, caching, or access-control semantics.
@@ -69,8 +82,10 @@ tracked separately in issue #1492.
 - Defining the general driver-specific configuration passthrough API. Issue
   #1492 tracks that related API surface.
 - Publishing allocated resource identities in sandbox status.
-- Preserving long-term compatibility for `gpu`, `gpu_device`, or a
-  GPU-specific `gpu_count` request field.
+- Preserving alpha-era compatibility for `gpu`, `gpu_device`, or a
+  GPU-specific `gpu_count` request field. The legacy GPU-specific request
+  fields are intentionally not carried forward into the API shape this RFC
+  aims to stabilize.
 
 ## Proposal
 
@@ -89,26 +104,52 @@ message SandboxSpec {
 
   // Portable resource requirements used by the gateway for driver selection
   // and by drivers for provisioning.
-  SandboxResourceRequirements resource_requirements = 11;
+  ResourceRequirements resource_requirements = 9;
 
-  reserved 9, 10;
-  reserved "gpu", "gpu_device";
+  reserved 10;
+  reserved "gpu_device";
 }
 ```
 
+The public sandbox API is still alpha. This migration intentionally replaces
+the old `bool gpu = 9` field with the typed `resource_requirements = 9` message
+instead of reserving the legacy field number. Old live requests and persisted
+sandbox records that encode GPU intent through the legacy boolean are not
+migrated; callers should use a matching OpenShell CLI/API version and recreate
+GPU sandboxes after upgrade when they need the new typed shape. Avoiding
+alpha-era reserved fields keeps the proto surface closer to the API intended
+for stabilization.
+
 `SandboxTemplate.resources` keeps its existing role as platform-native workload
 configuration. It may contain Kubernetes-style CPU, memory, and extended
 resource requests and limits, but it is not the portable resource contract.
 
+The implemented first phase uses this narrower proto shape:
+
+```proto
+message ResourceRequirements {
+  // GPU requirements for the sandbox. Presence indicates a GPU request.
+  GpuResourceRequirements gpu = 1;
+}
+
+message GpuResourceRequirements {
+  // Optional number of GPUs requested. When omitted, the request is for one
+  // GPU using the selected driver's default assignment behavior.
+  optional uint32 count = 1;
+}
+```
+
+`gpu.count` must be greater than zero when present. When `gpu` is present and
+`count` is omitted, the effective count is one. Drivers apply that same
+effective-count rule when validating exact driver-config device lists.
+
 The CLI should not expose a JSON flag for `resource_requirements`. Common
 portable requests should use typed flags such as CPU, memory, and GPU-count
 flags, and SDK/API callers should use the typed protobuf messages directly.
-JSON-formatted driver-specific configuration is a related but separate API
-topic. Issue #1492 tracks exposing an opaque driver-owned configuration surface,
-potentially named `driver_config`. This RFC only requires that driver-native
-configuration remains separate from portable resource requirements.
+JSON-formatted driver-specific configuration remains separate from portable
+resource requirements and is passed through `SandboxTemplate.driver_config`.
 
-### Resource requirements
+### Long-term resource requirements
 
 Use typed requirement domains for stable first-party resource concepts instead
 of making every request stringly typed through a `kind` field.
@@ -250,6 +291,11 @@ Portable GPU semantics are limited to:
 - `count`
 - exact-match attributes in `selector.match_attributes`
 
+In the current GPU-focused implementation, only the `gpu.count` portion is
+implemented in `ResourceRequirements`. Exact device IDs are not portable
+selector attributes yet; they remain driver-owned config. Docker and Podman use
+`driver_config.cdi_devices`, and VM uses `driver_config.gpu_device_ids`.
+
 Driver-native GPU details are expressed through namespaced parameters. Example
 parameter namespaces:
 
@@ -317,10 +363,23 @@ Example realizations:
 | Driver | Realization |
 |---|---|
 | Kubernetes | Convert `className=gpu,count=N` into a pod resource limit such as `limits["nvidia.com/gpu"] = "N"` unless Kubernetes-specific parameters select another resource name or class. |
-| Docker | Convert CDI parameters into Docker CDI device injection. For a count-only request, select an available CDI GPU device when device inventory is available. |
-| Podman | Convert CDI parameters into Podman CDI device injection. For a count-only request, select an available CDI GPU device when device inventory is available. |
+| Docker | Use exact `driver_config.cdi_devices` values when present; otherwise select `count` default CDI GPU IDs from the discovered inventory. |
+| Podman | Use exact `driver_config.cdi_devices` values when present; otherwise select `count` default CDI GPU IDs from the discovered inventory. |
 | VM | Convert VM parameters into BDF or index-based device assignment. |
 
+Docker and Podman default selection is round-robin over the normalized NVIDIA
+CDI inventory. Indexed IDs are preferred and sorted numerically; named IDs are
+used when no indexed IDs exist. On WSL2 all-only runtimes, `nvidia.com/gpu=all`
+can be used as a fallback and counts as one selectable device. The selector
+refreshes its inventory before validating and creating default GPU requests so
+CDI devices added or removed after driver startup can affect later creates.
+
+Explicit CDI IDs are opaque. The drivers do not parse aliases, normalize
+`nvidia.com/gpu=all`, or treat it specially on the explicit path. Exact ID
+lists must not contain duplicates and their length must match the effective GPU
+count. A list of two explicit IDs with `--gpu` and no count fails because the
+effective count is one.
+
 Docker and Podman should not interpret VM BDF/index parameters. The VM driver
 should not interpret CDI parameters. Gateway namespace prefiltering should avoid
 sending clearly incompatible requests to those drivers.
@@ -505,17 +564,22 @@ other.
 ### Related driver-specific configuration
 
 Driver-specific configuration is intentionally separate from portable resource
-requirements. Issue #1492 tracks an opaque driver-owned configuration surface
-for backend-native settings such as Kubernetes node selectors, tolerations,
-image pull secrets, Docker network mode, or VM disk shape. A future design may
-place that surface on `SandboxTemplate.driver_config` and may use a namespaced
-map-of-maps shape, but this RFC does not standardize that API or its CLI flags.
+requirements. The current API uses `SandboxTemplate.driver_config` as an
+opaque envelope keyed by driver name. The gateway selects the block for the
+active driver and forwards that inner object to the compute driver; the driver
+owns nested schema validation.
+
+`driver_config` is the right place for backend-native settings such as
+Kubernetes node selectors, tolerations, image pull secrets, Docker or Podman
+mounts, and exact GPU IDs. These settings are not portable resource
+requirements, and the gateway must not interpret them as a scheduling contract.
 
 This RFC does not introduce `--resources-json`, `--resource-requirements-json`,
 or `--template-resources-json`. CPU, memory, GPU count, and exact GPU selection
-should use typed resource fields or typed CLI flags. Backend-native settings
-that are not modeled by `resource_requirements` should remain driver-specific
-and should not be presented as portable resource requests.
+should use typed resource fields, typed CLI flags, or the selected driver's
+documented `driver_config` fields. Backend-native settings that are not modeled
+by `resource_requirements` should remain driver-specific and should not be
+presented as portable resource requests.
 
 ### Template realization and conflicts
 
@@ -551,21 +615,43 @@ message DriverSandboxSpec {
   string log_level = 1;
   map<string, string> environment = 5;
   DriverSandboxTemplate template = 6;
-  DriverSandboxResourceRequirements resource_requirements = 11;
+  ResourceRequirements resource_requirements = 9;
 
-  reserved 9, 10;
-  reserved "gpu", "gpu_device";
+  reserved 10;
+  reserved "gpu_device";
 }
 ```
 
 Driver-owned resource requirement messages should have the same semantics as
 the public messages, but live in `compute_driver.proto` to keep the public and
-internal contracts separated.
+internal contracts separated. In the current implementation, the driver proto
+mirrors the narrow GPU-focused shape:
+
+```proto
+message ResourceRequirements {
+  GpuResourceRequirements gpu = 1;
+}
+
+message GpuResourceRequirements {
+  optional uint32 count = 1;
+}
+```
+
+The compute-driver API is version-coupled to the gateway in current deployments:
+local drivers are launched by the gateway at startup, and the driver proto is
+not treated as a public compatibility surface. It follows the same alpha-era
+field replacement as the public API rather than preserving transitional GPU
+fields.
 
 ### Driver capabilities
 
-Replace GPU-specific capability fields with coarse resource capability
-summaries:
+The current implementation removes the GPU-specific `supports_gpu` and
+`gpu_count` capability fields from the compute-driver proto. It does not yet
+replace them with a stable resource capability summary. Until that lands, the
+selected driver's `ValidateSandboxCreate` remains the authority for whether a
+GPU request can be served.
+
+A later phase should add coarse resource capability summaries:
 
 ```proto
 message GetCapabilitiesResponse {
@@ -638,30 +724,44 @@ or if driver-specific validation rejects parameter values.
 When no resource requirements are present, the gateway should preserve today's
 default behavior and use the configured default driver.
 
-## Implementation plan
-
-1. Update public protobufs to add `SandboxResourceRequirements` and remove the
-   long-term use of `gpu` and `gpu_device`.
-2. Update compute-driver protobufs with mirrored driver-owned resource
-   requirements and coarse resource capability summaries.
-3. Update gateway validation and public-to-driver translation.
-4. Add validation that rejects conflicts between portable resource requirements
-   and template resource passthrough.
-5. Allow the gateway to consider multiple configured compute drivers for a
-   create request, using capability prefiltering plus `ValidateSandboxCreate`.
-6. Update Kubernetes, Docker, Podman, and VM drivers to advertise compute and
-   GPU device capability summaries and interpret their supported parameter
-   namespaces.
-7. Update CLI/API request construction so CPU, memory, GPU count, and exact GPU
-   selection use resource requirements instead of GPU-specific request fields.
-8. Do not expose JSON-formatted portable resource request flags.
-9. Update user-facing docs and driver README files once behavior is
-   implemented.
-
-Because this is a breaking request-spec change, the implementation must either
-land in a breaking API version or be explicitly called out as a breaking change
-for the current API. Removed protobuf tags should be reserved rather than
-reused.
+## Implementation state and plan
+
+Implemented in the first phase:
+
+1. Replace the public and driver `gpu`/`gpu_device` request fields with
+   `resource_requirements.gpu`.
+2. Treat a present GPU requirement with omitted `count` as an effective request
+   for one GPU, and reject `count = 0`.
+3. Pass GPU requirements through gateway-to-driver create and validation paths.
+4. Keep exact device IDs in `driver_config`:
+   `driver_config.cdi_devices` for Docker and Podman, and
+   `driver_config.gpu_device_ids` for VM.
+5. Reject duplicate exact device IDs and require exact device list length to
+   match the effective GPU count.
+6. Let Kubernetes map GPU count to the `nvidia.com/gpu` limit.
+7. Let Docker and Podman satisfy count-only GPU requests by selecting default
+   CDI devices from a refreshed local inventory.
+8. Remove GPU-specific capability fields from the compute-driver proto without
+   adding replacement resource capability summaries yet.
+9. Do not expose JSON-formatted portable resource request flags.
+
+Remaining follow-up work:
+
+1. Add the broader compute/device/dataset resource model when those semantics
+   are stable enough for the public API.
+2. Add coarse resource capability summaries and gateway matching across
+   multiple configured drivers.
+3. Add conflict validation between portable resource requirements and
+   platform-native template resource passthrough where both can express the
+   same resource.
+4. Decide whether and how drivers should expose inventory or capacity for
+   advisory matching without turning it into a reservation API.
+
+Because the public and driver APIs are still alpha, the first implementation
+intentionally breaks old live and persisted GPU intent instead of preserving
+compatibility shims for the legacy GPU fields. Callers should use a matching
+OpenShell CLI/API version and recreate GPU sandboxes after upgrade when they
+need the new typed request shape.
 
 ## Tests
 
@@ -669,23 +769,27 @@ The implementation should include:
 
 - protobuf translation tests for public resource requirements into driver
   resource requirements.
-- gateway matching tests for compute capability support, device class, count,
-  selector, and parameter namespace filtering.
-- gateway tests showing that the selected driver is the first validating
-  candidate in configured order.
-- validation tests for conflicts between resource requirements and template
-  resource passthrough.
-- validation tests that unsupported parameter namespaces are rejected.
+- shared helper tests showing that omitted GPU count is effective count one and
+  explicit zero count is rejected.
+- validation tests for duplicate exact device IDs and for mismatched exact
+  device-list length versus effective GPU count.
 - Kubernetes tests that map compute requirements to pod CPU/memory resources
   and GPU count to `nvidia.com/gpu` limits.
-- Docker and Podman GPU e2e tests that request a CDI GPU with
-  `cdi.openshell.ai`.
+- Docker and Podman selector tests for numeric CDI ID ordering, named CDI ID
+  fallback, count-based round-robin selection, all-only fallback, insufficient
+  devices, and inventory refresh without cursor reset.
+- Docker and Podman validation tests showing that explicit CDI IDs bypass
+  default inventory selection and remain opaque.
 - VM tests that map CPU/memory to VM allocation and request a GPU by BDF or
-  index with `vm.openshell.ai`.
-- tests showing that template-only resources are treated as platform-native
-  passthrough and are not used for portable driver matching.
+  index through `driver_config.gpu_device_ids`.
 - CLI request-shape tests showing that there is no JSON-formatted portable
   resource request flag.
+- future gateway matching tests for compute capability support, device class,
+  count, selector, and parameter namespace filtering once resource capability
+  summaries are added.
+- future validation tests for conflicts between resource requirements and
+  template resource passthrough once those conflicts can occur through stable
+  portable fields.
 - error-message tests for no matching driver and validation failure across all
   candidates.