NVIDIA
diff --git a/architecture/compute-runtimes.md b/architecture/compute-runtimes.md
Lines changed: 6 additions & 7 deletions
diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs
Lines changed: 133 additions & 5 deletions
diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs
Lines changed: 1 addition & 1 deletion
diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
Lines changed: 47 additions & 5 deletions
@@ -55,11 +55,9 @@ through the driver configuration. The Helm chart defaults sandbox agents to
 `Unconfined` so runtime/default AppArmor profiles do not block supervisor
 network namespace setup on AppArmor-enabled nodes.
 
-GPU requests enter the driver layer through `SandboxSpec.gpu` and
-`SandboxSpec.gpu_device`. Docker and Podman map default GPU requests to one
-concrete NVIDIA CDI device when individual CDI devices are available, use
-`nvidia.com/gpu=all` only for WSL2/all-only compatibility, and pass explicit
-driver-native device IDs through.
+Resource requirements enter the driver layer through `SandboxSpec.resource_requirements`. These include a set of GPU requirements, where a user
+can request a specific number of GPUs or the driver-specific default behaviour.
+For all in-tree drivers, the default behaviour is equivalent to selecting a single GPU.
 
 VM runtime state paths are derived only from driver-validated sandbox IDs
 matching `[A-Za-z0-9._-]{1,128}`. The gateway-owned VM driver socket uses a
@@ -99,8 +97,9 @@ Custom sandbox images must include the agent runtime and any system
 dependencies, but they should not need to include the gateway. GPU-capable
 images must include the user-space libraries required by the workload. The
 runtime still owns GPU device injection. GPU requests are explicit, and can be
-refined with a driver-native device identifier; the gateway validates the
-request shape and each runtime enforces the GPU allocation modes it supports.
+refined with a driver-native device identifier or requested count; the gateway
+validates the request shape and each runtime enforces the GPU allocation modes it
+supports.
 
 ## Deployment Shape
 
 
@@ -29,6 +29,21 @@ struct GatewayContext {
     endpoint: String,
 }
 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum GpuCliRequest {
+    DriverDefault,
+    Count(u32),
+}
+
+impl From<GpuCliRequest> for GpuResourceRequirements {
+    fn from(gpu: GpuCliRequest) -> Self {
+        match gpu {
+            GpuCliRequest::Count(count) => Self { count: Some(count) },
+            GpuCliRequest::DriverDefault => Self { count: None },
+        }
+    }
+}
+
 /// Resolve the gateway name to a [`GatewayContext`] with the gateway endpoint.
 ///
 /// Resolution priority:
@@ -110,6 +125,21 @@ fn resolve_gateway(
     })
 }
 
+fn parse_gpu_request(value: &str) -> std::result::Result<GpuCliRequest, String> {
+    if value.is_empty() {
+        return Ok(GpuCliRequest::DriverDefault);
+    }
+
+    let count = value
+        .parse::<u32>()
+        .map_err(|_| "GPU count must be a positive integer".to_string())?;
+    if count == 0 {
+        return Err("GPU count must be greater than 0".to_string());
+    }
+
+    Ok(GpuCliRequest::Count(count))
+}
+
 fn resolve_gateway_name(gateway_flag: &Option<String>) -> Option<String> {
     gateway_flag
         .clone()
@@ -1217,8 +1247,11 @@ enum SandboxCommands {
         editor: Option<CliEditor>,
 
         /// Request GPU resources for the sandbox.
-        #[arg(long)]
-        gpu: bool,
+        ///
+        /// Omit COUNT for the driver's default GPU selection, or pass COUNT
+        /// to request a specific number of GPUs.
+        #[arg(long, num_args = 0..=1, value_name = "COUNT", default_missing_value = "", value_parser = parse_gpu_request)]
+        gpu: Option<GpuCliRequest>,
 
         /// CPU limit for the sandbox (for example: 500m, 1, 2.5).
         #[arg(long)]
@@ -2626,7 +2659,7 @@ async fn main() -> Result<()> {
                         .map(|s| openshell_core::forward::ForwardSpec::parse(&s))
                         .transpose()?;
                     let keep = keep || !no_keep || editor.is_some() || forward.is_some();
-                    let gpu_requirements = gpu.then_some(GpuResourceRequirements {});
+                    let gpu_requirements: Option<GpuResourceRequirements> = gpu.map(Into::into);
 
                     let ctx = resolve_gateway(&cli.gateway, &cli.gateway_endpoint)?;
                     let endpoint = &ctx.endpoint;
@@ -3636,6 +3669,27 @@ mod tests {
         });
     }
 
+    #[test]
+    fn gpu_cli_request_option_maps_absent_gpu_to_no_requirements() {
+        let gpu: Option<GpuResourceRequirements> = Option::<GpuCliRequest>::None.map(Into::into);
+
+        assert_eq!(gpu, None);
+    }
+
+    #[test]
+    fn gpu_cli_request_driver_default_converts_to_requirements() {
+        let gpu = GpuResourceRequirements::from(GpuCliRequest::DriverDefault);
+
+        assert_eq!(gpu.count, None);
+    }
+
+    #[test]
+    fn gpu_cli_request_count_converts_to_requirements() {
+        let gpu = GpuResourceRequirements::from(GpuCliRequest::Count(2));
+
+        assert_eq!(gpu.count, Some(2));
+    }
+
     #[test]
     fn apply_auth_uses_stored_token() {
         let tmp = tempfile::tempdir().unwrap();
@@ -4507,7 +4561,23 @@ mod tests {
                 command: Some(SandboxCommands::Create { gpu, .. }),
                 ..
             }) => {
-                assert!(gpu);
+                assert_eq!(gpu, Some(GpuCliRequest::DriverDefault));
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_parses_from_gpu_flag() {
+        let cli = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu", "2"])
+            .expect("sandbox create --gpu 2 should parse");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { gpu, .. }),
+                ..
+            }) => {
+                assert_eq!(gpu, Some(GpuCliRequest::Count(2)));
             }
             other => panic!("expected SandboxCommands::Create, got: {other:?}"),
         }
@@ -4523,13 +4593,71 @@ mod tests {
                 command: Some(SandboxCommands::Create { gpu, command, .. }),
                 ..
             }) => {
-                assert!(gpu);
+                assert_eq!(gpu, Some(GpuCliRequest::DriverDefault));
+                assert_eq!(command, vec!["claude".to_string()]);
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_allows_trailing_command() {
+        let cli = Cli::try_parse_from([
+            "openshell",
+            "sandbox",
+            "create",
+            "--gpu",
+            "2",
+            "--",
+            "claude",
+        ])
+        .expect("sandbox create --gpu 2 -- claude should parse");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { gpu, command, .. }),
+                ..
+            }) => {
+                assert_eq!(gpu, Some(GpuCliRequest::Count(2)));
                 assert_eq!(command, vec!["claude".to_string()]);
             }
             other => panic!("expected SandboxCommands::Create, got: {other:?}"),
         }
     }
 
+    #[test]
+    fn sandbox_create_gpu_count_rejects_zero() {
+        let result = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu", "0"]);
+
+        assert!(result.is_err(), "sandbox create --gpu 0 should be rejected");
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_accepts_equals_syntax() {
+        let cli = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu=2"])
+            .expect("sandbox create --gpu=2 should parse");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { gpu, .. }),
+                ..
+            }) => {
+                assert_eq!(gpu, Some(GpuCliRequest::Count(2)));
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_rejects_non_integer() {
+        let result = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu", "many"]);
+
+        assert!(
+            result.is_err(),
+            "sandbox create --gpu many should be rejected"
+        );
+    }
+
     #[test]
     fn service_expose_accepts_positional_target_port_and_service() {
         let cli = Cli::try_parse_from([
 
@@ -8395,7 +8395,7 @@ mod tests {
     #[test]
     fn provisioning_timeout_message_includes_condition_and_gpu_hint() {
         let resource_requirements = ResourceRequirements {
-            gpu: Some(GpuResourceRequirements {}),
+            gpu: Some(GpuResourceRequirements { count: None }),
         };
         let message = provisioning_timeout_message(
             120,
 
@@ -1088,8 +1088,8 @@ fn test_tls(server: &TestServer) -> TlsOptions {
     server.tls.with_gateway_name("openshell")
 }
 
-fn gpu_requirements() -> GpuResourceRequirements {
-    GpuResourceRequirements {}
+fn gpu_requirements(count: Option<u32>) -> GpuResourceRequirements {
+    GpuResourceRequirements { count }
 }
 
 #[tokio::test]
@@ -1301,7 +1301,7 @@ async fn sandbox_create_sends_gpu_default_request() {
         "openshell",
         &[],
         true,
-        Some(gpu_requirements()),
+        Some(gpu_requirements(None)),
         None,
         None,
         None,
@@ -1328,11 +1328,53 @@ async fn sandbox_create_sends_gpu_default_request() {
         .and_then(|requirements| requirements.gpu.as_ref())
         .expect("GPU requirement should be sent");
 
-    assert!(requests[0]
+    assert_eq!(gpu.count, None);
+}
+
+#[tokio::test]
+async fn sandbox_create_sends_gpu_count_request() {
+    let server = run_server().await;
+    let fake_ssh_dir = tempfile::tempdir().unwrap();
+    let xdg_dir = tempfile::tempdir().unwrap();
+    let _env = test_env(&fake_ssh_dir, &xdg_dir);
+    let tls = test_tls(&server);
+    install_fake_ssh(&fake_ssh_dir);
+
+    run::sandbox_create(
+        &server.endpoint,
+        Some("gpu-two"),
+        None,
+        "openshell",
+        &[],
+        true,
+        Some(gpu_requirements(Some(2))),
+        None,
+        None,
+        None,
+        None,
+        &[],
+        None,
+        None,
+        &["echo".to_string(), "OK".to_string()],
+        Some(false),
+        Some(false),
+        &HashMap::new(),
+        &HashMap::new(),
+        "manual",
+        &tls,
+    )
+    .await
+    .expect("sandbox create should succeed");
+
+    let requests = create_requests(&server).await;
+    let gpu = requests[0]
         .spec
         .as_ref()
         .and_then(|spec| spec.resource_requirements.as_ref())
-        .is_some_and(|requirements| requirements.gpu.is_some()));
+        .and_then(|requirements| requirements.gpu.as_ref())
+        .expect("GPU requirement should be sent");
+
+    assert_eq!(gpu.count, Some(2));
 }
 
 #[tokio::test]