From 1d543ab016cb8b96adba01e09e7f38956c306b18 Mon Sep 17 00:00:00 2001 From: Jack Kleeman Date: Mon, 18 May 2026 11:58:49 +0000 Subject: [PATCH] Add no-disable-device-node-modification-hook nvcdi feature flag The disable-device-node-modification CDI hook bind-mounts a tmpfs file over /proc/driver/nvidia/params inside the container. With procMount: Unmasked (Kubernetes 1.34+), that overmount makes the kernel's mount_too_revealing() check reject any subsequent fresh procfs mount from a less-privileged namespace -- for example a nested user namespace created by bubblewrap. Workloads that sandbox themselves inside a GPU container can no longer mount procfs. The hook can be skipped in static cdi mode via nvidia-ctk cdi generate --disable-hooks, but jit-cdi mode (the default since 1.18.0) has no way to suppress an individual hook. Setting NVreg_ModifyDeviceFiles=0 on the host (which the hook short-circuits on) is not viable on systems with NVSwitch: fabricmanager fails to initialize with NV_ERR_INVALID_STATE when the parameter is set, even with device nodes pre-created via udev or mknod. This adds an nvcdi feature flag, following the no-additional-gids-for-device-nodes naming pattern, that suppresses the hook in jit-cdi mode: [nvidia-container-runtime.modes.jit-cdi] nvcdi-feature-flags = ["no-disable-device-node-modification-hook"] The hook's purpose is to prevent in-container nvidia-smi/libnvidia-ml from creating extra /dev/nvidiaN device nodes. That prevention is already enforced by cgroup device controls in container runtimes, so disabling the hook does not affect device isolation. The flag is opt-in and off by default. 
Signed-off-by: Jack Kleeman --- pkg/nvcdi/api.go | 4 ++++ pkg/nvcdi/options.go | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index 7bf2fdc12..2953f3bbf 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -100,4 +100,8 @@ const ( // FeatureDisableIPCDiscoverer disables the inclusion of IPC sockets // (nvidia-persistenced, nvidia-fabricmanager, MPS) in the CDI spec. FeatureDisableIPCDiscoverer = FeatureFlag("disable-ipc-discoverer") + + // FeatureNoDisableDeviceNodeModificationHook disables the + // disable-device-node-modification CDI hook. + FeatureNoDisableDeviceNodeModificationHook = FeatureFlag("no-disable-device-node-modification-hook") ) diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index a5e2b1c40..e30024844 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -119,6 +119,10 @@ func populateOptions(opts ...Option) *options { o.disabledHooks = append(o.disabledHooks, HookEnableCudaCompat, DisableDeviceNodeModificationHook) } + if o.featureFlags[FeatureNoDisableDeviceNodeModificationHook] { + o.disabledHooks = append(o.disabledHooks, DisableDeviceNodeModificationHook) + } + if o.editsFactory == nil { o.editsFactory = edits.NewFactory( edits.WithLogger(o.logger),