From c5d57082c37221eb0f9abf517743a149683a45f9 Mon Sep 17 00:00:00 2001 From: dkeven Date: Thu, 12 Mar 2026 15:20:53 +0800 Subject: [PATCH] fix(device-plugin): do not abort plugin start in case no device is found --- cmd/device-plugin/nvidia/main.go | 3 ++- pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_manager.go | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmd/device-plugin/nvidia/main.go b/cmd/device-plugin/nvidia/main.go index 7fa133a0c..db3149f17 100644 --- a/cmd/device-plugin/nvidia/main.go +++ b/cmd/device-plugin/nvidia/main.go @@ -328,7 +328,8 @@ func startPlugins(c *cli.Context, flags []cli.Flag, restarting bool) ([]plugin.I } if started == 0 { - klog.Info("No devices found. Waiting indefinitely.") + klog.Info("No devices found. Retrying in 30s...") + return plugins, true, nil } return plugins, false, nil diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_manager.go b/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_manager.go index 0008ff163..55d8168fb 100644 --- a/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_manager.go +++ b/pkg/device-plugin/nvidiadevice/nvinternal/rm/nvml_manager.go @@ -93,6 +93,7 @@ func NewNVMLResourceManagers(nvmllib nvml.Interface, config *nvidia.DeviceConfig nvml: nvmllib, } r.rescanInterval = 30 * time.Second + r.lastRescan = time.Now() rms = append(rms, r) }