beclab · dkeven · Feb 27, 2026 · Feb 27, 2026
diff --git a/cmd/vGPUmonitor/metrics.go b/cmd/vGPUmonitor/metrics.go
@@ -260,7 +260,7 @@ func (cc ClusterManagerCollector) collectGPUDeviceMetrics(ch chan<- prometheus.M
 }
 
 func (cc ClusterManagerCollector) collectGPUMemoryMetrics(ch chan<- prometheus.Metric, hdev nvml.Device, index int) error {
-	memory, ret := hdev.GetMemoryInfo()
+	memory, ret := util.GetCompatibleNVMLMemoryInfo(hdev)
 	if ret != nvml.SUCCESS {
 		return fmt.Errorf("nvml get memory error ret=%d", ret)
 	}

diff --git a/docker/Dockerfile.withlib b/docker/Dockerfile.withlib
@@ -15,13 +15,20 @@ ENV NVIDIA_VISIBLE_DEVICES=all
 ENV NVIDIA_DRIVER_CAPABILITIES=utility
 
 ARG VERSION
+ARG TARGETARCH
 LABEL version="$VERSION"
 LABEL maintainer="opensource@4paradigm.com"
 COPY ./LICENSE /k8s-vgpu/LICENSE
 COPY --from=gobuild /k8s-vgpu/bin /k8s-vgpu/bin
 COPY --from=gobuild /go/bin/nvidia-mig-parted /k8s-vgpu/bin/
 COPY ./docker/entrypoint.sh /k8s-vgpu/bin/entrypoint.sh
 COPY ./lib /k8s-vgpu/lib
+RUN set -eux; \
+    if [ "$TARGETARCH" = "arm64" ]; then \
+      mv -f /k8s-vgpu/lib/nvidia/libvgpu.arm64.so /k8s-vgpu/lib/nvidia/libvgpu.so; \
+    else \
+      rm -f /k8s-vgpu/lib/nvidia/libvgpu.arm64.so; \
+    fi
 COPY ./docker/vgpu-init.sh /k8s-vgpu/bin/vgpu-init.sh
 
 ENV PATH="/k8s-vgpu/bin:${PATH}"

diff --git a/lib/nvidia/libvgpu.arm64.so b/lib/nvidia/libvgpu.arm64.so
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go
@@ -130,7 +130,7 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
 			panic(0)
 		}
 		memoryTotal := 0
-		memory, ret := ndev.GetMemoryInfo()
+		memory, ret := util.GetCompatibleNVMLMemoryInfo(ndev)
 		if ret == nvml.SUCCESS {
 			memoryTotal = int(memory.Total)
 		} else {
@@ -142,6 +142,11 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
 			klog.Error("nvml get name error ret=", ret)
 			panic(0)
 		}
+		defaultShareMode := util.ShareModeTimeSlicing
+		config, ok := util.GetCompatibleConfigsByDeviceName(Model)
+		if ok && config.DefaultShareMode != "" {
+			defaultShareMode = config.DefaultShareMode
+		}
 		if !strings.Contains(Model, "NVIDIA") {
 			Model = fmt.Sprintf("%v-%v", "NVIDIA", Model)
 		}
@@ -184,6 +189,7 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
 			Mode:         plugin.operatingMode,
 			Health:       health,
 			Architecture: int32(architecture),
+			ShareMode:    defaultShareMode,
 		})
 		klog.Infof("nvml registered device id=%v, memory=%v, type=%v, numa=%v", idx, registeredmem, Model, numa)
 	}
@@ -216,6 +222,20 @@ func (plugin *NvidiaDevicePlugin) RegistrInAnnotation() error {
 	klog.V(4).InfoS("patch nvidia  topo score to node", "hami.io/node-nvidia-score", string(data))
 	annos[nvidia.HandshakeAnnos] = "Reported " + time.Now().String()
 	annos[nvidia.RegisterAnnos] = encodeddevices
+
+	// Ensure each discovered device has a sharemode annotation key present.
+	// Do NOT override an existing value potentially set by scheduler/API handlers.
+	// If missing, seed it from the device's own ShareMode rather than a hard-coded mode.
+	for _, dev := range *devices {
+		shareModeKey := fmt.Sprintf(util.ShareModeAnnotationTpl, dev.ID)
+		if node.Annotations != nil {
+			if _, ok := node.Annotations[shareModeKey]; ok {
+				continue
+			}
+		}
+		annos[shareModeKey] = dev.ShareMode
+	}
+
 	if len(data) > 0 {
 		annos[nvidia.RegisterGPUPairScore] = string(data)
 	}

diff --git a/pkg/scheduler/routes/gpu_manage.go b/pkg/scheduler/routes/gpu_manage.go
@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"net/http"
+	"slices"
 	"sort"
 	"strings"
 	"time"
@@ -34,9 +35,10 @@ type GPUAppInfo struct {
 
 type GPUDetail struct {
 	GPUInfo
-	Apps            []GPUAppInfo `json:"apps"`
-	MemoryAllocated *int64       `json:"memoryAllocated,omitempty"`
-	MemoryAvailable *int64       `json:"memoryAvailable,omitempty"`
+	AllowedShareModes []string     `json:"allowedShareModes,omitempty"`
+	Apps              []GPUAppInfo `json:"apps"`
+	MemoryAllocated   *int64       `json:"memoryAllocated,omitempty"`
+	MemoryAvailable   *int64       `json:"memoryAvailable,omitempty"`
 }
 
 type AssignGPURequest struct {
@@ -111,11 +113,17 @@ func ListGPUDetails(s *scheduler.Scheduler) httprouter.Handle {
 
 		for _, node := range nodes {
 			for _, device := range node.Devices {
+				allowedShareModes := util.DefaultAllowedShareModes
+				config, ok := util.GetCompatibleConfigsByDeviceName(device.Type)
+				if ok && len(config.AllowedShareModes) > 0 {
+					allowedShareModes = config.AllowedShareModes
+				}
 				uuidToGPUDetails[device.ID] = &GPUDetail{
 					GPUInfo: GPUInfo{
 						NodeName:   node.Node.Name,
 						DeviceInfo: device,
 					},
+					AllowedShareModes: allowedShareModes,
 				}
 			}
 		}
@@ -426,6 +434,14 @@ func SwitchGPUMode(s *scheduler.Scheduler) httprouter.Handle {
 		for _, node := range nodes {
 			for _, device := range node.Devices {
 				if device.ID == uuid {
+					config, ok := util.GetCompatibleConfigsByDeviceName(device.Type)
+					if ok && len(config.AllowedShareModes) > 0 {
+						if !slices.Contains(config.AllowedShareModes, req.Mode) {
+							klog.Warningf("GPU %s does not support mode %s, refusing to switch", uuid, req.Mode)
+							http.Error(w, fmt.Sprintf("GPU %s does not support mode %s", uuid, req.Mode), http.StatusBadRequest)
+							return
+						}
+					}
 					targetNode = node.Node
 					break
 				}