diff --git a/cmd/vGPUmonitor/metrics.go b/cmd/vGPUmonitor/metrics.go
index 24574d002..54973df8a 100644
--- a/cmd/vGPUmonitor/metrics.go
+++ b/cmd/vGPUmonitor/metrics.go
@@ -260,7 +260,7 @@ func (cc ClusterManagerCollector) collectGPUDeviceMetrics(ch chan<- prometheus.M
 }
 
 func (cc ClusterManagerCollector) collectGPUMemoryMetrics(ch chan<- prometheus.Metric, hdev nvml.Device, index int) error {
-	memory, ret := hdev.GetMemoryInfo()
+	memory, ret := util.GetCompatibleNVMLMemoryInfo(hdev)
 	if ret != nvml.SUCCESS {
 		return fmt.Errorf("nvml get memory error ret=%d", ret)
 	}
diff --git a/docker/Dockerfile.withlib b/docker/Dockerfile.withlib
index 900066134..e610c7926 100644
--- a/docker/Dockerfile.withlib
+++ b/docker/Dockerfile.withlib
@@ -15,6 +15,7 @@ ENV NVIDIA_VISIBLE_DEVICES=all
 ENV NVIDIA_DRIVER_CAPABILITIES=utility
 
 ARG VERSION
+ARG TARGETARCH
 LABEL version="$VERSION"
 LABEL maintainer="opensource@4paradigm.com"
 COPY ./LICENSE /k8s-vgpu/LICENSE
@@ -22,6 +23,12 @@ COPY --from=gobuild /k8s-vgpu/bin /k8s-vgpu/bin
 COPY --from=gobuild /go/bin/nvidia-mig-parted /k8s-vgpu/bin/
 COPY ./docker/entrypoint.sh /k8s-vgpu/bin/entrypoint.sh
 COPY ./lib /k8s-vgpu/lib
+RUN set -eux; \
+    if [ "$TARGETARCH" = "arm64" ]; then \
+      mv -f /k8s-vgpu/lib/nvidia/libvgpu.arm64.so /k8s-vgpu/lib/nvidia/libvgpu.so; \
+    else \
+      rm -f /k8s-vgpu/lib/nvidia/libvgpu.arm64.so; \
+    fi
 COPY ./docker/vgpu-init.sh /k8s-vgpu/bin/vgpu-init.sh
 
 ENV PATH="/k8s-vgpu/bin:${PATH}"
diff --git a/lib/nvidia/libvgpu.arm64.so b/lib/nvidia/libvgpu.arm64.so
new file mode 100755
index 000000000..932dcc1e8
Binary files /dev/null and b/lib/nvidia/libvgpu.arm64.so differ
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go
index f1b765c47..d325c3e68 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go
+++ b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/register.go
@@ -130,7 +130,7 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
 			panic(0)
 		}
 		memoryTotal := 0
-		memory, ret := ndev.GetMemoryInfo()
+		memory, ret := util.GetCompatibleNVMLMemoryInfo(ndev)
 		if ret == nvml.SUCCESS {
 			memoryTotal = int(memory.Total)
 		} else {
@@ -142,6 +142,11 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
 			klog.Error("nvml get name error ret=", ret)
 			panic(0)
 		}
+		defaultShareMode := util.ShareModeTimeSlicing
+		config, ok := util.GetCompatibleConfigsByDeviceName(Model)
+		if ok && config.DefaultShareMode != "" {
+			defaultShareMode = config.DefaultShareMode
+		}
 		if !strings.Contains(Model, "NVIDIA") {
 			Model = fmt.Sprintf("%v-%v", "NVIDIA", Model)
 		}
@@ -184,6 +189,7 @@ func (plugin *NvidiaDevicePlugin) getAPIDevices() *[]*util.DeviceInfo {
 			Mode:         plugin.operatingMode,
 			Health:       health,
 			Architecture: int32(architecture),
+			ShareMode:    defaultShareMode,
 		})
 		klog.Infof("nvml registered device id=%v, memory=%v, type=%v, numa=%v", idx, registeredmem, Model, numa)
 	}
@@ -216,6 +222,20 @@ func (plugin *NvidiaDevicePlugin) RegistrInAnnotation() error {
 	klog.V(4).InfoS("patch nvidia  topo score to node", "hami.io/node-nvidia-score", string(data))
 	annos[nvidia.HandshakeAnnos] = "Reported " + time.Now().String()
 	annos[nvidia.RegisterAnnos] = encodeddevices
+
+	// Ensure each discovered device has a sharemode annotation key present.
+	// Do NOT override an existing value potentially set by scheduler/API handlers.
+	// If missing, use the share mode reported for the device (time-slicing unless a per-model default applies).
+	for _, dev := range *devices {
+		shareModeKey := fmt.Sprintf(util.ShareModeAnnotationTpl, dev.ID)
+		if node.Annotations != nil {
+			if _, ok := node.Annotations[shareModeKey]; ok {
+				continue
+			}
+		}
+		annos[shareModeKey] = dev.ShareMode
+	}
+
 	if len(data) > 0 {
 		annos[nvidia.RegisterGPUPairScore] = string(data)
 	}
diff --git a/pkg/scheduler/routes/gpu_manage.go b/pkg/scheduler/routes/gpu_manage.go
index 2e3b8cb99..4ffa4c4e3 100644
--- a/pkg/scheduler/routes/gpu_manage.go
+++ b/pkg/scheduler/routes/gpu_manage.go
@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"net/http"
+	"slices"
 	"sort"
 	"strings"
 	"time"
@@ -34,9 +35,10 @@ type GPUAppInfo struct {
 
 type GPUDetail struct {
 	GPUInfo
-	Apps            []GPUAppInfo `json:"apps"`
-	MemoryAllocated *int64       `json:"memoryAllocated,omitempty"`
-	MemoryAvailable *int64       `json:"memoryAvailable,omitempty"`
+	AllowedShareModes []string     `json:"allowedShareModes,omitempty"`
+	Apps              []GPUAppInfo `json:"apps"`
+	MemoryAllocated   *int64       `json:"memoryAllocated,omitempty"`
+	MemoryAvailable   *int64       `json:"memoryAvailable,omitempty"`
 }
 
 type AssignGPURequest struct {
@@ -111,11 +113,17 @@ func ListGPUDetails(s *scheduler.Scheduler) httprouter.Handle {
 
 		for _, node := range nodes {
 			for _, device := range node.Devices {
+				allowedShareModes := util.DefaultAllowedShareModes
+				config, ok := util.GetCompatibleConfigsByDeviceName(device.Type)
+				if ok && len(config.AllowedShareModes) > 0 {
+					allowedShareModes = config.AllowedShareModes
+				}
 				uuidToGPUDetails[device.ID] = &GPUDetail{
 					GPUInfo: GPUInfo{
 						NodeName:   node.Node.Name,
 						DeviceInfo: device,
 					},
+					AllowedShareModes: allowedShareModes,
 				}
 			}
 		}
@@ -426,6 +434,14 @@ func SwitchGPUMode(s *scheduler.Scheduler) httprouter.Handle {
 		for _, node := range nodes {
 			for _, device := range node.Devices {
 				if device.ID == uuid {
+					config, ok := util.GetCompatibleConfigsByDeviceName(device.Type)
+					if ok && len(config.AllowedShareModes) > 0 {
+						if !slices.Contains(config.AllowedShareModes, req.Mode) {
+							klog.Warningf("GPU %s does not support mode %s, refusing to switch", uuid, req.Mode)
+							http.Error(w, fmt.Sprintf("GPU %s does not support mode %s", uuid, req.Mode), http.StatusBadRequest)
+							return
+						}
+					}
 					targetNode = node.Node
 					break
 				}
diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go
index 0a8f99cc0..ce3f5aa18 100644
--- a/pkg/scheduler/scheduler.go
+++ b/pkg/scheduler/scheduler.go
@@ -18,7 +18,6 @@ package scheduler
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"maps"
 	"sort"
@@ -30,6 +29,7 @@ import (
 
 	corev1 "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/client-go/informers"
@@ -662,6 +662,200 @@ func (s *Scheduler) getPodUsage() (map[string]PodUseDeviceStat, error) {
 	return podUsageStat, nil
 }
 
+type nvidiaRequestSummary struct { // aggregate of a pod's NVIDIA GPU requests, built by summarizeNVIDIARequests
+	requested     int   // sum of Nums over all counted requests
+	hasMemory     bool  // true once any counted request carries an absolute or percentage memory request
+	memoryByte    int64 // largest Memreq seen; NOTE(review): unit follows Memreq — confirm it really is bytes
+	memoryPercent int32 // largest MemPercentagereq seen, ignoring the values 0 and 101
+}
+
+// summarizeNVIDIARequests folds every NVIDIA GPU request of a pod into one nvidiaRequestSummary.
+// Requests of another device type, or with a non-positive count, are ignored. GPU counts are
+// added up, while memory is reduced to the largest absolute request and the largest percentage.
+func summarizeNVIDIARequests(resourceReqs util.PodDeviceRequests) nvidiaRequestSummary {
+	var summary nvidiaRequestSummary
+	for _, containerReqs := range resourceReqs {
+		for _, r := range containerReqs {
+			if r.Type != nvidia.NvidiaGPUDevice || r.Nums <= 0 {
+				continue
+			}
+			summary.requested += int(r.Nums)
+			switch {
+			case r.Memreq > 0:
+				// An absolute request takes precedence over a percentage on the same entry.
+				// Only the maximum is kept, mirroring the pod-spec handling; this relies on
+				// the current assumption that a single container carries the memory request.
+				summary.hasMemory = true
+				summary.memoryByte = max(summary.memoryByte, int64(r.Memreq))
+			case r.MemPercentagereq != 0 && r.MemPercentagereq != 101:
+				// 0 and 101 are not treated as requests (presumably "unset" markers — confirm).
+				// The maximum across containers gives a conservative single-value summary.
+				summary.hasMemory = true
+				summary.memoryPercent = max(summary.memoryPercent, r.MemPercentagereq)
+			}
+		}
+	}
+	return summary
+}
+
+// requiredNvidiaMemoryBytes returns the memory one GPU must offer for sum: the absolute request if set, else the percentage of totalMemory, else 0.
+func requiredNvidiaMemoryBytes(sum nvidiaRequestSummary, totalMemory int64) int64 {
+	switch {
+	case !sum.hasMemory:
+		return 0
+	case sum.memoryByte > 0:
+		return sum.memoryByte
+	case sum.memoryPercent > 0 && totalMemory > 0:
+		return totalMemory * int64(sum.memoryPercent) / 100
+	}
+	return 0
+}
+
+// normalizeGPUUUID returns the part of uuid before the first "[", or uuid unchanged when it contains none.
+func normalizeGPUUUID(uuid string) string {
+	// strings.Cut yields the whole input as its first result when the separator is absent.
+	base, _, _ := strings.Cut(uuid, "[")
+	return base
+}
+
+// collectConsumedGPUUUIDsByApp returns the normalized UUIDs of GPUs assigned to appName's pods, leaving out currentPod when it is non-nil.
+func (s *Scheduler) collectConsumedGPUUUIDsByApp(appName string, currentPod *corev1.Pod) map[string]struct{} {
+	inUse := make(map[string]struct{})
+	for _, info := range s.ListPodsInfo() {
+		if info.Labels == nil || info.Labels[util.AppNameLabelKey] != appName {
+			continue
+		}
+		if currentPod != nil && info.Namespace == currentPod.Namespace && info.Name == currentPod.Name {
+			continue
+		}
+		for _, perType := range info.Devices {
+			for _, perContainer := range perType {
+				for _, dev := range perContainer {
+					if id := normalizeGPUUUID(dev.UUID); id != "" {
+						inUse[id] = struct{}{}
+					}
+				}
+			}
+		}
+	}
+	return inUse
+}
+
+// selectDynamicGPUCandidates lists GPUs that can be offered to the app in addition to its bound ones.
+// A GPU qualifies when it is healthy, has an ID, is absent from appBoundUUIDs, alreadySelected and
+// consumedByApp, and its share mode admits requestSummary. It returns (nil, nil) when
+// requiredCount is not positive and forwards any error from ListNodes.
+func (s *Scheduler) selectDynamicGPUCandidates(
+	appBoundUUIDs map[string]struct{},
+	alreadySelected map[string]struct{},
+	consumedByApp map[string]struct{},
+	allBindings []*v1alpha1.GPUBinding,
+	requiredCount int,
+	requestSummary nvidiaRequestSummary,
+) ([]string, error) {
+	if requiredCount <= 0 {
+		return nil, nil
+	}
+	nodes, err := s.ListNodes()
+	if err != nil {
+		return nil, err
+	}
+
+	// Candidates are limited to the node hosting one of the app's bound GPUs, if any.
+	// TODO: needs more flexibility once an app may be bound to multiple nodes. GPUs
+	// already consumed by the app should then not act as constraints, and bound but
+	// unconsumed GPUs may be used with or without other candidates. E.g. an app with
+	// 3 GPUs, 1 consumed and 2 not, could take either unconsumed GPU plus one unbound
+	// GPU, or two entirely different GPUs.
+	nodeOfUUID := make(map[string]string)
+	for _, n := range nodes {
+		for _, d := range n.Devices {
+			nodeOfUUID[d.ID] = n.Node.Name
+		}
+	}
+	// NOTE(review): map iteration order is unspecified, so if the bound GPUs span several
+	// nodes the pinned node is whichever known UUID the loop meets first.
+	pinnedNode := ""
+	for uuid := range appBoundUUIDs {
+		if name, found := nodeOfUUID[uuid]; found {
+			pinnedNode = name
+			break
+		}
+	}
+
+	// Tally, per GPU UUID, how many bindings exist and how much memory they reserve.
+	bindingsPerGPU := make(map[string]int)
+	reservedMemory := make(map[string]int64)
+	for _, binding := range allBindings {
+		id := binding.Spec.UUID
+		if id == "" {
+			continue
+		}
+		bindingsPerGPU[id]++
+		if mem := binding.Spec.Memory; mem != nil {
+			reservedMemory[id] += mem.Value()
+		}
+	}
+
+	// Candidates are grouped by the share mode of their device.
+	var memSliced, exclusive, timeSliced []string
+	for _, n := range nodes {
+		if pinnedNode != "" && n.Node.Name != pinnedNode {
+			continue
+		}
+		for _, d := range n.Devices {
+			uuid := d.ID
+			if uuid == "" || !d.Health {
+				continue
+			}
+			// Skip GPUs the app already binds, has been given in this pass, or is using.
+			_, bound := appBoundUUIDs[uuid]
+			_, picked := alreadySelected[uuid]
+			_, consumed := consumedByApp[uuid]
+			if bound || picked || consumed {
+				continue
+			}
+
+			capacity := int64(d.Devmem)
+			needed := requiredNvidiaMemoryBytes(requestSummary, capacity)
+			// Without a memory request any device capacity is acceptable.
+			fitsWhole := !requestSummary.hasMemory || capacity >= needed
+			switch d.ShareMode {
+			case util.ShareModeMemSlicing:
+				// Offered only to pods that state a memory request, and only while the
+				// part not reserved by existing bindings still covers that request.
+				if requestSummary.hasMemory && capacity-reservedMemory[uuid] >= needed {
+					memSliced = append(memSliced, uuid)
+				}
+			case util.ShareModeExclusive:
+				// An exclusive GPU must not carry any binding yet.
+				if bindingsPerGPU[uuid] == 0 && fitsWhole {
+					exclusive = append(exclusive, uuid)
+				}
+			case util.ShareModeTimeSlicing:
+				// Existing bindings are not counted here; only total capacity is checked.
+				if fitsWhole {
+					timeSliced = append(timeSliced, uuid)
+				}
+			}
+		}
+	}
+
+	// Only one group is returned: mem-sliced GPUs when the request has memory and some
+	// fit, otherwise exclusive GPUs, otherwise time-sliced ones. requiredCount does not
+	// truncate the list; the caller checks whether enough candidates were found.
+	result := make([]string, 0)
+	switch {
+	case requestSummary.hasMemory && len(memSliced) > 0:
+		result = append(result, memSliced...)
+	case len(exclusive) > 0:
+		result = append(result, exclusive...)
+	default:
+		result = append(result, timeSliced...)
+	}
+	return result, nil
+}
+
 func (s *Scheduler) Bind(args extenderv1.ExtenderBindingArgs) (*extenderv1.ExtenderBindingResult, error) {
 	klog.InfoS("Attempting to bind pod to node", "pod", args.PodName, "namespace", args.PodNamespace, "node", args.Node)
 	var res *extenderv1.ExtenderBindingResult
@@ -749,35 +943,131 @@ func (s *Scheduler) Filter(args extenderv1.ExtenderArgs) (*extenderv1.ExtenderFi
 	if annos == nil {
 		annos = make(map[string]string)
 	}
-	appName := args.Pod.Labels[util.AppNameLabelKey]
-	hasBindings := false
-	if appName != "" {
-		bindings, err := s.ListGPUBindings()
-		if err != nil {
-			klog.ErrorS(err, "Failed to list GPUBindings for Filter", "pod", klog.KObj(args.Pod))
-			s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
-			return nil, err
-		}
-		var uuids []string
-		for _, b := range bindings {
-			if b.Spec.AppName != appName {
-				continue
+	appName := ""
+	if args.Pod.Labels != nil {
+		appName = args.Pod.Labels[util.AppNameLabelKey]
+	}
+	if appName == "" {
+		err := fmt.Errorf("cannot schedule pod without %s label", util.AppNameLabelKey)
+		s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
+		failedNodes := make(map[string]string)
+		if args.NodeNames != nil {
+			for _, nodeName := range *args.NodeNames {
+				failedNodes[nodeName] = "pod has no owner application"
 			}
-			if !b.MatchPod(args.Pod) {
-				continue
+		}
+		return &extenderv1.ExtenderFilterResult{
+			FailedNodes: failedNodes,
+		}, nil
+	}
+
+	bindings, err := s.ListGPUBindings()
+	if err != nil {
+		klog.ErrorS(err, "Failed to list GPUBindings for Filter", "pod", klog.KObj(args.Pod))
+		s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
+		return nil, err
+	}
+
+	appBoundByUUID := make(map[string]*v1alpha1.GPUBinding)
+	appBoundUUIDs := make(map[string]struct{})
+	matchedBindings := make([]*v1alpha1.GPUBinding, 0)
+	for _, b := range bindings {
+		if b.Spec.AppName != appName || b.Spec.UUID == "" {
+			continue
+		}
+		appBoundUUIDs[b.Spec.UUID] = struct{}{}
+		if _, ok := appBoundByUUID[b.Spec.UUID]; !ok {
+			appBoundByUUID[b.Spec.UUID] = b
+		}
+		// todo: maybe we can remove this check, because the pod selector currently only matches the app name
+		if !b.MatchPod(args.Pod) {
+			continue
+		}
+		matchedBindings = append(matchedBindings, b)
+		// todo: currently this will conflict if the pod has multiple containers, or requires multiple GPUs with different memory requests in bindings
+		if b.Spec.Memory != nil {
+			annos[fmt.Sprintf(nvidia.AppGPUMemAnnotationTpl, b.Spec.UUID)] = b.Spec.Memory.String()
+		}
+	}
+
+	policyMode := ""
+	if args.Pod.Labels != nil {
+		policyMode = args.Pod.Labels[nvidia.AppPodGPUConsumePolicyKey]
+	}
+	consumedByApp := s.collectConsumedGPUUUIDsByApp(appName, args.Pod)
+	if policyMode == "" || policyMode == nvidia.AppPodGPUConsumePolicyAll {
+		if len(matchedBindings) > 0 {
+			for _, b := range matchedBindings {
+				if _, occupied := consumedByApp[b.Spec.UUID]; occupied {
+					err := fmt.Errorf("bound GPU %s of app %s is already consumed by another pod", b.Spec.UUID, appName)
+					s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
+					return &extenderv1.ExtenderFilterResult{
+						FailedNodes: map[string]string{},
+					}, nil
+				}
 			}
-			hasBindings = true
-			uuids = append(uuids, b.Spec.UUID)
-			if b.Spec.Memory != nil {
-				annos[fmt.Sprintf(nvidia.AppGPUMemAnnotationTpl, b.Spec.UUID)] = b.Spec.Memory.String()
+			for ctrIdx := range resourceReqs {
+				for reqIdx, req := range resourceReqs[ctrIdx] {
+					if req.Type != nvidia.NvidiaGPUDevice || req.Nums <= 0 {
+						continue
+					}
+					// this assumes only one container in the pod has a gpu request
+					req.Nums = int32(len(matchedBindings))
+					resourceReqs[ctrIdx][reqIdx] = req
+				}
 			}
 		}
-		if len(uuids) > 0 {
-			annos[nvidia.GPUUseUUID] = strings.Join(uuids, ",")
-		} else {
-			// Ensure the hint is empty if nothing matches
-			annos[nvidia.GPUUseUUID] = ""
+	}
+
+	nvidiaSummary := summarizeNVIDIARequests(resourceReqs)
+	selectedUUIDs := make([]string, 0)
+	selectedUUIDSet := make(map[string]struct{})
+	appendSelectedUUID := func(uuid string) {
+		if uuid == "" {
+			return
+		}
+		if _, ok := selectedUUIDSet[uuid]; ok {
+			return
 		}
+		selectedUUIDSet[uuid] = struct{}{}
+		selectedUUIDs = append(selectedUUIDs, uuid)
+	}
+
+	for _, b := range matchedBindings {
+		if _, occupied := consumedByApp[b.Spec.UUID]; occupied {
+			continue
+		}
+		appendSelectedUUID(b.Spec.UUID)
+	}
+
+	if nvidiaSummary.requested > 0 && len(selectedUUIDs) < nvidiaSummary.requested {
+		dynamicCandidates, err := s.selectDynamicGPUCandidates(
+			appBoundUUIDs,
+			selectedUUIDSet,
+			consumedByApp,
+			bindings,
+			nvidiaSummary.requested-len(selectedUUIDs),
+			nvidiaSummary,
+		)
+		if err != nil {
+			s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
+			return nil, err
+		}
+		for _, uuid := range dynamicCandidates {
+			appendSelectedUUID(uuid)
+		}
+	}
+	if nvidiaSummary.requested > 0 && len(selectedUUIDs) < nvidiaSummary.requested {
+		err := fmt.Errorf("insufficient GPU candidates for app %s, requested=%d, available=%d", appName, nvidiaSummary.requested, len(selectedUUIDs))
+		s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
+		return &extenderv1.ExtenderFilterResult{
+			FailedNodes: map[string]string{},
+		}, nil
+	}
+	if len(selectedUUIDs) > 0 {
+		annos[nvidia.GPUUseUUID] = strings.Join(selectedUUIDs, ",")
+	} else {
+		annos[nvidia.GPUUseUUID] = ""
 	}
 	s.delPod(args.Pod)
 	nodeUsage, failedNodes, err := s.getNodesUsage(args.NodeNames, args.Pod)
@@ -808,49 +1098,82 @@ func (s *Scheduler) Filter(args extenderv1.ExtenderArgs) (*extenderv1.ExtenderFi
 	m := (*nodeScores).NodeList[len((*nodeScores).NodeList)-1]
 
 	devlist, ok := m.Devices[nvidia.NvidiaGPUDevice]
-	if ok && len(devlist) > 0 && !hasBindings {
-		appName := args.Pod.Labels[util.AppNameLabelKey]
-		if appName == "" {
-			klog.V(4).InfoS("Cannot find the owner Application to create GPUBinding automatically",
-				"pod", args.Pod.Name)
-			err := errors.New("Cannot find the owner Application to create GPUBinding automatically")
+	if ok && len(devlist) > 0 {
+		nodeInfo, err := s.GetNode(m.NodeID)
+		if err != nil {
 			s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
 			return nil, err
 		}
-		var uuid string
+		shareModeByUUID := make(map[string]string)
+		for _, d := range nodeInfo.Devices {
+			shareModeByUUID[d.ID] = d.ShareMode
+		}
+
+		allocatedUUIDs := make(map[string]struct{})
 		for _, cdev := range devlist {
 			for _, dev := range cdev {
-				if dev.ShareMode == util.ShareModeTimeSlicing && dev.UUID != "" {
-					uuid = dev.UUID
+				uuid := normalizeGPUUUID(dev.UUID)
+				if uuid == "" {
+					continue
 				}
+				allocatedUUIDs[uuid] = struct{}{}
 			}
 		}
-		if uuid == "" {
-			klog.V(4).InfoS("Cannot find a GPU UUID to create GPUBinding automatically",
-				"pod", args.Pod.Name)
-			err := errors.New("Cannot find a GPU UUID to create GPUBinding automatically")
-			s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
-			return nil, err
+		bindingAllocatedMemory := make(map[string]int64)
+		for _, b := range bindings {
+			if b.Spec.UUID == "" || b.Spec.Memory == nil {
+				continue
+			}
+			bindingAllocatedMemory[b.Spec.UUID] += b.Spec.Memory.Value()
+		}
+		deviceTotalMemByUUID := make(map[string]int64)
+		for _, d := range nodeInfo.Devices {
+			deviceTotalMemByUUID[d.ID] = int64(d.Devmem)
 		}
-		autoBinding := &v1alpha1.GPUBinding{
-			ObjectMeta: metav1.ObjectMeta{
-				Name: strings.ToLower(fmt.Sprintf("%s-%s-%d", appName, uuid, time.Now().Unix())),
-			},
-			Spec: v1alpha1.GPUBindingSpec{
-				UUID:    uuid,
-				AppName: appName,
-				PodSelector: &metav1.LabelSelector{
-					MatchLabels: map[string]string{
-						util.AppNameLabelKey: appName,
+		for uuid := range allocatedUUIDs {
+			if _, exists := appBoundByUUID[uuid]; exists {
+				continue
+			}
+			autoBinding := &v1alpha1.GPUBinding{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: strings.ToLower(fmt.Sprintf("%s-%s-%d", appName, uuid, time.Now().Unix())),
+				},
+				Spec: v1alpha1.GPUBindingSpec{
+					UUID:    uuid,
+					AppName: appName,
+					PodSelector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{
+							util.AppNameLabelKey: appName,
+						},
 					},
 				},
-			},
-		}
-		err := s.CreateGPUBinding(context.Background(), autoBinding)
-		if err != nil {
-			klog.ErrorS(err, "Failed to create GPUBinding automatically", "pod", args.Pod.Name)
-			s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
-			return nil, err
+			}
+			if shareModeByUUID[uuid] == util.ShareModeMemSlicing {
+				totalMem := deviceTotalMemByUUID[uuid]
+				requiredMem := requiredNvidiaMemoryBytes(nvidiaSummary, totalMem)
+				if requiredMem <= 0 {
+					err := fmt.Errorf("invalid mem-slicing GPU memory request for binding on %s: request=%d", uuid, requiredMem)
+					klog.ErrorS(err, "Failed to create GPUBinding automatically", "pod", args.Pod.Name, "uuid", uuid)
+					s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
+					return nil, err
+				}
+				if totalMem > 0 && bindingAllocatedMemory[uuid]+requiredMem > totalMem {
+					err := fmt.Errorf("insufficient mem-slicing GPU memory for binding on %s: allocated=%d, request=%d, total=%d", uuid, bindingAllocatedMemory[uuid], requiredMem, totalMem)
+					klog.ErrorS(err, "Failed to create GPUBinding automatically", "pod", args.Pod.Name, "uuid", uuid)
+					s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
+					return nil, err
+				}
+				memQ := resource.NewQuantity(requiredMem, resource.BinarySI)
+				autoBinding.Spec.Memory = memQ
+				bindingAllocatedMemory[uuid] += requiredMem
+			}
+			err := s.CreateGPUBinding(context.Background(), autoBinding)
+			if err != nil {
+				klog.ErrorS(err, "Failed to create GPUBinding automatically", "pod", args.Pod.Name, "uuid", uuid)
+				s.recordScheduleFilterResultEvent(args.Pod, EventReasonFilteringFailed, "", err)
+				return nil, err
+			}
+			appBoundByUUID[uuid] = autoBinding
 		}
 	}
 
diff --git a/pkg/scheduler/score.go b/pkg/scheduler/score.go
index 8843b2d8e..7fa3b1907 100644
--- a/pkg/scheduler/score.go
+++ b/pkg/scheduler/score.go
@@ -25,7 +25,6 @@ import (
 	"k8s.io/klog/v2"
 
 	"github.com/Project-HAMi/HAMi/pkg/device"
-	"github.com/Project-HAMi/HAMi/pkg/device/nvidia"
 	"github.com/Project-HAMi/HAMi/pkg/scheduler/config"
 	"github.com/Project-HAMi/HAMi/pkg/scheduler/policy"
 	"github.com/Project-HAMi/HAMi/pkg/util"
@@ -119,84 +118,6 @@ func fitInDevices(node *NodeUsage, requests util.ContainerDeviceRequests, annos
 }
 
 func (s *Scheduler) calcScore(nodes *map[string]*NodeUsage, resourceReqs util.PodDeviceRequests, annos map[string]string, task *corev1.Pod, failedNodes map[string]string) (*policy.NodeScoreList, error) {
-	appName := ""
-	if task.Labels != nil {
-		appName = task.Labels[util.AppNameLabelKey]
-	}
-
-	if appName != "" {
-		policyMode := task.Labels[nvidia.AppPodGPUConsumePolicyKey]
-		boundCSV := annos[nvidia.GPUUseUUID]
-		boundSet := map[string]struct{}{}
-		if boundCSV != "" {
-			for _, u := range strings.Split(boundCSV, ",") {
-				u = strings.TrimSpace(u)
-				if u != "" {
-					boundSet[u] = struct{}{}
-				}
-			}
-		}
-		// compute consumed UUIDs by live pods of the same app (in scheduler memory)
-		consumed := map[string]struct{}{}
-		for _, p := range s.ListPodsInfo() {
-			if p.Labels == nil || p.Labels[util.AppNameLabelKey] != appName {
-				continue
-			}
-			for _, pdev := range p.Devices {
-				for _, cdevs := range pdev {
-					for _, cdev := range cdevs {
-						uuid := cdev.UUID
-						if strings.Contains(uuid, "[") {
-							uuid = strings.Split(uuid, "[")[0]
-						}
-						if uuid != "" {
-							consumed[uuid] = struct{}{}
-						}
-					}
-				}
-			}
-		}
-
-		if policyMode == "" || policyMode == nvidia.AppPodGPUConsumePolicyAll {
-			if len(consumed) > 0 {
-				empty := policy.NodeScoreList{Policy: config.NodeSchedulerPolicy, NodeList: []*policy.NodeScore{}}
-				return &empty, nil
-			}
-			if len(boundSet) > 0 {
-				for ctrIdx := range resourceReqs {
-					for key, req := range resourceReqs[ctrIdx] {
-						if req.Type == nvidia.NvidiaGPUDevice {
-							req.Nums = int32(len(boundSet))
-							resourceReqs[ctrIdx][key] = req
-						}
-					}
-				}
-			}
-		} else if policyMode == nvidia.AppPodGPUConsumePolicySingle {
-			// allow only unconsumed bound UUIDs by filter out those already consumed by other pods of the same app
-			if len(boundSet) > 0 {
-				pruned := make([]string, 0, len(boundSet))
-				for u := range boundSet {
-					if _, ok := consumed[u]; !ok {
-						pruned = append(pruned, u)
-					}
-				}
-				if len(pruned) == 0 {
-					empty := policy.NodeScoreList{Policy: config.NodeSchedulerPolicy, NodeList: []*policy.NodeScore{}}
-					return &empty, nil
-				}
-				annos[nvidia.GPUUseUUID] = strings.Join(pruned, ",")
-			}
-			for ctrIdx := range resourceReqs {
-				for key, req := range resourceReqs[ctrIdx] {
-					if req.Type == nvidia.NvidiaGPUDevice && req.Nums != 1 {
-						req.Nums = 1
-						resourceReqs[ctrIdx][key] = req
-					}
-				}
-			}
-		}
-	}
 	userNodePolicy := config.NodeSchedulerPolicy
 	if annos != nil {
 		if value, ok := annos[policy.NodeSchedulerPolicyAnnotationKey]; ok {
diff --git a/pkg/util/compatible.go b/pkg/util/compatible.go
new file mode 100644
index 000000000..8aa93636c
--- /dev/null
+++ b/pkg/util/compatible.go
@@ -0,0 +1,64 @@
+package util
+
+import (
+	"regexp"
+	"strings"
+
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+)
+
+// GetCompatibleNVMLMemoryInfo reads a device's memory info, tolerating drivers that do not support it.
+//
+// Whatever dev.GetMemoryInfo() yields is returned unchanged unless the return code is
+// nvml.ERROR_NOT_SUPPORTED. In that case the name from dev.GetName() is looked up in the
+// (currently hardcoded) compatibility table, with these outcomes:
+//
+//   - GetName fails: the GetMemoryInfo value together with GetName's return code
+//   - no rule matches the name: the GetMemoryInfo value with ERROR_NOT_SUPPORTED
+//   - a rule matches: Total and Used equal to the rule's TotalMemory (bytes), Free 0,
+//     and nvml.SUCCESS
+//
+// NOTE: The mapping table is intentionally hardcoded for now; later it can be moved to
+// configuration.
+func GetCompatibleNVMLMemoryInfo(dev nvml.Device) (nvml.Memory, nvml.Return) {
+	info, status := dev.GetMemoryInfo()
+	// Success and every error other than "not supported" pass straight through.
+	if status != nvml.ERROR_NOT_SUPPORTED {
+		return info, status
+	}
+
+	// The fallback is keyed by the device name, so the name must be readable.
+	model, nameStatus := dev.GetName()
+	if nameStatus != nvml.SUCCESS {
+		return info, nameStatus
+	}
+
+	entry, known := GetCompatibleConfigsByDeviceName(model)
+	if !known {
+		return info, status
+	}
+	// No live numbers are available: report the table capacity as fully used.
+	fallback := nvml.Memory{Total: entry.TotalMemory, Used: entry.TotalMemory}
+	return fallback, nvml.SUCCESS
+}
+
+type DeviceCompatibleConfigPattern struct { // per-model compatibility settings, selected by device name
+	Pattern           *regexp.Regexp // matched against the whitespace-trimmed device name
+	TotalMemory       uint64         // bytes; reported as total memory when NVML cannot provide memory info
+	DefaultShareMode  string         // share mode registered for a matching device when non-empty
+	AllowedShareModes []string       // share modes a matching device may use; empty means this rule adds no restriction
+}
+
+var compatibleConfigPatterns = []DeviceCompatibleConfigPattern{ // tried in order by GetCompatibleConfigsByDeviceName; the first match wins
+	{Pattern: regexp.MustCompile(`^NVIDIA\s+GB10$`), TotalMemory: 96 * 1024 * 1024 * 1024, DefaultShareMode: ShareModeMemSlicing, AllowedShareModes: []string{ShareModeMemSlicing, ShareModeExclusive}}, // GB10: 96 GiB total
+}
+
+// GetCompatibleConfigsByDeviceName returns the first rule whose Pattern matches the whitespace-trimmed name; the bool is false when none does.
+func GetCompatibleConfigsByDeviceName(name string) (DeviceCompatibleConfigPattern, bool) {
+	for trimmed, i := strings.TrimSpace(name), 0; i < len(compatibleConfigPatterns); i++ {
+		if rule := compatibleConfigPatterns[i]; rule.Pattern.MatchString(trimmed) {
+			return rule, true
+		}
+	}
+	return DeviceCompatibleConfigPattern{}, false
+}
diff --git a/pkg/util/types.go b/pkg/util/types.go
index 1af3c39dc..56f2b749c 100644
--- a/pkg/util/types.go
+++ b/pkg/util/types.go
@@ -55,7 +55,8 @@ const (
 )
 
 var (
-	DebugMode bool
+	DefaultAllowedShareModes = []string{ShareModeTimeSlicing, ShareModeMemSlicing, ShareModeExclusive}
+	DebugMode                bool
 
 	NodeName          string
 	RuntimeSocketFlag string
@@ -69,7 +70,7 @@ type ContainerDevice struct {
 	Usedmem    int32
 	Usedcores  int32
 	CustomInfo map[string]any
-	ShareMode string
+	ShareMode  string
 }
 
 type ContainerDeviceRequest struct {
@@ -134,20 +135,20 @@ type DeviceUsage struct {
 }
 
 type DeviceInfo struct {
-	ID           string         `json:"id,omitempty"`
-	Index        uint           `json:"index,omitempty"`
-	Count        int32          `json:"count,omitempty"`
-	Devmem       int32          `json:"devmem,omitempty"`
-	Devcore      int32          `json:"devcore,omitempty"`
-	Type         string         `json:"type,omitempty"`
-	Numa         int            `json:"numa,omitempty"`
-	Mode         string         `json:"mode,omitempty"`
-	Architecture int32          `json:"architecture,omitempty"`
-	MIGTemplate  []Geometry     `json:"migtemplate,omitempty"`
-	Health       bool           `json:"health,omitempty"`
-	DeviceVendor string         `json:"devicevendor,omitempty"`
-	CustomInfo   map[string]any `json:"custominfo,omitempty"`
-	ShareMode    string     `json:"sharemode,omitempty"`
+	ID              string          `json:"id,omitempty"`
+	Index           uint            `json:"index,omitempty"`
+	Count           int32           `json:"count,omitempty"`
+	Devmem          int32           `json:"devmem,omitempty"`
+	Devcore         int32           `json:"devcore,omitempty"`
+	Type            string          `json:"type,omitempty"`
+	Numa            int             `json:"numa,omitempty"`
+	Mode            string          `json:"mode,omitempty"`
+	Architecture    int32           `json:"architecture,omitempty"`
+	MIGTemplate     []Geometry      `json:"migtemplate,omitempty"`
+	Health          bool            `json:"health,omitempty"`
+	DeviceVendor    string          `json:"devicevendor,omitempty"`
+	CustomInfo      map[string]any  `json:"custominfo,omitempty"`
+	ShareMode       string          `json:"sharemode,omitempty"`
 	DevicePairScore DevicePairScore `json:"devicepairscore,omitempty"`
 }