diff --git a/internal/rm/allocate.go b/internal/rm/allocate.go index 166b68e84..8f994c8cf 100644 --- a/internal/rm/allocate.go +++ b/internal/rm/allocate.go @@ -51,13 +51,21 @@ func (r *resourceManager) distributedAlloc(available, required []string, size in replicas[id].total++ } + // Track how many slots have already been picked from each physical device + // during this allocation. Used as the tie-break sort key below so the + // allocator rotates to a sibling physical device when the underlying + // "used" counts would otherwise tie. + pickedFrom := make(map[string]int) + // Grab the set of 'needed' devices one-by-one from the candidates list. // Before selecting each candidate, first sort the candidate list using the // replicas map above. After sorting, the first element in the list will // contain the device with the least difference between total and available - // replications (based on what's already been allocated). Add this device - // to the list of devices to allocate, remove it from the candidate list, - // down its available count in the replicas map, and repeat. + // replications (based on what's already been allocated). When two devices + // tie on that count, prefer the physical device we have not touched (or + // have touched the least) during this allocation. Add this device to the + // list of devices to allocate, remove it from the candidate list, down + // its available count in the replicas map, and repeat. var devices []string for i := 0; i < needed; i++ { sort.Slice(candidates, func(i, j int) bool { @@ -65,9 +73,13 @@ func (r *resourceManager) distributedAlloc(available, required []string, size in jid := AnnotatedID(candidates[j]).GetID() idiff := replicas[iid].total - replicas[iid].available jdiff := replicas[jid].total - replicas[jid].available - return idiff < jdiff + if idiff != jdiff { + return idiff < jdiff + } + return pickedFrom[iid] < pickedFrom[jid] }) id := AnnotatedID(candidates[0]).GetID() + pickedFrom[id]++ replicas[id].available-- devices = append(devices, candidates[0]) candidates = candidates[1:] diff --git a/internal/rm/allocate_test.go b/internal/rm/allocate_test.go new file mode 100644 index 000000000..f30a6250d --- /dev/null +++ b/internal/rm/allocate_test.go @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package rm + +import ( + "testing" + + "github.com/stretchr/testify/require" + pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" +) + +func makeReplicatedDevices(t *testing.T, gpuToReplicas map[string]int) Devices { + t.Helper() + ds := make(Devices) + for gpu, n := range gpuToReplicas { + for i := 0; i < n; i++ { + annotated := string(NewAnnotatedID(gpu, i)) + ds[annotated] = &Device{ + Device: pluginapi.Device{ID: annotated}, + Index: gpu, + Replicas: n, + } + } + } + return ds +} + +func countPerGPU(annotatedIDs []string) map[string]int { + counts := make(map[string]int) + for _, id := range annotatedIDs { + counts[AnnotatedID(id).GetID()]++ + } + return counts +} + +func TestDistributedAlloc_PartiallyAllocated_DistributesAcrossDistinctGPUs(t *testing.T) { + devices := makeReplicatedDevices(t, map[string]int{ + "GPU-0": 2, + "GPU-1": 2, + }) + r := &resourceManager{devices: devices} + + available := []string{ + "GPU-0::0", "GPU-0::1", + "GPU-1::1", + } + + allocated, err := r.distributedAlloc(available, nil, 2) + require.NoError(t, err) + require.Len(t, allocated, 2) + + counts := countPerGPU(allocated) + require.Equalf(t, 1, counts["GPU-0"], + "expected 1 slot from GPU-0 to keep allocations distributed across physical GPUs; got: %v", + counts) + require.Equalf(t, 1, counts["GPU-1"], + "expected 1 slot from GPU-1 (the still-available second physical GPU) instead of stacking both on GPU-0; got: %v", + counts) +} +