diff --git a/pkg/device/pod_test.go b/pkg/device/pod_test.go index e6072531b..fd267042a 100644 --- a/pkg/device/pod_test.go +++ b/pkg/device/pod_test.go @@ -542,3 +542,21 @@ func TestContainerDeviceDeepCopy(t *testing.T) { _, exists := original.CustomInfo["key2"] assert.False(t, exists, "original CustomInfo should not have key2") } + +func TestTakeAndDeletePodIsAtomic(t *testing.T) { + pm := NewPodManager() + pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{ + Name: "p", + Namespace: "ns", + UID: k8stypes.UID("uid-1"), + }} + pm.AddPod(pod, "node-1", PodDevices{}) + + pi1, ok1 := pm.TakeAndDeletePod(pod) + pi2, ok2 := pm.TakeAndDeletePod(pod) + + assert.True(t, ok1) + assert.NotNil(t, pi1) + assert.False(t, ok2) + assert.Nil(t, pi2) +} diff --git a/pkg/device/pods.go b/pkg/device/pods.go index 5b0d916ad..431ea9f4f 100644 --- a/pkg/device/pods.go +++ b/pkg/device/pods.go @@ -119,6 +119,18 @@ func (m *PodManager) GetPod(pod *corev1.Pod) (*PodInfo, bool) { return pi, ok } +func (m *PodManager) TakeAndDeletePod(pod *corev1.Pod) (*PodInfo, bool) { + m.mutex.Lock() + defer m.mutex.Unlock() + + pi, ok := m.pods[pod.UID] + if ok { + delete(m.pods, pod.UID) + klog.InfoS("Pod taken and deleted", "pod", klog.KRef(pod.Namespace, pod.Name), "nodeID", pi.NodeID) + } + return pi, ok +} + func (m *PodManager) ListPodsUID() ([]*corev1.Pod, error) { m.mutex.RLock() defer m.mutex.RUnlock() diff --git a/pkg/device/quota.go b/pkg/device/quota.go index 1a01fe0c9..288a03f71 100644 --- a/pkg/device/quota.go +++ b/pkg/device/quota.go @@ -155,9 +155,8 @@ func (q *QuotaManager) RmUsage(pod *corev1.Pod, podDev PodDevices) { return } for idx, val := range usage { - _, ok = (*dp)[idx] - if ok { - (*dp)[idx].Used -= val + if qInfo, ok := (*dp)[idx]; ok && qInfo != nil { + qInfo.Used -= val } } if klog.V(4).Enabled() { diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index f68f2f1c8..a2c999b63 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -145,11 +145,9 @@ func (s *Scheduler) onAddPod(obj any) { return } if util.IsPodInTerminatedState(pod) { - pi, ok := s.podManager.GetPod(pod) - if ok { + if pi, ok := s.podManager.TakeAndDeletePod(pod); ok { s.quotaManager.RmUsage(pod, pi.Devices) } - s.podManager.DelPod(pod) return } if util.IsPodTerminating(pod) { @@ -195,10 +193,8 @@ func (s *Scheduler) onDelPod(obj any) { if !ok { return } - pi, ok := s.podManager.GetPod(pod) - if ok { + if pi, ok := s.podManager.TakeAndDeletePod(pod); ok { s.quotaManager.RmUsage(pod, pi.Devices) - s.podManager.DelPod(pod) } }