From 791a3bb20d2ee91103b813c749d3d2f422e7522e Mon Sep 17 00:00:00 2001 From: "Niranjan M.R" Date: Fri, 11 Jul 2025 17:03:23 +0530 Subject: [PATCH 1/3] verify Guaranteed pod is running after kubelet restart This PR addresses an issue where we verify that the cpu manager state file is the same after a kubelet restart, but while verifying the above we do not check that the Guaranteed pod started before the kubelet restart is also still running. Refer: https://issues.redhat.com/browse/OCPBUGS-43280 Signed-off-by: Niranjan M.R --- .../functests/1_performance/cpu_management.go | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go index 32dcbdb443..4afe555c25 100644 --- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go +++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go @@ -346,6 +346,11 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { cpuManagerCpusetBeforeRestart, err := nodes.CpuManagerCpuSet(ctx, workerRTNode) Expect(err).ToNot(HaveOccurred()) testlog.Infof("pre kubelet restart default cpuset: %v", cpuManagerCpusetBeforeRestart.String()) + + By("capturing test pod state before restart") + originalPodUID := testpod.UID + testlog.Infof("pre kubelet restart pod UID: %v", originalPodUID) + kubeletRestartCmd := []string{ "chroot", "/rootfs", @@ -362,6 +367,34 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { testlog.Infof("post restart: finished cooldown time: %v", restartCooldownTime) + By("verify test pod comes back after kubelet restart") + Eventually(func() error { + updatedPod := &corev1.Pod{} + err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), updatedPod) + if err != nil { + return fmt.Errorf("failed to get pod after restart: %v", err) + } + + // Verify it's the same pod (same
UID) + if updatedPod.UID != originalPodUID { + return fmt.Errorf("pod UID changed after restart: original=%v, current=%v", originalPodUID, updatedPod.UID) + } + + // Verify pod is ready + if updatedPod.Status.Phase != corev1.PodRunning { + return fmt.Errorf("pod is not running after restart: phase=%v", updatedPod.Status.Phase) + } + // Check pod ready condition + for _, condition := range updatedPod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + testlog.Infof("post kubelet restart pod is ready with UID: %v", updatedPod.UID) + return nil + } + } + + return fmt.Errorf("pod ready condition not found or not true") + }).WithTimeout(5*time.Minute).WithPolling(10*time.Second).Should(Succeed(), "test pod should come back after kubelet restart") + By("fetch Default cpuset from cpu manager state after restart") cpuManagerCpusetAfterRestart, err := nodes.CpuManagerCpuSet(ctx, workerRTNode) Expect(err).ToNot(HaveOccurred()) From 3f02e16ee0d75404ac903d9a0fd113de4d88d908 Mon Sep 17 00:00:00 2001 From: "Niranjan M.R" Date: Tue, 15 Jul 2025 17:09:24 +0530 Subject: [PATCH 2/3] avoid using indirect references and test negative pod status condition Signed-off-by: Niranjan M.R --- .../functests/1_performance/cpu_management.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go index 4afe555c25..919ad043ce 100644 --- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go +++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go @@ -369,8 +369,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { By("verify test pod comes back after kubelet restart") Eventually(func() error { - updatedPod := &corev1.Pod{} - err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), 
updatedPod) + var updatedPod corev1.Pod + err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), &updatedPod) if err != nil { return fmt.Errorf("failed to get pod after restart: %v", err) } @@ -386,13 +386,11 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { } // Check pod ready condition for _, condition := range updatedPod.Status.Conditions { - if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { - testlog.Infof("post kubelet restart pod is ready with UID: %v", updatedPod.UID) - return nil + if condition.Type == corev1.PodReady && condition.Status != corev1.ConditionTrue { + return fmt.Errorf("Pod ondition is not in Ready state after kubelet restart: condition: %v", updatedPod.Status.Conditions) } } - - return fmt.Errorf("pod ready condition not found or not true") + return nil }).WithTimeout(5*time.Minute).WithPolling(10*time.Second).Should(Succeed(), "test pod should come back after kubelet restart") By("fetch Default cpuset from cpu manager state after restart") From 2a1974faa242536afe5f8cff7b0b9499c82df49d Mon Sep 17 00:00:00 2001 From: "Niranjan M.R" Date: Tue, 15 Jul 2025 18:09:59 +0530 Subject: [PATCH 3/3] typo fix and add more debug when pod fails Signed-off-by: Niranjan M.R --- .../functests/1_performance/cpu_management.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go index 919ad043ce..9275c93f05 100644 --- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go +++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go @@ -387,7 +387,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { // Check pod ready condition for _, condition := range updatedPod.Status.Conditions { if condition.Type == corev1.PodReady && condition.Status != 
corev1.ConditionTrue { - return fmt.Errorf("Pod ondition is not in Ready state after kubelet restart: condition: %v", updatedPod.Status.Conditions) + return fmt.Errorf("Pod condition is not in Ready state after kubelet restart: reason: %v, message: %v", condition.Reason, condition.Message) + } } return nil