From a0813400e85de81e6437f1e148830fecf04e824e Mon Sep 17 00:00:00 2001 From: "Niranjan M.R" Date: Fri, 11 Jul 2025 17:03:23 +0530 Subject: [PATCH 1/3] verify Guaranteed pod is running after kubelet restart This PR addresses an issue where we verify that the cpu manager state file is the same after a kubelet restart, but we do not check that the Guaranteed pod started before the kubelet restart is also still running. Refer: https://issues.redhat.com/browse/OCPBUGS-43280 Signed-off-by: Niranjan M.R --- .../functests/1_performance/cpu_management.go | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go index 7f9253be57..73777a3a23 100644 --- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go +++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go @@ -346,6 +346,11 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { cpuManagerCpusetBeforeRestart, err := nodes.CpuManagerCpuSet(ctx, workerRTNode) Expect(err).ToNot(HaveOccurred()) testlog.Infof("pre kubelet restart default cpuset: %v", cpuManagerCpusetBeforeRestart.String()) + + By("capturing test pod state before restart") + originalPodUID := testpod.UID + testlog.Infof("pre kubelet restart pod UID: %v", originalPodUID) + kubeletRestartCmd := []string{ "chroot", "/rootfs", @@ -362,6 +367,34 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { testlog.Infof("post restart: finished cooldown time: %v", restartCooldownTime) + By("verify test pod comes back after kubelet restart") + Eventually(func() error { + updatedPod := &corev1.Pod{} + err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), updatedPod) + if err != nil { + return fmt.Errorf("failed to get pod after restart: %v", err) + } + + // Verify it's the same pod (same 
UID) + if updatedPod.UID != originalPodUID { + return fmt.Errorf("pod UID changed after restart: original=%v, current=%v", originalPodUID, updatedPod.UID) + } + + // Verify pod is ready + if updatedPod.Status.Phase != corev1.PodRunning { + return fmt.Errorf("pod is not running after restart: phase=%v", updatedPod.Status.Phase) + } + // Check pod ready condition + for _, condition := range updatedPod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + testlog.Infof("post kubelet restart pod is ready with UID: %v", updatedPod.UID) + return nil + } + } + + return fmt.Errorf("pod ready condition not found or not true") + }).WithTimeout(5*time.Minute).WithPolling(10*time.Second).Should(Succeed(), "test pod should come back after kubelet restart") + By("fetch Default cpuset from cpu manager state after restart") cpuManagerCpusetAfterRestart, err := nodes.CpuManagerCpuSet(ctx, workerRTNode) Expect(err).ToNot(HaveOccurred()) From dada06acf425995a8a9657a073bec48b3442c6d9 Mon Sep 17 00:00:00 2001 From: "Niranjan M.R" Date: Tue, 15 Jul 2025 17:09:24 +0530 Subject: [PATCH 2/3] avoid using indirect references and test negative pod status condition Signed-off-by: Niranjan M.R --- .../functests/1_performance/cpu_management.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go index 73777a3a23..7e7648cc5f 100644 --- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go +++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go @@ -369,8 +369,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { By("verify test pod comes back after kubelet restart") Eventually(func() error { - updatedPod := &corev1.Pod{} - err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), 
updatedPod) + var updatedPod corev1.Pod + err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), &updatedPod) if err != nil { return fmt.Errorf("failed to get pod after restart: %v", err) } @@ -386,13 +386,11 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { } // Check pod ready condition for _, condition := range updatedPod.Status.Conditions { - if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { - testlog.Infof("post kubelet restart pod is ready with UID: %v", updatedPod.UID) - return nil + if condition.Type == corev1.PodReady && condition.Status != corev1.ConditionTrue { + return fmt.Errorf("Pod ondition is not in Ready state after kubelet restart: condition: %v", updatedPod.Status.Conditions) } } - - return fmt.Errorf("pod ready condition not found or not true") + return nil }).WithTimeout(5*time.Minute).WithPolling(10*time.Second).Should(Succeed(), "test pod should come back after kubelet restart") By("fetch Default cpuset from cpu manager state after restart") From db1eddff00d86ec4128b2309143a879fba9cb77e Mon Sep 17 00:00:00 2001 From: "Niranjan M.R" Date: Tue, 15 Jul 2025 18:09:59 +0530 Subject: [PATCH 3/3] typo fix and add more debug when pod fails Signed-off-by: Niranjan M.R --- .../functests/1_performance/cpu_management.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go index 7e7648cc5f..86618ce1e0 100644 --- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go +++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go @@ -387,7 +387,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { // Check pod ready condition for _, condition := range updatedPod.Status.Conditions { if condition.Type == corev1.PodReady && condition.Status != 
corev1.ConditionTrue { - return fmt.Errorf("Pod ondition is not in Ready state after kubelet restart: condition: %v", updatedPod.Status.Conditions) + return fmt.Errorf("Pod condition is not in Ready state after kubelet restart: reason: %v, message: %v", condition.Reason, condition.Message) + } } return nil