From f76e1381d088f701d90598d50b92a6b263d4f21c Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Tue, 24 Jun 2025 15:12:49 +0200
Subject: [PATCH 1/2] e2e: node: fix quota disablement testcases

Initially we added minimal quota disablement e2e tests, but since the
emergence of https://github.com/kubevirt/kubevirt/issues/14965 it became
clear that it is better to have full coverage.

This PR restores coverage parity with the old test suite.

Signed-off-by: Francesco Romani
---
 test/e2e_node/cpumanager_test.go | 234 +++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)

diff --git a/test/e2e_node/cpumanager_test.go b/test/e2e_node/cpumanager_test.go
index 6ab13eee33a..51ec3503a4e 100644
--- a/test/e2e_node/cpumanager_test.go
+++ b/test/e2e_node/cpumanager_test.go
@@ -1016,6 +1016,30 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, framework.WithSerial(), featu
 			gomega.Expect(pod).To(HaveContainerQuota(ctnName, "max"))
 		})
 
+		ginkgo.It("should disable for guaranteed pod with exclusive CPUs assigned", func(ctx context.Context) {
+			cpuCount := 4
+			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
+
+			ctnName := "gu-container-cfsquota-disabled"
+			pod := makeCPUManagerPod("gu-pod-cfsquota-off", []ctnAttribute{
+				{
+					ctnName:    ctnName,
+					cpuRequest: "3",
+					cpuLimit:   "3",
+				},
+			})
+			ginkgo.By("creating the test pod")
+			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+			podMap[string(pod.UID)] = pod
+
+			gomega.Expect(pod).To(HaveSandboxQuota("max"))
+			gomega.Expect(pod).To(HaveContainerQuota(ctnName, "max"))
+
+			gomega.Expect(pod).To(HaveContainerCPUsCount(ctnName, 3))
+			gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(ctnName, onlineCPUs))
+			gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith(ctnName, reservedCPUs))
+		})
+
 		ginkgo.It("should enforce for guaranteed pod", func(ctx context.Context) {
 			cpuCount := 1 // overshoot, minimum request is 1
 			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
@@ -1106,6 +1130,216 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, framework.WithSerial(), featu
 		})
 	})
 
+	ginkgo.When("checking the CFS quota management can be disabled", ginkgo.Label("cfs-quota"), func() {
+		// NOTE: these tests check only the cases in which the quota is set to "max", so we intentionally
+		// don't duplicate all the tests
+
+		ginkgo.BeforeEach(func(ctx context.Context) {
+			// WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels.
+			// this means on more-than-2-way SMT systems this test will prove nothing
+			reservedCPUs = cpuset.New(0)
+			updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
+				policyName:                       string(cpumanager.PolicyStatic),
+				reservedSystemCPUs:               reservedCPUs,
+				disableCPUQuotaWithExclusiveCPUs: true,
+			}))
+		})
+
+		ginkgo.It("should disable for guaranteed pod with exclusive CPUs assigned", func(ctx context.Context) {
+			cpuCount := 1
+			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
+
+			ctnName := "gu-container-cfsquota-disabled"
+			pod := makeCPUManagerPod("gu-pod-cfsquota-off", []ctnAttribute{
+				{
+					ctnName:    ctnName,
+					cpuRequest: "1",
+					cpuLimit:   "1",
+				},
+			})
+			ginkgo.By("creating the test pod")
+			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+			podMap[string(pod.UID)] = pod
+
+			gomega.Expect(pod).To(HaveSandboxQuota("max"))
+			gomega.Expect(pod).To(HaveContainerQuota(ctnName, "max"))
+		})
+
+		ginkgo.It("should disable for guaranteed pod with exclusive CPUs assigned", func(ctx context.Context) {
+			cpuCount := 4
+			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
+
+			ctnName := "gu-container-cfsquota-disabled"
+			pod := makeCPUManagerPod("gu-pod-cfsquota-off", []ctnAttribute{
+				{
+					ctnName:    ctnName,
+					cpuRequest: "3",
+					cpuLimit:   "3",
+				},
+			})
+			ginkgo.By("creating the test pod")
+			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+			podMap[string(pod.UID)] = pod
+
+			gomega.Expect(pod).To(HaveSandboxQuota("max"))
+			gomega.Expect(pod).To(HaveContainerQuota(ctnName, "max"))
+
+			gomega.Expect(pod).To(HaveContainerCPUsCount(ctnName, 3))
+			gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(ctnName, onlineCPUs))
+			gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith(ctnName, reservedCPUs))
+		})
+
+		ginkgo.It("should enforce for guaranteed pod", func(ctx context.Context) {
+			cpuCount := 1 // overshoot, minimum request is 1
+			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
+
+			ctnName := "gu-container-cfsquota-enabled"
+			pod := makeCPUManagerPod("gu-pod-cfs-quota-on", []ctnAttribute{
+				{
+					ctnName:    ctnName,
+					cpuRequest: "500m",
+					cpuLimit:   "500m",
+				},
+			})
+			ginkgo.By("creating the test pod")
+			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+			podMap[string(pod.UID)] = pod
+
+			gomega.Expect(pod).To(HaveSandboxQuota("50000"))
+			gomega.Expect(pod).To(HaveContainerQuota(ctnName, "50000"))
+		})
+
+		ginkgo.It("should not enforce with multiple containers only in the container with exclusive CPUs", func(ctx context.Context) {
+			cpuCount := 2
+			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
+
+			pod := makeCPUManagerPod("gu-pod-multicontainer-mixed", []ctnAttribute{
+				{
+					ctnName:    "gu-container-non-int-values",
+					cpuRequest: "500m",
+					cpuLimit:   "500m",
+				},
+				{
+					ctnName:    "gu-container-int-values",
+					cpuRequest: "1",
+					cpuLimit:   "1",
+				},
+			})
+			ginkgo.By("creating the test pod")
+			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+			podMap[string(pod.UID)] = pod
+
+			gomega.Expect(pod).To(HaveSandboxQuota("max"))
+			gomega.Expect(pod).To(HaveContainerQuota("gu-container-non-int-values", "50000"))
+			gomega.Expect(pod).To(HaveContainerQuota("gu-container-int-values", "max"))
+		})
+	})
+
+	ginkgo.When("checking the CFS quota management is not disabled", ginkgo.Label("cfs-quota"), func() {
+		// NOTE: these tests mirror the cases above, but check that the quota stays enforced
+		// (never set to "max"), so we intentionally don't duplicate all the tests
+
+		ginkgo.BeforeEach(func(ctx context.Context) {
+			// WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels.
+			// this means on more-than-2-way SMT systems this test will prove nothing
+			reservedCPUs = cpuset.New(0)
+			updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
+				policyName:                       string(cpumanager.PolicyStatic),
+				reservedSystemCPUs:               reservedCPUs,
+				disableCPUQuotaWithExclusiveCPUs: false,
+			}))
+		})
+
+		ginkgo.It("should not disable for guaranteed pod with exclusive CPUs assigned", func(ctx context.Context) {
+			cpuCount := 1
+			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
+
+			ctnName := "gu-container-cfsquota-disabled"
+			pod := makeCPUManagerPod("gu-pod-cfsquota-off", []ctnAttribute{
+				{
+					ctnName:    ctnName,
+					cpuRequest: "1",
+					cpuLimit:   "1",
+				},
+			})
+			ginkgo.By("creating the test pod")
+			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+			podMap[string(pod.UID)] = pod
+
+			gomega.Expect(pod).To(HaveSandboxQuota("100000"))
+			gomega.Expect(pod).To(HaveContainerQuota(ctnName, "100000"))
+		})
+
+		ginkgo.It("should not disable for guaranteed pod with exclusive CPUs assigned", func(ctx context.Context) {
+			cpuCount := 4
+			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
+
+			ctnName := "gu-container-cfsquota-disabled"
+			pod := makeCPUManagerPod("gu-pod-cfsquota-off", []ctnAttribute{
+				{
+					ctnName:    ctnName,
+					cpuRequest: "3",
+					cpuLimit:   "3",
+				},
+			})
+			ginkgo.By("creating the test pod")
+			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+			podMap[string(pod.UID)] = pod
+
+			gomega.Expect(pod).To(HaveSandboxQuota("300000"))
+			gomega.Expect(pod).To(HaveContainerQuota(ctnName, "300000"))
+
+			gomega.Expect(pod).To(HaveContainerCPUsCount(ctnName, 3))
+			gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(ctnName, onlineCPUs))
+			gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith(ctnName, reservedCPUs))
+		})
+
+		ginkgo.It("should enforce for guaranteed pod", func(ctx context.Context) {
+			cpuCount := 1 // overshoot, minimum request is 1
+			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
+
+			ctnName := "gu-container-cfsquota-enabled"
+			pod := makeCPUManagerPod("gu-pod-cfs-quota-on", []ctnAttribute{
+				{
+					ctnName:    ctnName,
+					cpuRequest: "500m",
+					cpuLimit:   "500m",
+				},
+			})
+			ginkgo.By("creating the test pod")
+			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+			podMap[string(pod.UID)] = pod
+
+			gomega.Expect(pod).To(HaveSandboxQuota("50000"))
+			gomega.Expect(pod).To(HaveContainerQuota(ctnName, "50000"))
+		})
+
+		ginkgo.It("should enforce with multiple containers also in the container with exclusive CPUs", func(ctx context.Context) {
+			cpuCount := 2
+			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
+
+			pod := makeCPUManagerPod("gu-pod-multicontainer-mixed", []ctnAttribute{
+				{
+					ctnName:    "gu-container-non-int-values",
+					cpuRequest: "500m",
+					cpuLimit:   "500m",
+				},
+				{
+					ctnName:    "gu-container-int-values",
+					cpuRequest: "1",
+					cpuLimit:   "1",
+				},
+			})
+			ginkgo.By("creating the test pod")
+			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+			podMap[string(pod.UID)] = pod
+
+			gomega.Expect(pod).To(HaveSandboxQuota("150000"))
+			gomega.Expect(pod).To(HaveContainerQuota("gu-container-non-int-values", "50000"))
+			gomega.Expect(pod).To(HaveContainerQuota("gu-container-int-values", "100000"))
+		})
+	})
+
 	f.Context("When checking the sidecar containers", feature.SidecarContainers, func() {
 		ginkgo.BeforeEach(func(ctx context.Context) {
 			reservedCPUs = cpuset.New(0)

From 3b0fd328106cc7d13d8cc33f84cff515732cce14 Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Tue, 24 Jun 2025 15:13:12 +0200
Subject: [PATCH 2/2] e2e: serial: cpumanager: continue on failure

The `ginkgo.ContinueOnFailure` decorator serves the use case of the new
cpumanager tests perfectly:
https://onsi.github.io/ginkgo/#failure-handling-in-ordered-containers

"""
You can override this behavior by decorating an Ordered container with
ContinueOnFailure. This is useful in cases where Ordered is being used
to provide shared expensive set up for a collection of specs. When
ContinueOnFailure is set, Ginkgo will continue running specs even if an
earlier spec in the Ordered container has failed.
"""

And this is exactly the case at hand. Previously, without this
decorator, a single failure caused every later spec in the container to
be skipped, masking subsequent failures, which is dangerous and not what
we want.

Signed-off-by: Francesco Romani
---
 test/e2e_node/cpumanager_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e_node/cpumanager_test.go b/test/e2e_node/cpumanager_test.go
index 51ec3503a4e..d8b96de8e61 100644
--- a/test/e2e_node/cpumanager_test.go
+++ b/test/e2e_node/cpumanager_test.go
@@ -117,7 +117,7 @@ var (
 * A better approach would be check what we do have in the node. This is deferred to a later stage alongside
 * other improvements.
 */
-var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, framework.WithSerial(), feature.CPUManager, func() {
+var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, framework.WithSerial(), feature.CPUManager, func() {
 	f := framework.NewDefaultFramework("cpumanager-test")
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
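
For reference, a minimal standalone sketch of the behavior the decorator changes. This is not part of the patch; it assumes a throwaway Ginkgo v2 suite with made-up package and spec names, and one deliberately failing spec:

package orderedsketch_test

import (
	"testing"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
)

func TestOrderedSketch(t *testing.T) {
	gomega.RegisterFailHandler(ginkgo.Fail)
	ginkgo.RunSpecs(t, "ordered sketch suite")
}

// With plain ginkgo.Ordered, the failure in "first" makes Ginkgo skip the
// remaining specs in the container, so any later failures never surface.
// Adding ginkgo.ContinueOnFailure keeps running "second" and "third", so
// every failure is reported while the shared BeforeAll setup still runs once.
var _ = ginkgo.Describe("ordered container", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() {
	ginkgo.BeforeAll(func() {
		// shared, expensive setup goes here (kubelet reconfiguration in the real tests)
	})

	ginkgo.It("first", func() {
		gomega.Expect(1).To(gomega.Equal(2)) // deliberately failing spec
	})

	ginkgo.It("second", func() {
		gomega.Expect(2).To(gomega.Equal(2)) // still runs thanks to ContinueOnFailure
	})

	ginkgo.It("third", func() {
		gomega.Expect(3).To(gomega.Equal(3))
	})
})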