/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2enode
/*
* this is a rewrite of the cpumanager e2e_node test.
* we will move testcases from cpu_manager_test.go to cpumanager_test.go.
* Full details in the tracking issue: https://github.com/kubernetes/kubernetes/issues/129884
*/
import (
"context"
"fmt"
"os"
"path/filepath"
"reflect"
"regexp"
"strconv"
"strings"
"time"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"github.com/onsi/gomega/gcustom"
"github.com/onsi/gomega/types"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
admissionapi "k8s.io/pod-security-admission/api"
"k8s.io/utils/cpuset"
"k8s.io/kubernetes/test/e2e/feature"
"k8s.io/kubernetes/test/e2e/framework"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
)
const (
defaultCFSPeriod = "100000"
)
// this is ugly, but practical
var (
e2enodeRuntimeName string
e2enodeCgroupV2Enabled bool
e2enodeCgroupDriver string
)
/*
- Serial:
because the test updates the kubelet configuration.
- Ordered:
Each spec (It block) needs to run with a kubelet configuration in place. At minimum, we need
the non-default cpumanager static policy, then we have the cpumanager options and so forth.
The simplest solution is to set the kubelet config explicitly each time, but this would cause a kubelet restart
each time, which takes longer and makes the flow intrinsically more fragile (so flakes are more likely).
Using Ordered allows us to use BeforeAll/AfterAll, and most notably to reuse the kubelet config in a batch
of specs (It blocks). Each It block will still set its kubelet config preconditions, but with a sensible
test arrangement, many of these preconditions will devolve into no-ops.
Arguably, this decision increases the coupling among specs, leaving room for subtle ordering bugs.
There's no arguing that ginkgo spec randomization would help, but the tradeoff here is between
lane complexity/fragility (reconfiguring the kubelet is not bulletproof yet) and accepting this risk.
If in the future we decide to pivot to make each spec fully independent, only minor changes will be needed.
Finally, it's worth pointing out that the previous cpumanager e2e test incarnation implemented the same
concept in a more convoluted way with function helpers, so arguably using Ordered and making it
explicit is already an improvement.
*/
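// For illustration only: the "reuse the kubelet config" pattern described above boils down to a helper
// which applies a desired kubelet configuration only when it differs from the one currently in effect.
// The following is a rough, hypothetical sketch of the idea (the helper name and the exact fields compared
// are assumptions); it is NOT the actual implementation of updateKubeletConfigIfNeeded used below:
//
//	func applyKubeletConfigIfChanged(ctx context.Context, f *framework.Framework, desired *kubeletconfig.KubeletConfiguration) {
//		current, err := getCurrentKubeletConfig(ctx)
//		framework.ExpectNoError(err)
//		if current.CPUManagerPolicy == desired.CPUManagerPolicy &&
//			reflect.DeepEqual(current.CPUManagerPolicyOptions, desired.CPUManagerPolicyOptions) &&
//			current.ReservedSystemCPUs == desired.ReservedSystemCPUs {
//			return // the previous spec already set the same preconditions: no kubelet restart needed
//		}
//		updateKubeletConfig(ctx, f, desired, true) // restarts the kubelet with the new configuration
//	}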
/*
* Extending the cpumanager test suite
* TL;DRs: THE MOST IMPORTANT:
* Please keep the test hierarchy very flat.
* Nesting more than 2 contexts total from SIGDescribe root is likely to be a smell.
* The problem with deep nesting is the interaction between BeforeEach blocks and the increased scope of variables.
* The ideal layout would be
* SIGDescribe # top level, unique
* Context # you can add more context, but please try hard to avoid nesting them
* It # add it blocks freely. Feel free to start new *non-nested* contexts as you see fit
* Context
* It
* It
* Exception: if you need to add the same labels to quite a few (say, 3+) It blocks, you can add an **empty** context
* The most important thing is to avoid long chains of beforeeach/aftereach and more than 2 levels of context nesting.
* So an **empty** context used only to group labels is acceptable:
* SIGDescribe
* Context(label1, label2) # avoid beforeeach/aftereach and variables here
* Context
* It
* It
* Context
* It
* Final rule of thumb: if the nested context descriptions start to read awkwardly or stop making sense
* when read as an English sentence, then the nesting is likely too deep.
*/
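// A minimal sketch of the recommended flat layout above, using this suite's own Ginkgo idioms
// (the context and spec names are placeholders, for illustration only):
//
//	var _ = SIGDescribe("CPU Manager", func() {
//		ginkgo.Context("with the static policy", ginkgo.Label("guaranteed"), func() {
//			ginkgo.It("allocates exclusive CPUs", func(ctx context.Context) { /* ... */ })
//			ginkgo.It("rejects misaligned requests", func(ctx context.Context) { /* ... */ })
//		})
//		ginkgo.Context("with the default policy", func() {
//			ginkgo.It("lets containers share all online CPUs", func(ctx context.Context) { /* ... */ })
//		})
//	})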
/*
* About reserved CPUs:
* all the tests assume the first available CPU ID is 0, which is a fair assumption and correct most of the time.
* A better approach would be to check what the node actually has. This is deferred to a later stage alongside
* other improvements.
*/
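// A possible future improvement (illustrative sketch only, not wired into the tests): derive the CPU to
// reserve from the actually-online CPUs instead of hardcoding CPU 0, e.g.:
//
//	onlineCPUs, err := getOnlineCPUs()
//	framework.ExpectNoError(err)
//	firstCPUID := onlineCPUs.List()[0] // List() returns the online CPU IDs in ascending order
//	reservedCPUs := cpuset.New(firstCPUID)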
var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, framework.WithSerial(), feature.CPUManager, func() {
f := framework.NewDefaultFramework("cpumanager-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
// original kubelet config captured before the suite starts, to be restored afterwards
var oldCfg *kubeletconfig.KubeletConfiguration
var reservedCPUs cpuset.CPUSet
var onlineCPUs cpuset.CPUSet
var smtLevel int
var uncoreGroupSize int
// tracks all the pods created by an It() block. Best would be a namespace per It block
// TODO: move to a namespace per It block?
var podMap map[string]*v1.Pod
// closure used only to avoid awkwardly carrying around `f` and `onlineCPUs` just for logging purposes
var skipIfAllocatableCPUsLessThan func(node *v1.Node, cpuReq int)
ginkgo.BeforeAll(func(ctx context.Context) {
var err error
oldCfg, err = getCurrentKubeletConfig(ctx)
framework.ExpectNoError(err)
onlineCPUs, err = getOnlineCPUs() // this should not change at all, at least during this suite lifetime
framework.ExpectNoError(err)
framework.Logf("Online CPUs: %s", onlineCPUs)
smtLevel = smtLevelFromSysFS() // this should not change at all, at least during this suite lifetime
framework.Logf("SMT level: %d", smtLevel)
uncoreGroupSize = getUncoreCPUGroupSize()
framework.Logf("Uncore Group Size: %d", uncoreGroupSize)
e2enodeCgroupV2Enabled = IsCgroup2UnifiedMode()
framework.Logf("cgroup V2 enabled: %v", e2enodeCgroupV2Enabled)
e2enodeCgroupDriver = oldCfg.CgroupDriver
framework.Logf("cgroup driver: %s", e2enodeCgroupDriver)
runtime, _, err := getCRIClient()
framework.ExpectNoError(err, "Failed to get CRI client")
version, err := runtime.Version(context.Background(), "")
framework.ExpectNoError(err, "Failed to get runtime version")
e2enodeRuntimeName = version.GetRuntimeName()
framework.Logf("runtime: %s", e2enodeRuntimeName)
})
ginkgo.AfterAll(func(ctx context.Context) {
updateKubeletConfig(ctx, f, oldCfg, true)
})
ginkgo.BeforeEach(func(ctx context.Context) {
// note we intentionally do NOT set reservedCPUs here - it must be initialized on a test-by-test basis
podMap = make(map[string]*v1.Pod)
})
ginkgo.JustBeforeEach(func(ctx context.Context) {
// note we intentionally do NOT set reservedCPUs here - it must be initialized on a test-by-test basis
// use a closure to minimize the arguments, to make the usage more straightforward
skipIfAllocatableCPUsLessThan = func(node *v1.Node, val int) {
ginkgo.GinkgoHelper()
cpuReq := int64(val + reservedCPUs.Size()) // reserved CPUs are not usable, so we need to account for them
// the framework is initialized by an injected BeforeEach node, so the
// earliest point we can initialize the other objects is here
nodeCPUDetails := cpuDetailsFromNode(node)
msg := fmt.Sprintf("%v full CPUs (detected=%v requested=%v reserved=%v online=%v smt=%v)", cpuReq, nodeCPUDetails.Allocatable, val, reservedCPUs.Size(), onlineCPUs.Size(), smtLevel)
ginkgo.By("Checking if allocatable: " + msg)
if nodeCPUDetails.Allocatable < cpuReq {
e2eskipper.Skipf("Skipping CPU Manager test: not allocatable %s", msg)
}
}
})
ginkgo.AfterEach(func(ctx context.Context) {
deletePodsAsync(ctx, f, podMap)
})
ginkgo.When("running non-guaranteed pods tests", ginkgo.Label("non-guaranteed", "reserved-cpus"), func() {
ginkgo.It("should let the container access all the online CPUs without a reserved CPUs set", func(ctx context.Context) {
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: cpuset.CPUSet{},
}))
pod := makeCPUManagerPod("non-gu-pod", []ctnAttribute{
{
ctnName: "non-gu-container",
cpuRequest: "100m",
cpuLimit: "200m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
gomega.Expect(pod).To(HaveContainerCPUsEqualTo("non-gu-container", onlineCPUs))
})
ginkgo.It("should let the container access all the online CPUs when using a reserved CPUs set", func(ctx context.Context) {
reservedCPUs = cpuset.New(0)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
pod := makeCPUManagerPod("non-gu-pod", []ctnAttribute{
{
ctnName: "non-gu-container",
cpuRequest: "100m",
cpuLimit: "200m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
gomega.Expect(pod).To(HaveContainerCPUsEqualTo("non-gu-container", onlineCPUs))
})
ginkgo.It("should let the container access all the online non-exclusively-allocated CPUs when using a reserved CPUs set", ginkgo.Label("guaranteed", "exclusive-cpus"), func(ctx context.Context) {
cpuCount := 1
reservedCPUs = cpuset.New(0)
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount+1) // note the extra for the non-gu pod
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
podGu := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container",
cpuRequest: fmt.Sprintf("%dm", 1000*cpuCount),
cpuLimit: fmt.Sprintf("%dm", 1000*cpuCount),
},
})
ginkgo.By("creating the guaranteed test pod")
podGu = e2epod.NewPodClient(f).CreateSync(ctx, podGu)
podMap[string(podGu.UID)] = podGu
podBu := makeCPUManagerPod("non-gu-pod", []ctnAttribute{
{
ctnName: "non-gu-container",
cpuRequest: "200m",
cpuLimit: "300m",
},
})
ginkgo.By("creating the burstable test pod")
podBu = e2epod.NewPodClient(f).CreateSync(ctx, podBu)
podMap[string(podBu.UID)] = podBu
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(podGu).To(HaveContainerCPUsCount("gu-container", cpuCount))
gomega.Expect(podGu).To(HaveContainerCPUsASubsetOf("gu-container", onlineCPUs))
gomega.Expect(podGu).ToNot(HaveContainerCPUsOverlapWith("gu-container", reservedCPUs))
exclusiveCPUs, err := getContainerAllowedCPUs(podGu, "gu-container", false)
framework.ExpectNoError(err, "cannot get exclusive CPUs for pod %s/%s", podGu.Namespace, podGu.Name)
expectedSharedCPUs := onlineCPUs.Difference(exclusiveCPUs)
gomega.Expect(podBu).To(HaveContainerCPUsEqualTo("non-gu-container", expectedSharedCPUs))
})
})
ginkgo.When("running guaranteed pod tests", ginkgo.Label("guaranteed", "exclusive-cpus"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
reservedCPUs = cpuset.New(0)
})
ginkgo.It("should allocate exclusively a CPU to a 1-container pod", func(ctx context.Context) {
cpuCount := 1
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container",
cpuRequest: fmt.Sprintf("%dm", 1000*cpuCount),
cpuLimit: fmt.Sprintf("%dm", 1000*cpuCount),
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container", cpuCount))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container", reservedCPUs))
})
// we don't use a separate group (ginkgo.When) with BeforeEach to factor out the tests because each
// test needs to check for the amount of CPUs it needs.
ginkgo.It("should allocate exclusively a even number of CPUs to a 1-container pod", func(ctx context.Context) {
cpuCount := 2
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container",
cpuRequest: fmt.Sprintf("%dm", 1000*cpuCount),
cpuLimit: fmt.Sprintf("%dm", 1000*cpuCount),
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container", cpuCount))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container", reservedCPUs))
// TODO: this is probably too strict, but it is the closest to what the old test did
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container"))
})
ginkgo.It("should allocate exclusively a odd number of CPUs to a 1-container pod", func(ctx context.Context) {
cpuCount := 3
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container",
cpuRequest: fmt.Sprintf("%dm", 1000*cpuCount),
cpuLimit: fmt.Sprintf("%dm", 1000*cpuCount),
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container", cpuCount))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container", reservedCPUs))
// TODO: this is probably too strict, but it is the closest to what the old test did
toleration := 1
gomega.Expect(pod).To(HaveContainerCPUsQuasiThreadSiblings("gu-container", toleration))
})
ginkgo.It("should allocate exclusively CPUs to a multi-container pod (1+2)", func(ctx context.Context) {
cpuCount := 3 // total
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-1",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
{
ctnName: "gu-container-2",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-1", 1))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-1", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-1", reservedCPUs))
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-2", 2))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-2", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-2", reservedCPUs))
// TODO: this is probably too strict, but it is the closest to what the old test did
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container-2"))
})
ginkgo.It("should allocate exclusively CPUs to a multi-container pod (3+2)", func(ctx context.Context) {
cpuCount := 5 // total
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-1",
cpuRequest: "3000m",
cpuLimit: "3000m",
},
{
ctnName: "gu-container-2",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-1", 3))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-1", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-1", reservedCPUs))
toleration := 1
gomega.Expect(pod).To(HaveContainerCPUsQuasiThreadSiblings("gu-container-1", toleration))
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-2", 2))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-2", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-2", reservedCPUs))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container-2"))
})
ginkgo.It("should allocate exclusively CPUs to a multi-container pod (4+2)", func(ctx context.Context) {
cpuCount := 6 // total
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-1",
cpuRequest: "4000m",
cpuLimit: "4000m",
},
{
ctnName: "gu-container-2",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-1", 4))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-1", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-1", reservedCPUs))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container-1"))
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-2", 2))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-2", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-2", reservedCPUs))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container-2"))
})
ginkgo.It("should allocate exclusively a CPU to multiple 1-container pods", func(ctx context.Context) {
cpuCount := 4 // total
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
pod1 := makeCPUManagerPod("gu-pod-1", []ctnAttribute{
{
ctnName: "gu-container-1",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod 1")
pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1)
podMap[string(pod1.UID)] = pod1
pod2 := makeCPUManagerPod("gu-pod-2", []ctnAttribute{
{
ctnName: "gu-container-2",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod 2")
pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2)
podMap[string(pod2.UID)] = pod2
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod1).To(HaveContainerCPUsCount("gu-container-1", 2))
gomega.Expect(pod1).To(HaveContainerCPUsASubsetOf("gu-container-1", onlineCPUs))
gomega.Expect(pod1).ToNot(HaveContainerCPUsOverlapWith("gu-container-1", reservedCPUs))
gomega.Expect(pod1).To(HaveContainerCPUsThreadSiblings("gu-container-1"))
gomega.Expect(pod2).To(HaveContainerCPUsCount("gu-container-2", 2))
gomega.Expect(pod2).To(HaveContainerCPUsASubsetOf("gu-container-2", onlineCPUs))
gomega.Expect(pod2).ToNot(HaveContainerCPUsOverlapWith("gu-container-2", reservedCPUs))
gomega.Expect(pod2).To(HaveContainerCPUsThreadSiblings("gu-container-2"))
})
})
ginkgo.When("running guaranteed pod tests with feature gates disabled", ginkgo.Label("guaranteed", "exclusive-cpus", "feature-gate-disabled"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
reservedCPUs = cpuset.New(0)
})
ginkgo.It("should allocate exclusively a CPU to a 1-container pod", func(ctx context.Context) {
cpuCount := 1
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
enableCPUManagerOptions: false,
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container",
cpuRequest: fmt.Sprintf("%dm", 1000*cpuCount),
cpuLimit: fmt.Sprintf("%dm", 1000*cpuCount),
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container", cpuCount))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container", reservedCPUs))
})
// we don't use a separate group (ginkgo.When) with BeforeEach to factor out the tests because each
// test needs to check for the amount of CPUs it needs.
ginkgo.It("should allocate exclusively a even number of CPUs to a 1-container pod", func(ctx context.Context) {
cpuCount := 2
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
enableCPUManagerOptions: false,
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container",
cpuRequest: fmt.Sprintf("%dm", 1000*cpuCount),
cpuLimit: fmt.Sprintf("%dm", 1000*cpuCount),
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container", cpuCount))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container", reservedCPUs))
// TODO: this is probably too strict, but it is the closest to what the old test did
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container"))
})
ginkgo.It("should allocate exclusively a odd number of CPUs to a 1-container pod", func(ctx context.Context) {
cpuCount := 3
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
enableCPUManagerOptions: false,
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container",
cpuRequest: fmt.Sprintf("%dm", 1000*cpuCount),
cpuLimit: fmt.Sprintf("%dm", 1000*cpuCount),
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container", cpuCount))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container", reservedCPUs))
// TODO: this is probably too strict, but it is the closest to what the old test did
toleration := 1
gomega.Expect(pod).To(HaveContainerCPUsQuasiThreadSiblings("gu-container", toleration))
})
ginkgo.It("should allocate exclusively CPUs to a multi-container pod (1+2)", func(ctx context.Context) {
cpuCount := 3 // total
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
enableCPUManagerOptions: false,
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-1",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
{
ctnName: "gu-container-2",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-1", 1))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-1", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-1", reservedCPUs))
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-2", 2))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-2", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-2", reservedCPUs))
// TODO: this is probably too strict, but it is the closest to what the old test did
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container-2"))
})
ginkgo.It("should allocate exclusively CPUs to a multi-container pod (3+2)", func(ctx context.Context) {
cpuCount := 5 // total
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
enableCPUManagerOptions: false,
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-1",
cpuRequest: "3000m",
cpuLimit: "3000m",
},
{
ctnName: "gu-container-2",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-1", 3))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-1", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-1", reservedCPUs))
toleration := 1
gomega.Expect(pod).To(HaveContainerCPUsQuasiThreadSiblings("gu-container-1", toleration))
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-2", 2))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-2", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-2", reservedCPUs))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container-2"))
})
ginkgo.It("should allocate exclusively CPUs to a multi-container pod (4+2)", func(ctx context.Context) {
cpuCount := 6 // total
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
enableCPUManagerOptions: false,
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-1",
cpuRequest: "4000m",
cpuLimit: "4000m",
},
{
ctnName: "gu-container-2",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-1", 4))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-1", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-1", reservedCPUs))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container-1"))
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-2", 2))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-2", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-2", reservedCPUs))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container-2"))
})
ginkgo.It("should allocate exclusively a CPU to multiple 1-container pods", func(ctx context.Context) {
cpuCount := 4 // total
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
enableCPUManagerOptions: false,
}))
pod1 := makeCPUManagerPod("gu-pod-1", []ctnAttribute{
{
ctnName: "gu-container-1",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod 1")
pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1)
podMap[string(pod1.UID)] = pod1
pod2 := makeCPUManagerPod("gu-pod-2", []ctnAttribute{
{
ctnName: "gu-container-2",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod 2")
pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2)
podMap[string(pod2.UID)] = pod2
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod1).To(HaveContainerCPUsCount("gu-container-1", 2))
gomega.Expect(pod1).To(HaveContainerCPUsASubsetOf("gu-container-1", onlineCPUs))
gomega.Expect(pod1).ToNot(HaveContainerCPUsOverlapWith("gu-container-1", reservedCPUs))
gomega.Expect(pod1).To(HaveContainerCPUsThreadSiblings("gu-container-1"))
gomega.Expect(pod2).To(HaveContainerCPUsCount("gu-container-2", 2))
gomega.Expect(pod2).To(HaveContainerCPUsASubsetOf("gu-container-2", onlineCPUs))
gomega.Expect(pod2).ToNot(HaveContainerCPUsOverlapWith("gu-container-2", reservedCPUs))
gomega.Expect(pod2).To(HaveContainerCPUsThreadSiblings("gu-container-2"))
})
})
ginkgo.When("running with strict CPU reservation", ginkgo.Label("strict-cpu-reservation"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
reservedCPUs = cpuset.New(0)
})
ginkgo.It("should let the container access all the online CPUs without a reserved CPUs set", func(ctx context.Context) {
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: cpuset.CPUSet{},
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.StrictCPUReservationOption: "true",
},
}))
pod := makeCPUManagerPod("non-gu-pod", []ctnAttribute{
{
ctnName: "non-gu-container",
cpuRequest: "100m",
cpuLimit: "200m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// the cpumanager will always reserve at least 1 cpu. In this case we don't set which one, and if we treat the cpumanager
// as a black box (which we very much should) we can't predict which one it picks. So we can only assert that *A* cpu is not
// usable because it is reserved.
gomega.Expect(pod).To(HaveContainerCPUsCount("non-gu-container", onlineCPUs.Size()-1))
})
ginkgo.It("should let the container access all the online CPUs minus the reserved CPUs set when enabled", func(ctx context.Context) {
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.StrictCPUReservationOption: "true",
},
}))
pod := makeCPUManagerPod("non-gu-pod", []ctnAttribute{
{
ctnName: "non-gu-container",
cpuRequest: "100m",
cpuLimit: "200m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
gomega.Expect(pod).To(HaveContainerCPUsEqualTo("non-gu-container", onlineCPUs.Difference(reservedCPUs)))
})
ginkgo.It("should let the container access all the online non-exclusively-allocated CPUs minus the reserved CPUs set when enabled", func(ctx context.Context) {
cpuCount := 1
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount+1) // note the extra for the non-gu pod
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.StrictCPUReservationOption: "true",
},
}))
cpuReq := fmt.Sprintf("%dm", 1000*cpuCount)
podGu := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container",
cpuRequest: cpuReq,
cpuLimit: cpuReq,
},
})
ginkgo.By("creating the guaranteed test pod")
podGu = e2epod.NewPodClient(f).CreateSync(ctx, podGu)
podMap[string(podGu.UID)] = podGu
podBu := makeCPUManagerPod("non-gu-pod", []ctnAttribute{
{
ctnName: "non-gu-container",
cpuRequest: "200m",
cpuLimit: "300m",
},
})
ginkgo.By("creating the burstable test pod")
podBu = e2epod.NewPodClient(f).CreateSync(ctx, podBu)
podMap[string(podBu.UID)] = podBu
ginkgo.By("checking if the expected cpuset was assigned")
usableCPUs := onlineCPUs.Difference(reservedCPUs)
// any full CPU is fine - we cannot, nor should we, predict which one, though
gomega.Expect(podGu).To(HaveContainerCPUsCount("gu-container", cpuCount))
gomega.Expect(podGu).To(HaveContainerCPUsASubsetOf("gu-container", usableCPUs))
gomega.Expect(podGu).ToNot(HaveContainerCPUsOverlapWith("gu-container", reservedCPUs))
exclusiveCPUs, err := getContainerAllowedCPUs(podGu, "gu-container", false)
framework.ExpectNoError(err, "cannot get exclusive CPUs for pod %s/%s", podGu.Namespace, podGu.Name)
expectedSharedCPUs := usableCPUs.Difference(exclusiveCPUs)
gomega.Expect(podBu).To(HaveContainerCPUsEqualTo("non-gu-container", expectedSharedCPUs))
})
})
ginkgo.When("running with SMT Alignment", ginkgo.Label("smt-alignment"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
// strict SMT alignment is trivially verified and granted on non-SMT systems
if smtLevel < minSMTLevel {
e2eskipper.Skipf("Skipping CPU Manager %q tests since SMT disabled", cpumanager.FullPCPUsOnlyOption)
}
reservedCPUs = cpuset.New(0)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.FullPCPUsOnlyOption: "true",
},
}))
})
ginkgo.It("should reject workload asking non-SMT-multiple of cpus", func(ctx context.Context) {
cpuCount := 1
cpuReq := fmt.Sprintf("%dm", 1000*cpuCount)
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
ginkgo.By("creating the testing pod")
// negative test: try to run a container whose requests aren't a multiple of SMT level, expect a rejection
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-neg",
cpuRequest: cpuReq,
cpuLimit: cpuReq,
},
})
ginkgo.By("creating the test pod")
// CreateSync would wait for pod to become Ready - which will never happen if production code works as intended!
pod = e2epod.NewPodClient(f).Create(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("ensuring the testing pod is in failed state")
err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) {
if pod.Status.Phase != v1.PodPending {
return true, nil
}
return false, nil
})
framework.ExpectNoError(err)
ginkgo.By("ensuring the testing pod is failed for the expected reason")
pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
gomega.Expect(pod).To(BeAPodInPhase(v1.PodFailed))
gomega.Expect(pod).To(HaveStatusReasonMatchingRegex(`SMT.*Alignment.*Error`))
})
ginkgo.It("should admit workload asking SMT-multiple of cpus", func(ctx context.Context) {
// positive test: try to run a container whose requests are a multiple of SMT level, check allocated cores
// 1. are core siblings
// 2. take a full core
// WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels.
// this means on more-than-2-way SMT systems this test will prove nothing
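// For reference, a hypothetical sketch (not executed by this test) of where the SMT level comes from:
// on Linux it can be derived from sysfs topology data, e.g. for CPU 0:
//
//	data, err := os.ReadFile("/sys/devices/system/cpu/cpu0/topology/thread_siblings_list")
//	framework.ExpectNoError(err)
//	siblings, err := cpuset.Parse(strings.TrimSpace(string(data))) // e.g. "0,8" => 2-way SMT
//	framework.ExpectNoError(err)
//	smtLevel := siblings.Size()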
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), smtLevel)
cpuRequest := fmt.Sprintf("%d000m", smtLevel)
ginkgo.By(fmt.Sprintf("creating the testing pod cpuRequest=%v", cpuRequest))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-pos",
cpuRequest: cpuRequest,
cpuLimit: cpuRequest,
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("validating each container in the testing pod")
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings(cnt.Name))
}
})
})
ginkgo.When("running with Uncore Cache Alignment", ginkgo.Label("prefer-align-cpus-by-uncore-cache"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
reservedCPUs = cpuset.New(0) // note: assign (not `:=`), so we set the shared suite-level variable instead of shadowing it
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.PreferAlignByUnCoreCacheOption: "true",
},
}))
})
ginkgo.It("should admit container asking odd integer amount of cpus", func(ctx context.Context) {
// assume an uncore cache's worth of cpus will always be an even integer value
// smallest odd integer cpu request can be 1 cpu
// for a meaningful test, the minimum allocatable cpu requirement should be:
// minCPUCapacity + reservedCPUs.Size() + 1 CPU allocated
cpuCount := minCPUCapacity + reservedCPUs.Size() + 1
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
// check if the node processor architecture has split or monolithic uncore cache.
// prefer-align-cpus-by-uncore-cache can be enabled on non-split uncore cache processors
// with no change to default static behavior
allocatableCPUs := cpuDetailsFromNode(getLocalNode(ctx, f)).Allocatable
hasSplitUncore := (allocatableCPUs > int64(uncoreGroupSize))
if hasSplitUncore {
// create a container that requires one less cpu than a full uncore cache's worth of cpus
// assume total shared CPUs of a single uncore cache will always be an even integer
cpuRequest := fmt.Sprintf("%d000m", (uncoreGroupSize - 1))
ginkgo.By(fmt.Sprintf("creating the testing pod cpuRequest=%v", cpuRequest))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-pos",
cpuRequest: cpuRequest,
cpuLimit: cpuRequest,
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("validating each container in the testing pod")
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
gomega.Expect(pod).To(HaveContainerCPUsWithSameUncoreCacheID(cnt.Name))
}
} else {
// for a node with a monolithic uncore cache processor,
// uncoreGroupSize will be a socket's worth of CPUs;
// subtract (minCPUCapacity + 1) CPUs to stay within the allocatable CPU constraint
cpuRequest := fmt.Sprintf("%d000m", (uncoreGroupSize - (minCPUCapacity + 1)))
ginkgo.By(fmt.Sprintf("creating the testing pod cpuRequest=%v", cpuRequest))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-pos",
cpuRequest: cpuRequest,
cpuLimit: cpuRequest,
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("validating each container in the testing pod")
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
gomega.Expect(pod).To(HaveContainerCPUsWithSameUncoreCacheID(cnt.Name))
}
}
})
})
ginkgo.When("running with Uncore Cache Alignment disabled", ginkgo.Label("prefer-align-cpus-by-uncore-cache"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
reservedCPUs = cpuset.New(0) // note: assign (not `:=`), so we set the shared suite-level variable instead of shadowing it
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.PreferAlignByUnCoreCacheOption: "false",
},
}))
})
ginkgo.It("should allocate exclusively CPUs to a multi-container pod (1+2)", func(ctx context.Context) {
cpuCount := 3 // total
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-1",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
{
ctnName: "gu-container-2",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
// we cannot, nor should we, predict which CPUs the container gets
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-1", 1))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-1", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-1", reservedCPUs))
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-2", 2))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-2", onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-2", reservedCPUs))
// TODO: this is probably too strict, but it is the closest to what the old test did
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings("gu-container-2"))
})
})
ginkgo.When("checking the compatibility between options", func() {
// please avoid nesting `BeforeEach` as much as possible. Ideally avoid completely.
ginkgo.Context("SMT Alignment and strict CPU reservation", ginkgo.Label("smt-alignment", "strict-cpu-reservation"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
// strict SMT alignment is trivially verified and granted on non-SMT systems
if smtLevel < minSMTLevel {
e2eskipper.Skipf("Skipping CPU Manager %q tests since SMT disabled", cpumanager.FullPCPUsOnlyOption)
}
reservedCPUs = cpuset.New(0)
})
ginkgo.It("should reject workload asking non-SMT-multiple of cpus", func(ctx context.Context) {
cpuCount := 1
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount+1) // note the extra for the non-gu pod
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.FullPCPUsOnlyOption: "true",
cpumanager.StrictCPUReservationOption: "true",
},
}))
// negative test: try to run a container whose requests aren't a multiple of SMT level, expect a rejection
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-neg",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
})
ginkgo.By("creating the testing pod")
// CreateSync would wait for pod to become Ready - which will never happen if production code works as intended!
pod = e2epod.NewPodClient(f).Create(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("ensuring the testing pod is in failed state")
err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) {
if pod.Status.Phase != v1.PodPending {
return true, nil
}
return false, nil
})
framework.ExpectNoError(err)
ginkgo.By("ensuring the testing pod is failed for the expected reason")
pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
gomega.Expect(pod).To(BeAPodInPhase(v1.PodFailed))
gomega.Expect(pod).To(HaveStatusReasonMatchingRegex(`SMT.*Alignment.*Error`))
})
ginkgo.It("should admit workload asking SMT-multiple of cpus", func(ctx context.Context) {
// positive test: try to run a container whose requests are a multiple of SMT level, check allocated cores
// 1. are core siblings
// 2. take a full core
// WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels.
// this means on more-than-2-way SMT systems this test will prove nothing
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), smtLevel)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.FullPCPUsOnlyOption: "true",
cpumanager.StrictCPUReservationOption: "true",
},
}))
cpuCount := smtLevel
cpuRequest := fmt.Sprintf("%d000m", smtLevel)
ginkgo.By(fmt.Sprintf("creating the testing pod cpuRequest=%v", cpuRequest))
pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
{
ctnName: "gu-container-x",
cpuRequest: cpuRequest,
cpuLimit: cpuRequest,
},
})
ginkgo.By("creating the testing pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
usableCPUs := onlineCPUs.Difference(reservedCPUs)
gomega.Expect(pod).To(HaveContainerCPUsCount("gu-container-x", cpuCount))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf("gu-container-x", usableCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("gu-container-x", reservedCPUs))
ginkgo.By("validating each container in the testing pod")
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings(cnt.Name))
}
})
})
// please avoid nesting `BeforeEach` as much as possible. Ideally avoid completely.
ginkgo.Context("SMT Alignment and Uncore Cache Alignment", ginkgo.Label("smt-alignment", "prefer-align-cpus-by-uncore-cache"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
// strict SMT alignment is trivially verified and granted on non-SMT systems
if smtLevel < minSMTLevel {
e2eskipper.Skipf("Skipping CPU Manager %q tests since SMT disabled", cpumanager.FullPCPUsOnlyOption)
}
reservedCPUs = cpuset.New(0)
})
ginkgo.It("should assign packed CPUs with prefer-align-cpus-by-uncore-cache disabled and pcpu-only policy options enabled", func(ctx context.Context) {
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), smtLevel)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.FullPCPUsOnlyOption: "true",
cpumanager.PreferAlignByUnCoreCacheOption: "false",
},
}))
ctnAttrs := []ctnAttribute{
{
ctnName: "test-gu-container-uncore-cache-alignment-disabled",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
}
pod := makeCPUManagerPod("test-pod-uncore-cache-alignment-disabled", ctnAttrs)
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("validating each container in the testing pod")
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings(cnt.Name))
}
})
ginkgo.It("should assign CPUs aligned to uncore caches with prefer-align-cpus-by-uncore-cache and pcpu-only policy options enabled", func(ctx context.Context) {
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), smtLevel)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.FullPCPUsOnlyOption: "true",
cpumanager.PreferAlignByUnCoreCacheOption: "true",
},
}))
// check if the node processor architecture has split or monolithic uncore cache.
// prefer-align-cpus-by-uncore-cache can be enabled on non-split uncore cache processors
// with no change to default static behavior
allocatableCPUs := cpuDetailsFromNode(getLocalNode(ctx, f)).Allocatable
hasSplitUncore := (allocatableCPUs > int64(uncoreGroupSize))
if hasSplitUncore {
// for node with split uncore cache processor
// create a pod that requires a full uncore cache worth of CPUs
ctnAttrs := []ctnAttribute{
{
ctnName: "test-gu-container-align-cpus-by-uncore-cache-on-split-uncore",
cpuRequest: fmt.Sprintf("%d", uncoreGroupSize),
cpuLimit: fmt.Sprintf("%d", uncoreGroupSize),
},
}
pod := makeCPUManagerPod("test-pod-align-cpus-by-uncore-cache", ctnAttrs)
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
// the 'prefer-align-cpus-by-uncore-cache' policy option makes a best-effort attempt to allocate cpus
// so that their distribution across uncore caches is minimized. Since the test container is requesting a full
// uncore cache's worth of cpus and CPU0 is part of the reserved CPU set and not allocatable, the policy will attempt
// to allocate cpus from the next available uncore cache
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
cpus, err := getContainerAllowedCPUs(pod, cnt.Name, false)
framework.ExpectNoError(err, "cannot get cpus allocated to pod %s/%s cnt %s", pod.Namespace, pod.Name, cnt.Name)
siblingsCPUs := makeThreadSiblingCPUSet(cpus)
gomega.Expect(pod).To(HaveContainerCPUsEqualTo(cnt.Name, siblingsCPUs))
gomega.Expect(pod).To(HaveContainerCPUsWithSameUncoreCacheID(cnt.Name))
gomega.Expect(pod).ToNot(HaveContainerCPUsShareUncoreCacheWith(cnt.Name, reservedCPUs))
}
} else {
// for node with monolithic uncore cache processor
// expect default static behavior with pcpu-only policy enabled
// and prefer-align-cpus-by-uncore-cache enabled
ctnAttrs := []ctnAttribute{
{
ctnName: "test-gu-container-align-cpus-by-uncore-cache-on-mono-uncore",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
}
pod := makeCPUManagerPod("test-pod-align-cpus-by-uncore-cache", ctnAttrs)
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("validating each container in the testing pod")
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings(cnt.Name))
}
}
})
})
// please avoid nesting `BeforeEach` as much as possible. Ideally avoid completely.
ginkgo.Context("Strict CPU Reservation and Uncore Cache Alignment", ginkgo.Label("strict-cpu-reservation", "prefer-align-cpus-by-uncore-cache"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
reservedCPUs = cpuset.New(0)
})
ginkgo.It("should assign CPUs aligned to uncore caches with prefer-align-cpus-by-uncore-cache and avoid reserved cpus", func(ctx context.Context) {
// assume an uncore cache's worth of cpus will always be an even integer value
// smallest integer cpu request can be 1 cpu
// for a meaningful test, the minimum allocatable cpu requirement should be:
// minCPUCapacity + reservedCPUs.Size() + 1 CPU allocated
cpuCount := minCPUCapacity + reservedCPUs.Size() + 1
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.StrictCPUReservationOption: "true",
cpumanager.PreferAlignByUnCoreCacheOption: "true",
},
}))
// check if the node processor architecture has split or monolithic uncore cache.
// prefer-align-cpus-by-uncore-cache can be enabled on non-split uncore cache processors
// with no change to default static behavior
allocatableCPUs := cpuDetailsFromNode(getLocalNode(ctx, f)).Allocatable
hasSplitUncore := (allocatableCPUs > int64(uncoreGroupSize))
if hasSplitUncore {
// for node with split uncore cache processor
// create a pod that requires a full uncore cache worth of CPUs
ctnAttrs := []ctnAttribute{
{
ctnName: "test-gu-container-align-cpus-by-uncore-cache-on-split-uncore",
cpuRequest: fmt.Sprintf("%d", uncoreGroupSize),
cpuLimit: fmt.Sprintf("%d", uncoreGroupSize),
},
}
pod := makeCPUManagerPod("test-pod-align-cpus-by-uncore-cache", ctnAttrs)
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
// the 'prefer-align-cpus-by-uncore-cache' policy option makes a best-effort attempt to allocate cpus
// so that their distribution across uncore caches is minimized. Since the test container is requesting a full
// uncore cache's worth of cpus and CPU0 is part of the reserved CPU set and not allocatable, the policy will attempt
// to allocate cpus from the next available uncore cache
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
cpus, err := getContainerAllowedCPUs(pod, cnt.Name, false)
framework.ExpectNoError(err, "cannot get cpus allocated to pod %s/%s cnt %s", pod.Namespace, pod.Name, cnt.Name)
siblingsCPUs := makeThreadSiblingCPUSet(cpus)
gomega.Expect(pod).To(HaveContainerCPUsEqualTo(cnt.Name, siblingsCPUs))
gomega.Expect(pod).To(HaveContainerCPUsWithSameUncoreCacheID(cnt.Name))
gomega.Expect(pod).ToNot(HaveContainerCPUsShareUncoreCacheWith(cnt.Name, reservedCPUs))
}
} else {
// for node with monolithic uncore cache processor
// expect default static packed behavior
// when prefer-align-cpus-by-uncore-cache enabled
ctnAttrs := []ctnAttribute{
{
ctnName: "test-gu-container-align-cpus-by-uncore-cache-on-mono-uncore",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
}
pod := makeCPUManagerPod("test-pod-align-cpus-by-uncore-cache", ctnAttrs)
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("validating each container in the testing pod")
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
// expect allocated CPUs to be on the same uncore cache
gomega.Expect(pod).To(HaveContainerCPUsWithSameUncoreCacheID(cnt.Name))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith("test-gu-container-align-cpus-by-uncore-cache-on-mono-uncore", reservedCPUs))
}
}
})
})
// please avoid nesting `BeforeEach` as much as possible. Ideally avoid completely.
ginkgo.Context("SMT Alignment and distribution across NUMA", ginkgo.Label("smt-alignment", "distribute-cpus-across-numa"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
// strict SMT alignment is trivially verified and granted on non-SMT systems
if smtLevel < minSMTLevel {
e2eskipper.Skipf("Skipping CPU Manager %q tests since SMT disabled", cpumanager.FullPCPUsOnlyOption)
}
reservedCPUs = cpuset.New(0)
})
ginkgo.It("should assign packed CPUs with distribute-cpus-across-numa disabled and pcpu-only policy options enabled", func(ctx context.Context) {
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), smtLevel)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.FullPCPUsOnlyOption: "true",
cpumanager.DistributeCPUsAcrossNUMAOption: "false",
},
}))
ctnAttrs := []ctnAttribute{
{
ctnName: "test-gu-container-distribute-cpus-across-numa-disabled",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
}
pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa-disabled", ctnAttrs)
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("validating each container in the testing pod")
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings(cnt.Name))
}
})
ginkgo.It("should assign CPUs distributed across NUMA with distribute-cpus-across-numa and pcpu-only policy options enabled", func(ctx context.Context) {
reservedCPUs = cpuset.New(0)
// this test is intended to run on a multi-NUMA-node system with at least
// 4 cores per socket; hostCheck skips the test if these requirements are
// not satisfied
numaNodeNum, _, _, cpusNumPerNUMA := hostCheck()
cpuReq := (cpusNumPerNUMA - smtLevel) * numaNodeNum
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuReq)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
enableCPUManagerOptions: true,
options: map[string]string{
cpumanager.FullPCPUsOnlyOption: "true",
cpumanager.DistributeCPUsAcrossNUMAOption: "true",
},
}))
// 'distribute-cpus-across-numa' policy option ensures that CPU allocations are evenly distributed
// across NUMA nodes in cases where more than one NUMA node is required to satisfy the allocation.
// So, we want to ensure that the CPU request exceeds the number of CPUs that can fit within a single
// NUMA node. We have to pick cpuRequest such that:
// 1. CPURequest > cpusNumPerNUMA
// 2. It does not occupy all the CPUs on the node and leaves room for the reserved CPUs
// 3. CPURequest is a multiple of the number of NUMA nodes to allow an equal CPU distribution across NUMA nodes
//
// In summary: cpusNumPerNUMA < CPURequest < ((cpusNumPerNUMA * numaNodeNum) - reservedCPUsCount)
// Considering all these constraints we select: CPURequest = (cpusNumPerNUMA-smtLevel)*numaNodeNum
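// Worked example on a hypothetical system with 2 NUMA nodes, 16 CPUs per NUMA node and 2-way SMT:
// cpuReq = (16 - 2) * 2 = 28, which exceeds the 16 CPUs of a single NUMA node (so the allocation must
// span NUMA nodes), leaves 4 CPUs unallocated, and is a multiple of 2, so we expect 14 CPUs per NUMA node.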
ctnAttrs := []ctnAttribute{
{
ctnName: "test-gu-container-distribute-cpus-across-numa",
cpuRequest: fmt.Sprintf("%d", cpuReq),
cpuLimit: fmt.Sprintf("%d", cpuReq),
},
}
pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa", ctnAttrs)
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
for _, cnt := range pod.Spec.Containers {
ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
cpus, err := getContainerAllowedCPUs(pod, cnt.Name, false)
framework.ExpectNoError(err, "cannot get cpus allocated to pod %s/%s cnt %s", pod.Namespace, pod.Name, cnt.Name)
siblingsCPUs := makeThreadSiblingCPUSet(cpus)
gomega.Expect(pod).To(HaveContainerCPUsEqualTo(cnt.Name, siblingsCPUs))
// We expect a perfectly even split, i.e. equal distribution across NUMA nodes, as the CPU request is (cpusNumPerNUMA-smtLevel)*numaNodeNum, a multiple of numaNodeNum.
expectedSpread := cpus.Size() / numaNodeNum
gomega.Expect(cpus).To(BeDistributedCPUs(expectedSpread))
}
})
})
})
ginkgo.When("checking the CFS quota management", ginkgo.Label("cfs-quota"), func() {
ginkgo.BeforeEach(func(ctx context.Context) {
requireCGroupV2()
// WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels.
// this means on more-than-2-way SMT systems this test will prove nothing
reservedCPUs = cpuset.New(0)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
disableCPUQuotaWithExclusiveCPUs: true,
}))
})
ginkgo.It("should enforce for best-effort pod", func(ctx context.Context) {
ctnName := "be-container"
pod := makeCPUManagerBEPod("be-pod", []ctnAttribute{
{
ctnName: ctnName,
ctnCommand: "sleep 1d",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
gomega.Expect(pod).To(HaveSandboxQuota("max"))
gomega.Expect(pod).To(HaveContainerQuota(ctnName, "max"))
})
ginkgo.It("should disable for guaranteed pod with exclusive CPUs assigned", func(ctx context.Context) {
cpuCount := 1
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
ctnName := "gu-container-cfsquota-disabled"
pod := makeCPUManagerPod("gu-pod-cfsquota-off", []ctnAttribute{
{
ctnName: ctnName,
cpuRequest: "1",
cpuLimit: "1",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
gomega.Expect(pod).To(HaveSandboxQuota("max"))
gomega.Expect(pod).To(HaveContainerQuota(ctnName, "max"))
})
ginkgo.It("should disable for guaranteed pod with exclusive CPUs assigned", func(ctx context.Context) {
cpuCount := 4
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
ctnName := "gu-container-cfsquota-disabled"
pod := makeCPUManagerPod("gu-pod-cfsquota-off", []ctnAttribute{
{
ctnName: ctnName,
cpuRequest: "3",
cpuLimit: "3",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
gomega.Expect(pod).To(HaveSandboxQuota("max"))
gomega.Expect(pod).To(HaveContainerQuota(ctnName, "max"))
gomega.Expect(pod).To(HaveContainerCPUsCount(ctnName, 3))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(ctnName, onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith(ctnName, reservedCPUs))
})
ginkgo.It("should enforce for guaranteed pod", func(ctx context.Context) {
cpuCount := 1 // overshoot, minimum request is 1
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
ctnName := "gu-container-cfsquota-enabled"
pod := makeCPUManagerPod("gu-pod-cfs-quota-on", []ctnAttribute{
{
ctnName: ctnName,
cpuRequest: "500m",
cpuLimit: "500m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
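// with the default CFS period of 100000us, a 500m cpu limit translates to a 50000us quota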
gomega.Expect(pod).To(HaveSandboxQuota("50000"))
gomega.Expect(pod).To(HaveContainerQuota(ctnName, "50000"))
})
ginkgo.It("should enforce for burstable pod", func(ctx context.Context) {
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 0)
ctnName := "bu-container-cfsquota-enabled"
pod := makeCPUManagerPod("bu-pod-cfs-quota-on", []ctnAttribute{
{
ctnName: ctnName,
cpuRequest: "100m",
cpuLimit: "500m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
gomega.Expect(pod).To(HaveSandboxQuota("50000"))
gomega.Expect(pod).To(HaveContainerQuota(ctnName, "50000"))
})
ginkgo.It("should not enforce with multiple containers without exclusive CPUs", func(ctx context.Context) {
cpuCount := 2
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
pod := makeCPUManagerPod("gu-pod-multicontainer", []ctnAttribute{
{
ctnName: "gu-container-non-int-values-1",
cpuRequest: "100m",
cpuLimit: "500m",
},
{
ctnName: "gu-container-non-int-values-2",
cpuRequest: "300m",
cpuLimit: "1200m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
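// the pod-level quota is the sum of the container limits: (500m + 1200m) over a 100000us period = 170000us,
// while each container keeps its own quota: 500m -> 50000us and 1200m -> 120000us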
gomega.Expect(pod).To(HaveSandboxQuota("170000"))
gomega.Expect(pod).To(HaveContainerQuota("gu-container-non-int-values-1", "50000"))
gomega.Expect(pod).To(HaveContainerQuota("gu-container-non-int-values-2", "120000"))
})
ginkgo.It("should not enforce with multiple containers only in the container with exclusive CPUs", func(ctx context.Context) {
cpuCount := 2
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
pod := makeCPUManagerPod("gu-pod-multicontainer-mixed", []ctnAttribute{
{
ctnName: "gu-container-non-int-values",
cpuRequest: "500m",
cpuLimit: "500m",
},
{
ctnName: "gu-container-int-values",
cpuRequest: "1",
cpuLimit: "1",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
gomega.Expect(pod).To(HaveSandboxQuota("max"))
gomega.Expect(pod).To(HaveContainerQuota("gu-container-non-int-values", "50000"))
gomega.Expect(pod).To(HaveContainerQuota("gu-container-int-values", "max"))
})
})
ginkgo.When("checking the CFS quota management can be disabled", ginkgo.Label("cfs-quota"), func() {
// NOTE: these tests check only the cases in which the quota is set to "max", so we intentionally
// don't duplicate all the tests
ginkgo.BeforeEach(func(ctx context.Context) {
requireCGroupV2()
// WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels.
// this means on more-than-2-way SMT systems this test will prove nothing
reservedCPUs = cpuset.New(0)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs,
disableCPUQuotaWithExclusiveCPUs: false,
}))
})
ginkgo.It("should enforce for a guaranteed pod with exclusive CPUs assigned", func(ctx context.Context) {
cpuCount := 1
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
ctnName := "gu-container-cfsquota-disabled"
pod := makeCPUManagerPod("gu-pod-cfsquota-off", []ctnAttribute{
{
ctnName: ctnName,
cpuRequest: "1",
cpuLimit: "1",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
gomega.Expect(pod).To(HaveSandboxQuota("100000"))
gomega.Expect(pod).To(HaveContainerQuota(ctnName, "100000"))
})
ginkgo.It("should enforce for a guaranteed pod with multiple exclusive CPUs assigned", func(ctx context.Context) {
cpuCount := 4
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
ctnName := "gu-container-cfsquota-disabled"
pod := makeCPUManagerPod("gu-pod-cfsquota-off", []ctnAttribute{
{
ctnName: ctnName,
cpuRequest: "3",
cpuLimit: "3",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
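// with quota disabling turned off, exclusive CPUs still get a quota: 3 CPUs over a 100000us period = 300000us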
gomega.Expect(pod).To(HaveSandboxQuota("300000"))
gomega.Expect(pod).To(HaveContainerQuota(ctnName, "300000"))
gomega.Expect(pod).To(HaveContainerCPUsCount(ctnName, 3))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(ctnName, onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith(ctnName, reservedCPUs))
})
ginkgo.It("should enforce for guaranteed pod not requiring exclusive CPUs", func(ctx context.Context) {
cpuCount := 1 // overshoot, minimum request is 1
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
ctnName := "gu-container-cfsquota-enabled"
pod := makeCPUManagerPod("gu-pod-cfs-quota-on", []ctnAttribute{
{
ctnName: ctnName,
cpuRequest: "500m",
cpuLimit: "500m",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
gomega.Expect(pod).To(HaveSandboxQuota("50000"))
gomega.Expect(pod).To(HaveContainerQuota(ctnName, "50000"))
})
ginkgo.It("should enforce with multiple containers regardless if they require exclusive CPUs or not", func(ctx context.Context) {
cpuCount := 2
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
pod := makeCPUManagerPod("gu-pod-multicontainer-mixed", []ctnAttribute{
{
ctnName: "gu-container-non-int-values",
cpuRequest: "500m",
cpuLimit: "500m",
},
{
ctnName: "gu-container-int-values",
cpuRequest: "1",
cpuLimit: "1",
},
})
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
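// pod-level quota: (500m + 1000m) over a 100000us period = 150000us; the container with exclusive CPUs
// still gets its own quota of 1 CPU over a 100000us period = 100000us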
gomega.Expect(pod).To(HaveSandboxQuota("150000"))
gomega.Expect(pod).To(HaveContainerQuota("gu-container-non-int-values", "50000"))
gomega.Expect(pod).To(HaveContainerQuota("gu-container-int-values", "100000"))
})
})
f.Context("When checking the sidecar containers", feature.SidecarContainers, func() {
ginkgo.BeforeEach(func(ctx context.Context) {
reservedCPUs = cpuset.New(0)
})
ginkgo.It("should reuse init container exclusive CPUs, but not sidecar container exclusive CPUS", func(ctx context.Context) {
cpuCount := 2 // total
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
}))
var containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways
ctrAttrs := []ctnAttribute{
{
ctnName: "init-container1",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
{
ctnName: "sidecar-container",
cpuRequest: "1000m",
cpuLimit: "1000m",
restartPolicy: &containerRestartPolicyAlways,
},
}
pod := makeCPUManagerInitContainersPod("gu-pod", ctrAttrs)
ginkgo.By("running a Gu pod with a regular init container and a restartable init container")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
// by the time we get here the regular init container has terminated, so we can only check its logs
ginkgo.By("checking if the expected cpuset was assigned")
logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, pod.Namespace, pod.Name, pod.Spec.InitContainers[0].Name)
framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[0].Name, pod.Name)
reusableCPUs := getContainerAllowedCPUsFromLogs(pod.Name, pod.Spec.InitContainers[0].Name, logs)
gomega.Expect(reusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", reusableCPUs.String())
nonReusableCPUs, err := getContainerAllowedCPUs(pod, pod.Spec.InitContainers[1].Name, true)
framework.ExpectNoError(err, "cannot get exclusive CPUs for pod %s/%s", pod.Namespace, pod.Name)
gomega.Expect(nonReusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", nonReusableCPUs.String())
gomega.Expect(reusableCPUs.Equals(nonReusableCPUs)).To(gomega.BeTrueBecause("expected reusable cpuset [%s] to be equal to non-reusable cpuset [%s]", reusableCPUs.String(), nonReusableCPUs.String()))
appContainerName := pod.Spec.Containers[0].Name
gomega.Expect(pod).To(HaveContainerCPUsCount(appContainerName, 1))
gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(appContainerName, onlineCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith(appContainerName, reservedCPUs))
gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith(appContainerName, nonReusableCPUs))
})
})
})
var _ = SIGDescribe("CPU Manager Incompatibility Pod Level Resources", ginkgo.Ordered, ginkgo.ContinueOnFailure, framework.WithSerial(), feature.CPUManager, feature.PodLevelResources, framework.WithFeatureGate(features.PodLevelResources), func() {
f := framework.NewDefaultFramework("cpu-manager-incompatibility-pod-level-resources-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
// original kubeletconfig before the context start, to be restored
var oldCfg *kubeletconfig.KubeletConfiguration
var reservedCPUs cpuset.CPUSet
var onlineCPUs cpuset.CPUSet
var smtLevel int
var uncoreGroupSize int
// tracks all the pods created by an It() block. Ideally we would use a dedicated namespace per It block
var podMap map[string]*v1.Pod
ginkgo.BeforeAll(func(ctx context.Context) {
var err error
oldCfg, err = getCurrentKubeletConfig(ctx)
framework.ExpectNoError(err)
onlineCPUs, err = getOnlineCPUs() // this should not change at all, at least during this suite lifetime
framework.ExpectNoError(err)
framework.Logf("Online CPUs: %s", onlineCPUs)
smtLevel = smtLevelFromSysFS() // this should not change at all, at least during this suite lifetime
framework.Logf("SMT level: %d", smtLevel)
uncoreGroupSize = getUncoreCPUGroupSize()
framework.Logf("Uncore Group Size: %d", uncoreGroupSize)
e2enodeCgroupV2Enabled = IsCgroup2UnifiedMode()
framework.Logf("cgroup V2 enabled: %v", e2enodeCgroupV2Enabled)
e2enodeCgroupDriver = oldCfg.CgroupDriver
framework.Logf("cgroup driver: %s", e2enodeCgroupDriver)
runtime, _, err := getCRIClient()
framework.ExpectNoError(err, "Failed to get CRI client")
version, err := runtime.Version(context.Background(), "")
framework.ExpectNoError(err, "Failed to get runtime version")
e2enodeRuntimeName = version.GetRuntimeName()
framework.Logf("runtime: %s", e2enodeRuntimeName)
})
ginkgo.AfterAll(func(ctx context.Context) {
updateKubeletConfig(ctx, f, oldCfg, true)
})
ginkgo.BeforeEach(func(ctx context.Context) {
// note: reservedCPUs is intentionally NOT set here - it must be initialized on a test-by-test basis
podMap = make(map[string]*v1.Pod)
})
ginkgo.JustBeforeEach(func(ctx context.Context) {
if !e2enodeCgroupV2Enabled {
e2eskipper.Skipf("Skipping since CgroupV2 not used")
}
})
ginkgo.AfterEach(func(ctx context.Context) {
deletePodsAsync(ctx, f, podMap)
})
ginkgo.When("running guaranteed pod level resources tests", ginkgo.Label("guaranteed pod level resources", "reserved-cpus"), func() {
ginkgo.It("should let the container access all the online CPUs without a reserved CPUs set", func(ctx context.Context) {
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: cpuset.CPUSet{},
enablePodLevelResources: true,
}))
pod := makeCPUManagerPod("gu-pod-level-resources", []ctnAttribute{
{
ctnName: "gu-container",
cpuRequest: "1",
cpuLimit: "1",
},
})
pod.Spec.Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1"),
v1.ResourceMemory: resource.MustParse("100Mi"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1"),
v1.ResourceMemory: resource.MustParse("100Mi"),
},
}
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
gomega.Expect(pod).To(HaveContainerCPUsEqualTo("gu-container", onlineCPUs))
})
ginkgo.It("should let the container access all the online CPUs when using a reserved CPUs set", func(ctx context.Context) {
reservedCPUs = cpuset.New(0)
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
policyName: string(cpumanager.PolicyStatic),
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
enablePodLevelResources: true,
}))
pod := makeCPUManagerPod("gu-pod-level-resources", []ctnAttribute{
{
ctnName: "gu-container",
cpuRequest: "1",
cpuLimit: "1",
},
})
pod.Spec.Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1"),
v1.ResourceMemory: resource.MustParse("100Mi"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1"),
v1.ResourceMemory: resource.MustParse("100Mi"),
},
}
ginkgo.By("creating the test pod")
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
podMap[string(pod.UID)] = pod
ginkgo.By("checking if the expected cpuset was assigned")
gomega.Expect(pod).To(HaveContainerCPUsEqualTo("gu-container", onlineCPUs))
})
})
})
// Matching helpers
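// HaveStatusReasonMatchingRegex matches if the pod's status reason matches the given regular expression.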
func HaveStatusReasonMatchingRegex(expr string) types.GomegaMatcher {
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
re, err := regexp.Compile(expr)
if err != nil {
return false, err
}
return re.MatchString(actual.Status.Reason), nil
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} reason {{.Actual.Status.Reason}} does not match regexp {{.Data}}", expr)
}
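// msgData carries the values interpolated into the failure message templates of the custom matchers below.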
type msgData struct {
Name string
CurrentCPUs string
ExpectedCPUs string
MismatchedCPUs string
UncoreCacheAlign string
Count int
Aligned int
CurrentQuota string
ExpectedQuota string
}
func HaveContainerCPUsCount(ctnName string, val int) types.GomegaMatcher {
md := &msgData{
Name: ctnName,
Count: val,
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
cpus, err := getContainerAllowedCPUs(actual, ctnName, false)
md.CurrentCPUs = cpus.String()
if err != nil {
framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err)
return false, err
}
return cpus.Size() == val, nil
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not matching expected count <{{.Data.Count}}> for container {{.Data.Name}}", md)
}
func HaveContainerCPUsAlignedTo(ctnName string, val int) types.GomegaMatcher {
md := &msgData{
Name: ctnName,
Aligned: val,
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
cpus, err := getContainerAllowedCPUs(actual, ctnName, false)
md.CurrentCPUs = cpus.String()
if err != nil {
framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err)
return false, err
}
return cpus.Size()%val == 0, nil
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not aligned to value <{{.Data.Aligned}}> for container {{.Data.Name}}", md)
}
func HaveContainerCPUsOverlapWith(ctnName string, ref cpuset.CPUSet) types.GomegaMatcher {
md := &msgData{
Name: ctnName,
ExpectedCPUs: ref.String(),
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
cpus, err := getContainerAllowedCPUs(actual, ctnName, false)
md.CurrentCPUs = cpus.String()
if err != nil {
framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err)
return false, err
}
sharedCPUs := cpus.Intersection(ref)
return sharedCPUs.Size() > 0, nil
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> overlapping with expected CPUs <{{.Data.ExpectedCPUs}}> for container {{.Data.Name}}", md)
}
func HaveContainerCPUsASubsetOf(ctnName string, ref cpuset.CPUSet) types.GomegaMatcher {
md := &msgData{
Name: ctnName,
ExpectedCPUs: ref.String(),
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
cpus, err := getContainerAllowedCPUs(actual, ctnName, false)
md.CurrentCPUs = cpus.String()
if err != nil {
framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err)
return false, err
}
return cpus.IsSubsetOf(ref), nil
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not a subset of expected CPUs <{{.Data.ExpectedCPUs}}> for container {{.Data.Name}}", md)
}
func HaveContainerCPUsEqualTo(ctnName string, expectedCPUs cpuset.CPUSet) types.GomegaMatcher {
md := &msgData{
Name: ctnName,
ExpectedCPUs: expectedCPUs.String(),
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
cpus, err := getContainerAllowedCPUs(actual, ctnName, false)
md.CurrentCPUs = cpus.String()
if err != nil {
framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err)
return false, err
}
return cpus.Equals(expectedCPUs), nil
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not matching the expected value <{{.Data.ExpectedCPUs}}> for container {{.Data.Name}}", md)
}
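// HaveSandboxQuota matches if the pod sandbox cgroup reports the expected CFS quota. On cgroup v2,
// cpu.max holds "<quota> <period>", e.g. "max 100000" or "50000 100000".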
func HaveSandboxQuota(expectedQuota string) types.GomegaMatcher {
md := &msgData{
ExpectedQuota: expectedQuota,
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
md.Name = klog.KObj(actual).String()
quota, err := getSandboxCFSQuota(actual)
md.CurrentQuota = quota
if err != nil {
framework.Logf("getSandboxCFSQuota() failed: %v", err)
return false, err
}
re, err := regexp.Compile(fmt.Sprintf("^%s %s$", expectedQuota, defaultCFSPeriod))
if err != nil {
return false, err
}
return re.MatchString(quota), nil
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has quota <{{.Data.CurrentQuota}}> not matching expected value <{{.Data.ExpectedQuota}}>", md)
}
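// HaveContainerQuota matches if the named container's cgroup reports the expected CFS quota against the default period.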
func HaveContainerQuota(ctnName, expectedQuota string) types.GomegaMatcher {
md := &msgData{
Name: ctnName,
ExpectedQuota: expectedQuota,
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
quota, err := getContainerCFSQuota(actual, ctnName, false)
md.CurrentQuota = quota
if err != nil {
framework.Logf("getContainerCFSQuota(%s) failed: %v", ctnName, err)
return false, err
}
re, err := regexp.Compile(fmt.Sprintf("^%s %s$", expectedQuota, defaultCFSPeriod))
if err != nil {
return false, err
}
return re.MatchString(quota), nil
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has quota <{{.Data.CurrentQuota}}> not matching expected value <{{.Data.ExpectedQuota}}> for container {{.Data.Name}}", md)
}
func HaveContainerCPUsThreadSiblings(ctnName string) types.GomegaMatcher {
md := &msgData{
Name: ctnName,
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
cpus, err := getContainerAllowedCPUs(actual, ctnName, false)
md.CurrentCPUs = cpus.String()
if err != nil {
framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err)
return false, err
}
expectedCPUs := makeThreadSiblingCPUSet(cpus)
md.ExpectedCPUs = expectedCPUs.String()
return cpus.Equals(expectedCPUs), nil
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not all thread sibling pairs (would be <{{.Data.ExpectedCPUs}}>) for container {{.Data.Name}}", md)
}
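// HaveContainerCPUsQuasiThreadSiblings matches if the container CPUs are made of thread sibling pairs,
// allowing at most `toleration` sibling CPUs to be missing from the set.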
func HaveContainerCPUsQuasiThreadSiblings(ctnName string, toleration int) types.GomegaMatcher {
md := &msgData{
Name: ctnName,
Count: toleration,
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
cpus, err := getContainerAllowedCPUs(actual, ctnName, false)
md.CurrentCPUs = cpus.String()
if err != nil {
framework.Logf("getContainerAllowedCPUs(%s) failed: %v", ctnName, err)
return false, err
}
// by construction this is a superset of cpus (extreme case: no two CPUs in cpus are thread siblings of each other)
expectedCPUs := makeThreadSiblingCPUSet(cpus)
md.ExpectedCPUs = expectedCPUs.String()
mismatchedCPUs := expectedCPUs.Difference(cpus)
md.MismatchedCPUs = mismatchedCPUs.String()
return mismatchedCPUs.Size() <= toleration, nil
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not all thread sibling pairs (would be <{{.Data.ExpectedCPUs}}> mismatched <{{.Data.MismatchedCPUs}}> toleration <{{.Data.Count}}>) for container {{.Data.Name}}", md)
}
func HaveContainerCPUsWithSameUncoreCacheID(ctnName string) types.GomegaMatcher {
md := &msgData{
Name: ctnName,
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
cpus, err := getContainerAllowedCPUs(actual, ctnName, false)
if err != nil {
return false, fmt.Errorf("getContainerAllowedCPUs(%s) failed: %w", ctnName, err)
}
md.CurrentCPUs = cpus.String()
var commonCacheID *int64
for _, cpu := range cpus.List() {
// determine the Uncore Cache ID for each cpu
uncoreID, err := uncoreCacheIDFromSysFS(cpu)
if err != nil {
return false, fmt.Errorf("failed to read cache ID for CPU %d: %w", cpu, err)
}
// if this is the first CPU we check, use its Uncore Cache ID as the reference;
// for subsequent CPUs, compare their Uncore Cache ID to the reference
if commonCacheID == nil {
commonCacheID = &uncoreID
} else if *commonCacheID != uncoreID {
md.UncoreCacheAlign = fmt.Sprintf("shared uncoreID mismatch: CPU %d has uncoreID %d, CPUSet has uncoreID %d", cpu, uncoreID, *commonCacheID)
return false, nil
}
}
// All CPUs matched the same cache ID
md.UncoreCacheAlign = fmt.Sprintf("all CPUs share cache ID %d", *commonCacheID)
return true, nil
}).WithTemplate(
"Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} container {{.Data.Name}} has CPUSet <{{.Data.CurrentCPUs}}> where not all CPUs share the same uncore cache ID: {{.Data.UncoreCacheAlign}}",
md,
)
}
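// HaveContainerCPUsShareUncoreCacheWith matches if any of the container CPUs shares an uncore (index3) cache
// with any CPU in the reference set.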
func HaveContainerCPUsShareUncoreCacheWith(ctnName string, ref cpuset.CPUSet) types.GomegaMatcher {
md := &msgData{
Name: ctnName,
ExpectedCPUs: ref.String(),
}
return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
containerCPUs, err := getContainerAllowedCPUs(actual, ctnName, false)
if err != nil {
return false, fmt.Errorf("getContainerAllowedCPUs(%s) failed: %w", ctnName, err)
}
md.CurrentCPUs = containerCPUs.String()
// Build set of uncore cache IDs from the reference cpuset
refUncoreIDs := sets.New[int64]()
for _, cpu := range ref.UnsortedList() {
uncoreID, err := uncoreCacheIDFromSysFS(cpu)
if err != nil {
return false, fmt.Errorf("failed to read uncore cache ID for reference CPU %d: %w", cpu, err)
}
refUncoreIDs.Insert(uncoreID)
}
// Check if any container CPUs share an uncore ID with the reference set
for _, cpu := range containerCPUs.UnsortedList() {
uncoreID, err := uncoreCacheIDFromSysFS(cpu)
if err != nil {
return false, fmt.Errorf("failed to read uncore cache ID for container CPU %d: %w", cpu, err)
}
if refUncoreIDs.Has(uncoreID) {
md.UncoreCacheAlign = fmt.Sprintf("%d", uncoreID)
return true, nil
}
}
return false, nil
}).WithTemplate(
"Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} container {{.Data.Name}} has CPUSet <{{.Data.CurrentCPUs}}> sharing uncoreCache ID <{{.Data.UncoreCacheAlign}}> with reference CPUSet <{{.Data.ExpectedCPUs}}>",
md,
)
}
// Other helpers
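// getContainerAllowedCPUs reads the cpuset assigned to the named container directly from its cgroup on the host filesystem.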
func getContainerAllowedCPUs(pod *v1.Pod, ctnName string, isInit bool) (cpuset.CPUSet, error) {
cgPath, err := makeCgroupPathForContainer(pod, ctnName, isInit, e2enodeCgroupV2Enabled)
if err != nil {
return cpuset.CPUSet{}, err
}
cgPath = filepath.Join(cgPath, cpusetFileNameFromVersion(e2enodeCgroupV2Enabled))
framework.Logf("pod %s/%s cnt %s qos=%s path %q", pod.Namespace, pod.Name, ctnName, pod.Status.QOSClass, cgPath)
data, err := os.ReadFile(cgPath)
if err != nil {
return cpuset.CPUSet{}, err
}
cpus := strings.TrimSpace(string(data))
framework.Logf("pod %s/%s cnt %s cpuset %q", pod.Namespace, pod.Name, ctnName, cpus)
return cpuset.Parse(cpus)
}
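// getSandboxCFSQuota reads the "cpu.max" content of the pod sandbox cgroup. Cgroup v2 only.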
func getSandboxCFSQuota(pod *v1.Pod) (string, error) {
if !e2enodeCgroupV2Enabled {
return "", fmt.Errorf("only Cgroup V2 is supported")
}
cgPath := filepath.Join(makeCgroupPathForPod(pod, true), "cpu.max")
data, err := os.ReadFile(cgPath)
if err != nil {
return "", err
}
quota := strings.TrimSpace(string(data))
framework.Logf("pod %s/%s qos=%s path %q quota %q", pod.Namespace, pod.Name, pod.Status.QOSClass, cgPath, quota)
return quota, nil
}
func getContainerCFSQuota(pod *v1.Pod, ctnName string, isInit bool) (string, error) {
if !e2enodeCgroupV2Enabled {
return "", fmt.Errorf("only Cgroup V2 is supported")
}
cgPath, err := makeCgroupPathForContainer(pod, ctnName, isInit, true)
if err != nil {
return "", err
}
data, err := os.ReadFile(filepath.Join(cgPath, "cpu.max"))
if err != nil {
return "", err
}
quota := strings.TrimSpace(string(data))
framework.Logf("pod %s/%s qos=%s cnt %s path %q quota %q", pod.Namespace, pod.Name, pod.Status.QOSClass, ctnName, cgPath, quota)
return quota, nil
}
const (
kubeCgroupRoot = "/sys/fs/cgroup"
)
// example path (systemd driver, cri-o, cgroup v2):
// /sys/fs/cgroup/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod0b7632a2_a56e_4278_987a_22de18008dbe.slice/crio-conmon-0bc5eac79e3ae7a0c2651f14722aa10fa333eb2325c2ca97da33aa284cda81b0.scope
// example path (cgroupfs driver, containerd, cgroup v1):
// /sys/fs/cgroup/cpuset/kubepods/burstable/pod8e414e92-17c2-41de-81c7-0045bba9103b/b5791f89a6971bb4a751ffbebf533399c91630aa2906d7c6b5e239f405f3b97a
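// makeCgroupPathForPod returns the host filesystem path of the pod sandbox cgroup, honoring the configured
// cgroup driver (systemd vs cgroupfs) and the cgroup hierarchy version.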
func makeCgroupPathForPod(pod *v1.Pod, isV2 bool) string {
components := []string{defaultNodeAllocatableCgroup}
if pod.Status.QOSClass != v1.PodQOSGuaranteed {
components = append(components, strings.ToLower(string(pod.Status.QOSClass)))
}
components = append(components, "pod"+string(pod.UID))
cgroupName := cm.NewCgroupName(cm.RootCgroupName, components...)
cgroupFsName := ""
// it's quite ugly to use a global, but it saves us from passing a parameter all across the stack many times
if e2enodeCgroupDriver == "systemd" {
cgroupFsName = cgroupName.ToSystemd()
} else {
cgroupFsName = cgroupName.ToCgroupfs()
}
if !isV2 {
cgroupFsName = filepath.Join("cpuset", cgroupFsName)
}
return filepath.Join(kubeCgroupRoot, cgroupFsName)
}
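// makeCgroupPathForContainer returns the host filesystem path of the cgroup of the named (init or app)
// container, resolving the container ID from the pod status.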
func makeCgroupPathForContainer(pod *v1.Pod, ctnName string, isInit, isV2 bool) (string, error) {
fullCntID, ok := findContainerIDByName(pod, ctnName, isInit)
if !ok {
return "", fmt.Errorf("cannot find status for container %q", ctnName)
}
cntID, err := parseContainerID(fullCntID)
if err != nil {
return "", err
}
cntPath := ""
if e2enodeCgroupDriver == "systemd" {
cntPath = containerCgroupPathPrefixFromDriver(e2enodeRuntimeName) + "-" + cntID + ".scope"
} else {
cntPath = cntID
}
return filepath.Join(makeCgroupPathForPod(pod, isV2), cntPath), nil
}
func cpusetFileNameFromVersion(isV2 bool) string {
if isV2 {
return "cpuset.cpus.effective"
}
return "cpuset.cpus"
}
func containerCgroupPathPrefixFromDriver(runtimeName string) string {
if runtimeName == "cri-o" {
return "crio"
}
return "cri-containerd"
}
func parseContainerID(fullID string) (string, error) {
_, cntID, found := strings.Cut(fullID, "://")
if !found {
return "", fmt.Errorf("unsupported containerID: %q", fullID)
}
// TODO: should we validate the kind?
return cntID, nil
}
func findContainerIDByName(pod *v1.Pod, ctnName string, isInit bool) (string, bool) {
cntStatuses := pod.Status.ContainerStatuses
if isInit {
cntStatuses = pod.Status.InitContainerStatuses
}
for idx := range cntStatuses {
if cntStatuses[idx].Name == ctnName {
return cntStatuses[idx].ContainerID, true
}
}
return "", false
}
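// makeThreadSiblingCPUSet expands the given cpuset to include all the thread siblings of each CPU, as reported by sysfs.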
func makeThreadSiblingCPUSet(cpus cpuset.CPUSet) cpuset.CPUSet {
siblingsCPUs := cpuset.New()
for _, cpuID := range cpus.UnsortedList() {
siblingsCPUs = siblingsCPUs.Union(cpuSiblingListFromSysFS(int64(cpuID)))
}
return siblingsCPUs
}
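// updateKubeletConfigIfNeeded restarts the kubelet with desiredCfg only if it differs from the currently
// running configuration, avoiding pointless (and slow) kubelet restarts, and returns the local node.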
func updateKubeletConfigIfNeeded(ctx context.Context, f *framework.Framework, desiredCfg *kubeletconfig.KubeletConfiguration) *v1.Node {
curCfg, err := getCurrentKubeletConfig(ctx)
framework.ExpectNoError(err)
if equalKubeletConfiguration(curCfg, desiredCfg) {
framework.Logf("Kubelet configuration already compliant, nothing to do")
return getLocalNode(ctx, f)
}
framework.Logf("Updating Kubelet configuration")
updateKubeletConfig(ctx, f, desiredCfg, true)
framework.Logf("Updated Kubelet configuration")
return getLocalNode(ctx, f)
}
func equalKubeletConfiguration(cfgA, cfgB *kubeletconfig.KubeletConfiguration) bool {
cfgA = cfgA.DeepCopy()
cfgB = cfgB.DeepCopy()
// we care only about the payload, force metadata to be uniform
cfgA.TypeMeta = metav1.TypeMeta{}
cfgB.TypeMeta = metav1.TypeMeta{}
return reflect.DeepEqual(cfgA, cfgB)
}
type nodeCPUDetails struct {
Capacity int64
Allocatable int64
Reserved int64
}
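// cpuDetailsFromNode derives the CPU capacity, allocatable and reserved counts from the node status;
// the reserved amount is rounded up to whole CPUs and subtracted from capacity to compute allocatable.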
func cpuDetailsFromNode(node *v1.Node) nodeCPUDetails {
localNodeCap := node.Status.Capacity
cpuCap := localNodeCap[v1.ResourceCPU]
localNodeAlloc := node.Status.Allocatable
cpuAlloc := localNodeAlloc[v1.ResourceCPU]
cpuRes := cpuCap.DeepCopy()
cpuRes.Sub(cpuAlloc)
// RoundUp reserved CPUs to get only integer cores.
cpuRes.RoundUp(0)
return nodeCPUDetails{
Capacity: cpuCap.Value(),
Allocatable: cpuCap.Value() - cpuRes.Value(),
Reserved: cpuRes.Value(),
}
}
// smtLevelFromSysFS returns the number of symmetrical multi-thread (SMT) execution units the processor provides.
// The most common value on x86_64 is 2 (2 virtual threads/cores per physical core), that would be smtLevel == 2.
// The following are all synonyms: threadsPerCore, smtLevel
// Note: it's hard to find a name that is descriptive yet not overly long; "threadSiblingCount", "smtLevel" and "threadsPerCore" are all questionable.
func smtLevelFromSysFS() int {
cpuID := int64(0) // this is just the most likely cpu to be present in a random system. No special meaning besides this.
cpus := cpuSiblingListFromSysFS(cpuID)
return cpus.Size()
}
func cpuSiblingListFromSysFS(cpuID int64) cpuset.CPUSet {
data, err := os.ReadFile(fmt.Sprintf("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpuID))
framework.ExpectNoError(err)
// the number of thread siblings a CPU has = SMT level
// example: 2-way SMT means 2 sibling threads per physical core
cpus, err := cpuset.Parse(strings.TrimSpace(string(data)))
framework.ExpectNoError(err)
return cpus
}
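// uncoreCacheIDFromSysFS returns the ID of the index3 (uncore/LLC) cache of the given CPU as reported by sysfs;
// CPUs reporting the same ID share the same uncore cache.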
func uncoreCacheIDFromSysFS(cpuID int) (int64, error) {
// expect sysfs path for Uncore Cache ID for each CPU to be:
// /sys/devices/system/cpu/cpu#/cache/index3/id
cacheIDPath := filepath.Join("/sys/devices/system/cpu", fmt.Sprintf("cpu%d", cpuID), "cache", "index3", "id")
cacheIDBytes, err := os.ReadFile(cacheIDPath)
if err != nil {
return 0, fmt.Errorf("failed to read cache ID for CPU %d: %w", cpuID, err)
}
cacheIDStr := strings.TrimSpace(string(cacheIDBytes))
cacheID, err := strconv.ParseInt(cacheIDStr, 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse cache ID for CPU %d: %w", cpuID, err)
}
return cacheID, nil
}
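// makeCPUManagerBEPod creates a best-effort pod (no resource requests or limits) running the given containers,
// mounting the host cgroupfs and the pod UID (via the downward API) into each container.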
func makeCPUManagerBEPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod {
var containers []v1.Container
for _, ctnAttr := range ctnAttributes {
ctn := v1.Container{
Name: ctnAttr.ctnName,
Image: busyboxImage,
Command: []string{"sh", "-c", ctnAttr.ctnCommand},
VolumeMounts: []v1.VolumeMount{
{
Name: "sysfscgroup",
MountPath: "/sysfscgroup",
},
{
Name: "podinfo",
MountPath: "/podinfo",
},
},
}
containers = append(containers, ctn)
}
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: podName,
},
Spec: v1.PodSpec{
RestartPolicy: v1.RestartPolicyNever,
Containers: containers,
Volumes: []v1.Volume{
{
Name: "sysfscgroup",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "/sys/fs/cgroup"},
},
},
{
Name: "podinfo",
VolumeSource: v1.VolumeSource{
DownwardAPI: &v1.DownwardAPIVolumeSource{
Items: []v1.DownwardAPIVolumeFile{
{
Path: "uid",
FieldRef: &v1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "metadata.uid",
},
},
},
},
},
},
},
},
}
}
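// requireCGroupV2 skips the current spec when the node does not run with cgroup v2.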
func requireCGroupV2() {
if e2enodeCgroupV2Enabled {
return
}
e2eskipper.Skipf("Skipping since CgroupV2 not used")
}