Merge pull request #131850 from wongchar/odd-integer-cpu
cpumanager: uncorecache alignment for odd integer cpus
@@ -556,13 +556,31 @@ func (a *cpuAccumulator) takeFullUncore() {
}

func (a *cpuAccumulator) takePartialUncore(uncoreID int) {
-	numCoresNeeded := a.numCPUsNeeded / a.topo.CPUsPerCore()
+	// determine the number of cores needed whether SMT/hyperthread is enabled or disabled
+	numCoresNeeded := (a.numCPUsNeeded + a.topo.CPUsPerCore() - 1) / a.topo.CPUsPerCore()

	// determine the N number of free cores (physical cpus) within the UncoreCache, then
	// determine the M number of free cpus (virtual cpus) that correspond with the free cores
	freeCores := a.details.CoresNeededInUncoreCache(numCoresNeeded, uncoreID)
	freeCPUs := a.details.CPUsInCores(freeCores.UnsortedList()...)

	// when SMT/hyperthread is enabled and the remaining cpu requirement is an odd integer value:
	// sort the free CPUs that were determined based on the cores that have available cpus.
	// if the number of free cpus is greater than the cpus needed, we can drop the last cpu,
	// since the odd integer request will only require one of the two free cpus that
	// correspond to the last core
	if a.numCPUsNeeded%2 != 0 && a.topo.CPUsPerCore() > 1 {
		// we sort freeCPUs to ensure we pack virtual cpu allocations, meaning we allocate
		// whole cores' worth of cpus as much as possible to reduce smt-misalignment
		sortFreeCPUs := freeCPUs.List()
		if len(sortFreeCPUs) > a.numCPUsNeeded {
			// if we are in takePartialUncore, the accumulator is not satisfied after
			// takeFullUncore, so freeCPUs.Size() can't be < 1
			sortFreeCPUs = sortFreeCPUs[:freeCPUs.Size()-1]
		}
		freeCPUs = cpuset.New(sortFreeCPUs...)
	}

	// claim the cpus if the free cpus within the UncoreCache can satisfy the needed cpus
	claimed := (a.numCPUsNeeded == freeCPUs.Size())
	klog.V(4).InfoS("takePartialUncore: trying to claim partial uncore",
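Note: to make the arithmetic in the hunk above concrete, here is a minimal standalone Go sketch (the function names are illustrative, not the accumulator's actual API) of the two key steps: ceiling division to round an odd CPU request up to whole physical cores, and dropping the last CPU from the sorted free list when an odd request needs only one thread of the final core.

package main

import "fmt"

// coresNeeded rounds a CPU request up to whole physical cores:
// ceil(numCPUsNeeded / cpusPerCore) in integer arithmetic.
func coresNeeded(numCPUsNeeded, cpusPerCore int) int {
	return (numCPUsNeeded + cpusPerCore - 1) / cpusPerCore
}

// trimOddRequest drops the last CPU from a sorted free-CPU list when an
// odd request on an SMT system needs only one thread of the last core.
func trimOddRequest(sortedFreeCPUs []int, numCPUsNeeded, cpusPerCore int) []int {
	if numCPUsNeeded%2 != 0 && cpusPerCore > 1 && len(sortedFreeCPUs) > numCPUsNeeded {
		return sortedFreeCPUs[:len(sortedFreeCPUs)-1]
	}
	return sortedFreeCPUs
}

func main() {
	// 3 CPUs on a 2-way SMT machine still occupy ceil(3/2) = 2 cores.
	fmt.Println(coresNeeded(3, 2)) // 2
	// Two free cores expose threads {2, 3, 66, 67} (siblings pair N with N+64);
	// an odd request of 3 keeps {2, 3, 66}, so exactly 3 free CPUs remain and
	// the claim check numCPUsNeeded == freeCPUs.Size() succeeds.
	fmt.Println(trimOddRequest([]int{2, 3, 66, 67}, 3, 2)) // [2 3 66]
}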
@@ -685,7 +685,7 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) {
			cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
			1,
			"",
-			cpuset.New(2),
+			cpuset.New(1),
		},
		{
			"take first available UncoreCache from first socket",

@@ -1679,6 +1679,169 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) {
			),
			expUncoreCache: cpuset.New(0, 1), // best-effort across uncore cache 0 and 1
		},
		{
			// odd integer cpu required on smt-disabled processor
			description:     "odd integer cpu required on smt-disabled",
			topo:            topoSmallSingleSocketSingleNumaPerSocketNoSMTUncore, // 8 cpus per uncore
			numReservedCPUs: 4,
			reserved:        cpuset.New(0, 1, 2, 3), // note 4 cpus taken from uncore 0
			cpuPolicyOptions: map[string]string{
				FullPCPUsOnlyOption:            "true",
				PreferAlignByUnCoreCacheOption: "true",
			},
			stAssignments:   state.ContainerCPUAssignments{},
			stDefaultCPUSet: topoSmallSingleSocketSingleNumaPerSocketNoSMTUncore.CPUDetails.CPUs(),
			pod: WithPodUID(
				makeMultiContainerPod(
					[]struct{ request, limit string }{}, // init container
					[]struct{ request, limit string }{ // app container
						{"5000m", "5000m"}, // odd integer request; exceeds the 4 free cpus left in uncore 0
					},
				),
				"with-single-container",
			),
			expCPUAlloc: true,
			expCSet:     cpuset.New(8, 9, 10, 11, 12),
		},
		{
			// odd integer cpu requirement on smt-enabled
			description:     "odd integer required on smt-enabled",
			topo:            topoSingleSocketSingleNumaPerSocketSMTSmallUncore, // 8 cpus per uncore
			numReservedCPUs: 4,
			reserved:        cpuset.New(0, 1, 64, 65), // note 4 cpus taken from uncore 0
			cpuPolicyOptions: map[string]string{
				PreferAlignByUnCoreCacheOption: "true",
			},
			stAssignments:   state.ContainerCPUAssignments{},
			stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketSMTSmallUncore.CPUDetails.CPUs(),
			pod: WithPodUID(
				makeMultiContainerPod(
					[]struct{ request, limit string }{}, // init container
					[]struct{ request, limit string }{ // app container
						{"3000m", "3000m"},
					},
				),
				"with-single-container",
			),
			expCPUAlloc: true,
			expCSet:     cpuset.New(2, 3, 66),
		},
		{
			// odd integer cpu required on smt-enabled and odd integer free cpus available on uncore
			description:     "odd integer required on odd integer partial uncore",
			topo:            topoSingleSocketSingleNumaPerSocketSMTSmallUncore, // 8 cpus per uncore
			numReservedCPUs: 3,
			reserved:        cpuset.New(0, 1, 64), // note 3 cpus taken from uncore 0
			cpuPolicyOptions: map[string]string{
				PreferAlignByUnCoreCacheOption: "true",
			},
			stAssignments:   state.ContainerCPUAssignments{},
			stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketSMTSmallUncore.CPUDetails.CPUs(),
			pod: WithPodUID(
				makeMultiContainerPod(
					[]struct{ request, limit string }{}, // init container
					[]struct{ request, limit string }{ // app container
						{"3000m", "3000m"},
					},
				),
				"with-single-container",
			),
			expCPUAlloc: true,
			expCSet:     cpuset.New(2, 65, 66),
		},
		{
			// even integer requested on smt-enabled processor with odd integer available cpus on uncore;
			// even integer cpu containers will not be placed on uncore caches with odd integer free cpus
			description:     "even integer required on odd integer partial uncore",
			topo:            topoSingleSocketSingleNumaPerSocketSMTSmallUncore, // 8 cpus per uncore
			numReservedCPUs: 3,
			reserved:        cpuset.New(0, 1, 64), // note 3 cpus taken from uncore 0
			cpuPolicyOptions: map[string]string{
				PreferAlignByUnCoreCacheOption: "true",
			},
			stAssignments:   state.ContainerCPUAssignments{},
			stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketSMTSmallUncore.CPUDetails.CPUs(),
			pod: WithPodUID(
				makeMultiContainerPod(
					[]struct{ request, limit string }{}, // init container
					[]struct{ request, limit string }{ // app container
						{"4000m", "4000m"},
					},
				),
				"with-single-container",
			),
			expCPUAlloc: true,
			expCSet:     cpuset.New(4, 5, 68, 69),
		},
		{
			// large odd integer cpu required on smt-enabled
			description:     "large odd integer required on smt-enabled",
			topo:            topoSingleSocketSingleNumaPerSocketSMTSmallUncore, // 8 cpus per uncore
			numReservedCPUs: 3,
			reserved:        cpuset.New(0, 1, 64), // note 3 cpus taken from uncore 0
			cpuPolicyOptions: map[string]string{
				PreferAlignByUnCoreCacheOption: "true",
			},
			stAssignments:   state.ContainerCPUAssignments{},
			stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketSMTSmallUncore.CPUDetails.CPUs(),
			pod: WithPodUID(
				makeMultiContainerPod(
					[]struct{ request, limit string }{}, // init container
					[]struct{ request, limit string }{ // app container
						{"11000m", "11000m"},
					},
				),
				"with-single-container",
			),
			expCPUAlloc: true,
			expCSet:     cpuset.New(2, 65, 66, 4, 5, 6, 7, 68, 69, 70, 71), // full uncore 1 and partial uncore 0
		},
		{
			// odd integer cpu required on hyperthread-enabled and monolithic uncore cache
			description:     "odd integer required on HT monolithic uncore",
			topo:            topoDualSocketSubNumaPerSocketHTMonolithicUncore,
			numReservedCPUs: 3,
			reserved:        cpuset.New(0, 1, 120), // note 3 cpus taken from uncore 0
			cpuPolicyOptions: map[string]string{
				PreferAlignByUnCoreCacheOption: "true",
			},
			stAssignments:   state.ContainerCPUAssignments{},
			stDefaultCPUSet: topoDualSocketSubNumaPerSocketHTMonolithicUncore.CPUDetails.CPUs(),
			pod: WithPodUID(
				makeMultiContainerPod(
					[]struct{ request, limit string }{}, // init container
					[]struct{ request, limit string }{ // app container
						{"5000m", "5000m"},
					},
				),
				"with-single-container",
			),
			expCPUAlloc: true,
			expCSet:     cpuset.New(2, 3, 121, 122, 123),
		},
		{
			// even integer cpu required on hyperthread-enabled and monolithic uncore cache
			description:     "even integer required on HT monolithic uncore",
			topo:            topoDualSocketSubNumaPerSocketHTMonolithicUncore,
			numReservedCPUs: 3,
			reserved:        cpuset.New(0, 1, 120), // note 3 cpus taken from uncore 0
			cpuPolicyOptions: map[string]string{
				PreferAlignByUnCoreCacheOption: "true",
			},
			stAssignments:   state.ContainerCPUAssignments{},
			stDefaultCPUSet: topoDualSocketSubNumaPerSocketHTMonolithicUncore.CPUDetails.CPUs(),
			pod: WithPodUID(
				makeMultiContainerPod(
					[]struct{ request, limit string }{}, // init container
					[]struct{ request, limit string }{ // app container
						{"4000m", "4000m"},
					},
				),
				"with-single-container",
			),
			expCPUAlloc: true,
			expCSet:     cpuset.New(2, 3, 122, 123), // takeFullCores
		},
	}

	for _, testCase := range testCases {
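Note: the expected cpusets above follow from the sibling layout of the fake topologies. Assuming topoSingleSocketSingleNumaPerSocketSMTSmallUncore pairs thread N with thread N+64 (an assumption inferred from the reserved and expected sets), a small sketch of that mapping:

// siblings returns both hyperthreads of the core owning cpuID, assuming a
// 128-CPU topology where thread N pairs with thread N+64 (an assumption
// inferred from the reserved/expected sets above).
func siblings(cpuID int) [2]int {
	const offset = 64
	if cpuID >= offset {
		return [2]int{cpuID - offset, cpuID}
	}
	return [2]int{cpuID, cpuID + offset}
}

// e.g. "odd integer required on smt-enabled": reserved {0, 1, 64, 65} covers
// cores (0,64) and (1,65), so the 3-CPU request packs core (2,66) fully and
// takes one thread of core (3,67), yielding the expected {2, 3, 66}.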
@@ -29,6 +29,7 @@ import (
	"path/filepath"
	"reflect"
	"regexp"
	"strconv"
	"strings"
	"time"

@@ -126,6 +127,7 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, fra
	var reservedCPUs cpuset.CPUSet
	var onlineCPUs cpuset.CPUSet
	var smtLevel int
	var uncoreGroupSize int
	// tracks all the pods created by an It() block. Best would be a namespace per It block
	// TODO: move to a namespace per It block?
	var podMap map[string]*v1.Pod

@@ -145,6 +147,9 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, fra
	smtLevel = smtLevelFromSysFS() // this should not change at all, at least during this suite lifetime
	framework.Logf("SMT level: %d", smtLevel)

	uncoreGroupSize = getUncoreCPUGroupSize()
	framework.Logf("Uncore Group Size: %d", uncoreGroupSize)

	e2enodeCgroupV2Enabled = IsCgroup2UnifiedMode()
	framework.Logf("cgroup V2 enabled: %v", e2enodeCgroupV2Enabled)
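Note: getUncoreCPUGroupSize itself is not shown in this diff. A plausible standalone sketch, under the assumption that an uncore group is the set of CPUs sharing cpu0's L3 (index3) cache in sysfs (the helper name here is hypothetical):

package main

import (
	"fmt"
	"os"
	"strings"

	"k8s.io/utils/cpuset"
)

// getUncoreCPUGroupSizeSketch is a hypothetical stand-in for the suite's
// getUncoreCPUGroupSize helper: it counts how many CPUs share cpu0's
// L3 (index3) cache, i.e. the size of one uncore group.
func getUncoreCPUGroupSizeSketch() (int, error) {
	data, err := os.ReadFile("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_list")
	if err != nil {
		return 0, err
	}
	cpus, err := cpuset.Parse(strings.TrimSpace(string(data)))
	if err != nil {
		return 0, err
	}
	return cpus.Size(), nil
}

func main() {
	size, err := getUncoreCPUGroupSizeSketch()
	if err != nil {
		fmt.Printf("failed to read uncore group size: %v\n", err)
		return
	}
	fmt.Printf("Uncore Group Size: %d\n", size)
}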
@@ -759,6 +764,90 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, fra
	})
})

ginkgo.When("running with Uncore Cache Alignment", ginkgo.Label("prefer-align-cpus-by-uncore-cache"), func() {
	ginkgo.BeforeEach(func(ctx context.Context) {
		reservedCPUs = cpuset.New(0)

		updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
			policyName:              string(cpumanager.PolicyStatic),
			reservedSystemCPUs:      reservedCPUs,
			enableCPUManagerOptions: true,
			options: map[string]string{
				cpumanager.PreferAlignByUnCoreCacheOption: "true",
			},
		}))
	})

	ginkgo.It("should admit container asking odd integer amount of cpus", func(ctx context.Context) {
		// assume an uncore cache's worth of cpus will always be an even integer value;
		// the smallest odd integer cpu request can be 1 cpu.
		// for a meaningful test, the minimum allocatable cpu requirement should be:
		// minCPUCapacity + reservedCPUs.Size() + 1 CPU allocated
		cpuCount := minCPUCapacity + reservedCPUs.Size() + 1
		skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)

		// check if the node processor architecture has a split or monolithic uncore cache.
		// prefer-align-cpus-by-uncore-cache can be enabled on non-split uncore cache processors
		// with no change to default static behavior
		allocatableCPUs := cpuDetailsFromNode(getLocalNode(ctx, f)).Allocatable
		hasSplitUncore := (allocatableCPUs > int64(uncoreGroupSize))

		if hasSplitUncore {
			// create a container that requires one less cpu than a full uncore cache's worth of cpus;
			// assume the total shared CPUs of a single uncore cache will always be an even integer
			cpuRequest := fmt.Sprintf("%d000m", (uncoreGroupSize - 1))
			ginkgo.By(fmt.Sprintf("creating the testing pod cpuRequest=%v", cpuRequest))
			pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
				{
					ctnName:    "gu-container-pos",
					cpuRequest: cpuRequest,
					cpuLimit:   cpuRequest,
				},
			})
			ginkgo.By("creating the test pod")
			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
			podMap[string(pod.UID)] = pod

			ginkgo.By("validating each container in the testing pod")
			for _, cnt := range pod.Spec.Containers {
				ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))

				// expect allocated CPUs to be able to fit on uncore cache ID equal to 0
				expUncoreCPUSet, err := uncoreCPUSetFromSysFS(0)
				framework.ExpectNoError(err, "cannot determine shared cpus for uncore cache on node")
				gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(cnt.Name, expUncoreCPUSet))
			}
		} else {
			// for a node with a monolithic uncore cache processor,
			// uncoreGroupSize will be a socket's worth of CPUs;
			// subtract the (minCPUCapacity + 1) CPU resource constraint
			cpuRequest := fmt.Sprintf("%d000m", (uncoreGroupSize - (minCPUCapacity + 1)))
			ginkgo.By(fmt.Sprintf("creating the testing pod cpuRequest=%v", cpuRequest))
			pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
				{
					ctnName:    "gu-container-pos",
					cpuRequest: cpuRequest,
					cpuLimit:   cpuRequest,
				},
			})
			ginkgo.By("creating the test pod")
			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
			podMap[string(pod.UID)] = pod

			ginkgo.By("validating each container in the testing pod")
			for _, cnt := range pod.Spec.Containers {
				ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))

				// expect allocated CPUs to be able to fit on uncore cache ID equal to 0
				expUncoreCPUSet, err := uncoreCPUSetFromSysFS(0)
				framework.ExpectNoError(err, "cannot determine shared cpus for uncore cache on node")
				gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(cnt.Name, expUncoreCPUSet))
			}
		}
	})
})
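Note: to ground the split-versus-monolithic check used above with concrete numbers, a minimal sketch of the same heuristic (the example figures are illustrative):

// hasSplitUncoreSketch mirrors the heuristic in the test above: a processor
// has a split uncore cache when the node exposes more allocatable CPUs than
// one uncore (L3) group holds.
func hasSplitUncoreSketch(allocatableCPUs int64, uncoreGroupSize int) bool {
	return allocatableCPUs > int64(uncoreGroupSize)
}

// e.g. hasSplitUncoreSketch(32, 8) == true   (four 8-CPU L3 groups)
//      hasSplitUncoreSketch(32, 32) == false (one monolithic L3)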
ginkgo.When("checking the compatibility between options", func() {
	// please avoid nesting `BeforeEach` as much as possible. Ideally avoid completely.
	ginkgo.Context("SMT Alignment and strict CPU reservation", ginkgo.Label("smt-alignment", "strict-cpu-reservation"), func() {
@@ -862,6 +951,135 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, fra
	})
})

// please avoid nesting `BeforeEach` as much as possible. Ideally avoid completely.
ginkgo.Context("SMT Alignment and Uncore Cache Alignment", ginkgo.Label("smt-alignment", "prefer-align-cpus-by-uncore-cache"), func() {
	ginkgo.BeforeEach(func(ctx context.Context) {
		// strict SMT alignment is trivially verified and granted on non-SMT systems
		if smtLevel < minSMTLevel {
			e2eskipper.Skipf("Skipping CPU Manager %q tests since SMT disabled", cpumanager.FullPCPUsOnlyOption)
		}
		reservedCPUs = cpuset.New(0)
	})

	ginkgo.It("should assign packed CPUs with prefer-align-cpus-by-uncore-cache disabled and pcpu-only policy options enabled", func(ctx context.Context) {
		skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), smtLevel)

		updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
			policyName:              string(cpumanager.PolicyStatic),
			reservedSystemCPUs:      reservedCPUs,
			enableCPUManagerOptions: true,
			options: map[string]string{
				cpumanager.FullPCPUsOnlyOption:            "true",
				cpumanager.PreferAlignByUnCoreCacheOption: "false",
			},
		}))

		ctnAttrs := []ctnAttribute{
			{
				ctnName:    "test-gu-container-uncore-cache-alignment-disabled",
				cpuRequest: "2000m",
				cpuLimit:   "2000m",
			},
		}
		pod := makeCPUManagerPod("test-pod-uncore-cache-alignment-disabled", ctnAttrs)
		ginkgo.By("creating the test pod")
		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
		podMap[string(pod.UID)] = pod

		ginkgo.By("validating each container in the testing pod")
		for _, cnt := range pod.Spec.Containers {
			ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))

			gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
			gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings(cnt.Name))
		}
	})

	ginkgo.It("should assign CPUs aligned to uncore caches with prefer-align-cpus-by-uncore-cache and pcpu-only policy options enabled", func(ctx context.Context) {
		skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), smtLevel)

		updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
			policyName:              string(cpumanager.PolicyStatic),
			reservedSystemCPUs:      reservedCPUs,
			enableCPUManagerOptions: true,
			options: map[string]string{
				cpumanager.FullPCPUsOnlyOption:            "true",
				cpumanager.PreferAlignByUnCoreCacheOption: "true",
			},
		}))

		// check if the node processor architecture has a split or monolithic uncore cache.
		// prefer-align-cpus-by-uncore-cache can be enabled on non-split uncore cache processors
		// with no change to default static behavior
		allocatableCPUs := cpuDetailsFromNode(getLocalNode(ctx, f)).Allocatable
		hasSplitUncore := (allocatableCPUs > int64(uncoreGroupSize))

		if hasSplitUncore {
			// for a node with a split uncore cache processor,
			// create a pod that requires a full uncore cache's worth of CPUs
			ctnAttrs := []ctnAttribute{
				{
					ctnName:    "test-gu-container-align-cpus-by-uncore-cache-on-split-uncore",
					cpuRequest: fmt.Sprintf("%d", uncoreGroupSize),
					cpuLimit:   fmt.Sprintf("%d", uncoreGroupSize),
				},
			}
			pod := makeCPUManagerPod("test-pod-align-cpus-by-uncore-cache", ctnAttrs)
			ginkgo.By("creating the test pod")
			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
			podMap[string(pod.UID)] = pod

			// the 'prefer-align-cpus-by-uncore-cache' policy option attempts, on a best-effort basis,
			// to allocate cpus so that distribution across uncore caches is minimized. Since the test
			// container requests a full uncore cache's worth of cpus and CPU0 is part of the reserved
			// CPUset and not allocatable, the policy will attempt to allocate cpus from the next
			// available uncore cache in numerical order (uncore cache ID equal to 1)

			for _, cnt := range pod.Spec.Containers {
				ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))

				gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
				cpus, err := getContainerAllowedCPUs(pod, cnt.Name, false)
				framework.ExpectNoError(err, "cannot get cpus allocated to pod %s/%s cnt %s", pod.Namespace, pod.Name, cnt.Name)

				siblingsCPUs := makeThreadSiblingCPUSet(cpus)
				gomega.Expect(pod).To(HaveContainerCPUsEqualTo(cnt.Name, siblingsCPUs))

				// expect a full uncore cache's worth of cpus to be assigned from uncoreCacheID equal to 1,
				// since CPU0 is part of the reserved CPUset, leaving insufficient CPUs on
				// uncoreCacheID equal to 0
				expUncoreCPUSet, err := uncoreCPUSetFromSysFS(1)
				framework.ExpectNoError(err, "cannot determine shared cpus for uncore cache on node")
				gomega.Expect(pod).To(HaveContainerCPUsEqualTo(cnt.Name, expUncoreCPUSet))
			}
		} else {
			// for a node with a monolithic uncore cache processor,
			// expect default static behavior with the pcpu-only policy enabled
			// and prefer-align-cpus-by-uncore-cache enabled
			ctnAttrs := []ctnAttribute{
				{
					ctnName:    "test-gu-container-align-cpus-by-uncore-cache-on-mono-uncore",
					cpuRequest: "2000m",
					cpuLimit:   "2000m",
				},
			}
			pod := makeCPUManagerPod("test-pod-align-cpus-by-uncore-cache", ctnAttrs)
			ginkgo.By("creating the test pod")
			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
			podMap[string(pod.UID)] = pod

			ginkgo.By("validating each container in the testing pod")
			for _, cnt := range pod.Spec.Containers {
				ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))

				gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
				gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings(cnt.Name))
			}
		}
	})
})
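Note: makeThreadSiblingCPUSet is not shown in this diff. A plausible sketch, assuming it expands a cpuset with each CPU's thread siblings via the suite's cpuSiblingListFromSysFS helper (shown further below); the name here is hypothetical:

// makeThreadSiblingCPUSetSketch is a hypothetical version of the suite's
// makeThreadSiblingCPUSet helper: it unions in the sysfs thread siblings of
// every CPU in the input set, so a set aligned to whole cores maps to itself.
func makeThreadSiblingCPUSetSketch(cpus cpuset.CPUSet) cpuset.CPUSet {
	result := cpuset.New()
	for _, cpuID := range cpus.UnsortedList() {
		// /sys/devices/system/cpu/cpu<id>/topology/thread_siblings_list
		result = result.Union(cpuSiblingListFromSysFS(int64(cpuID)))
	}
	return result
}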
// please avoid nesting `BeforeEach` as much as possible. Ideally avoid completely.
ginkgo.Context("SMT Alignment and distribution across NUMA", ginkgo.Label("smt-alignment", "distribute-cpus-across-numa"), func() {
	ginkgo.BeforeEach(func(ctx context.Context) {
@@ -1771,6 +1989,65 @@ func cpuSiblingListFromSysFS(cpuID int64) cpuset.CPUSet {
	return cpus
}

func uncoreCPUSetFromSysFS(uncoreID int64) (cpuset.CPUSet, error) {
	basePath := "/sys/devices/system/cpu"
	result := cpuset.New()
	entries, err := os.ReadDir(basePath)
	// return an error if the base path directory cannot be read
	if err != nil {
		return result, fmt.Errorf("failed to read %s: %w", basePath, err)
	}
	// scan each CPU in sysfs under the following path:
	// /sys/devices/system/cpu/cpu#
	for _, entry := range entries {
		// expect the sysfs path for each CPU to be /sys/devices/system/cpu/cpu#;
		// ignore directories that do not match this format
		if !entry.IsDir() || !strings.HasPrefix(entry.Name(), "cpu") {
			continue
		}

		// skip non-numeric 'cpu' directories, meaning there is no trailing
		// cpu ID in the directory name (example: skip 'cpufreq')
		cpuNumStr := strings.TrimPrefix(entry.Name(), "cpu")
		if _, err := strconv.Atoi(cpuNumStr); err != nil {
			continue
		}

		// determine whether the input uncoreID matches the cpu's index3 cache ID found at:
		// /sys/devices/system/cpu/cpu#/cache/index3/id
		uncoreCacheIDPath := filepath.Join(basePath, entry.Name(), "cache", "index3", "id")
		sysFSUncoreIDByte, err := os.ReadFile(uncoreCacheIDPath)
		// return an error if sysfs does not contain an index3 cache ID
		if err != nil {
			return result, fmt.Errorf("failed to read %s: %w", uncoreCacheIDPath, err)
		}
		sysFSUncoreIDStr := strings.TrimSpace(string(sysFSUncoreIDByte))
		sysFSUncoreID, err := strconv.ParseInt(sysFSUncoreIDStr, 10, 64)
		// if the contents of /sys/devices/system/cpu/cpu#/cache/index3/id cannot be parsed or
		// do not match the uncoreID input, skip the cpu
		if err != nil || sysFSUncoreID != uncoreID {
			continue
		}

		// once a cpu's index3 cache ID is matched to the input uncoreID,
		// parse the shared cpus for uncoreID (the sysfs index3 cache ID) from
		// /sys/devices/system/cpu/cpu#/cache/index3/shared_cpu_list
		// and return the cpuset
		uncoreSharedCPUListPath := filepath.Join(basePath, entry.Name(), "cache", "index3", "shared_cpu_list")
		uncoreSharedCPUBytes, err := os.ReadFile(uncoreSharedCPUListPath)
		if err != nil {
			return result, fmt.Errorf("failed to read shared_cpu_list: %w", err)
		}
		uncoreSharedCPUStr := strings.TrimSpace(string(uncoreSharedCPUBytes))
		uncoreSharedCPU, err := cpuset.Parse(uncoreSharedCPUStr)
		if err != nil {
			return result, fmt.Errorf("failed to parse CPUSet from %s: %w", uncoreSharedCPUStr, err)
		}
		return uncoreSharedCPU, nil
	}
	return result, fmt.Errorf("no CPUs found with cache ID %d", uncoreID)
}

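Note: a minimal usage sketch of the helper above (the cache ID 0 is just an example; the wrapper name is hypothetical and assumes the fmt import):

// printUncoreCPUSet resolves and prints the CPUs sharing uncore (L3) cache 0,
// the same lookup the e2e validations above perform.
func printUncoreCPUSet() {
	cpus, err := uncoreCPUSetFromSysFS(0)
	if err != nil {
		fmt.Printf("lookup failed: %v\n", err)
		return
	}
	fmt.Printf("uncore cache 0 spans CPUs %s (%d cpus)\n", cpus.String(), cpus.Size())
}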
func makeCPUManagerBEPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod {
	var containers []v1.Container
	for _, ctnAttr := range ctnAttributes {