From 70191dd21ca9c225faaeff927d788bb0bb2af1a6 Mon Sep 17 00:00:00 2001
From: Charles Wong
Date: Thu, 26 Jun 2025 15:27:45 -0500
Subject: [PATCH 1/2] fix uncore cache alignment for odd integer cpus

---
 pkg/kubelet/cm/cpumanager/cpu_assignment.go   |  17 +-
 .../cm/cpumanager/cpu_assignment_test.go      |   2 +-
 .../cm/cpumanager/policy_static_test.go       | 163 ++++++++++++++++++
 3 files changed, 180 insertions(+), 2 deletions(-)

diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go
index 27afabefa42..37168c1f621 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go
@@ -556,13 +556,28 @@ func (a *cpuAccumulator) takeFullUncore() {
 }
 
 func (a *cpuAccumulator) takePartialUncore(uncoreID int) {
-	numCoresNeeded := a.numCPUsNeeded / a.topo.CPUsPerCore()
+	// determine the number of cores needed whether SMT/hyperthreading is enabled or disabled
+	numCoresNeeded := (a.numCPUsNeeded + a.topo.CPUsPerCore() - 1) / a.topo.CPUsPerCore()
 
 	// determine the N number of free cores (physical cpus) within the UncoreCache, then
 	// determine the M number of free cpus (virtual cpus) that correspond with the free cores
 	freeCores := a.details.CoresNeededInUncoreCache(numCoresNeeded, uncoreID)
 	freeCPUs := a.details.CPUsInCores(freeCores.UnsortedList()...)
 
+	// when SMT/hyperthreading is enabled and the remaining cpu requirement is an odd integer value:
+	// sort the free CPUs that were determined based on the cores that have available cpus.
+	// if the number of free cpus is greater than the cpus needed, we can drop the last cpu,
+	// since the odd integer request will only require one out of the two free cpus that
+	// correspond to the last core
+	if a.numCPUsNeeded%2 != 0 && a.topo.CPUsPerCore() > 1 {
+		sortFreeCPUs := freeCPUs.List()
+		if len(sortFreeCPUs) > a.numCPUsNeeded {
+			// amount of freeCPUs cannot be < 1
+			sortFreeCPUs = sortFreeCPUs[:freeCPUs.Size()-1]
+		}
+		freeCPUs = cpuset.New(sortFreeCPUs...)
+	}
+
 	// claim the cpus if the free cpus within the UncoreCache can satisfy the needed cpus
 	claimed := (a.numCPUsNeeded == freeCPUs.Size())
 	klog.V(4).InfoS("takePartialUncore: trying to claim partial uncore",
diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go
index 241bfe611b3..8d5e05aa5d4 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_assignment_test.go
@@ -685,7 +685,7 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) {
 			cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
 			1,
 			"",
-			cpuset.New(2),
+			cpuset.New(1),
 		},
 		{
 			"take first available UncoreCache from first socket",
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
index db3a3649b56..ed37008fbb7 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
@@ -1679,6 +1679,169 @@ func TestStaticPolicyAddWithUncoreAlignment(t *testing.T) {
 			),
 			expUncoreCache: cpuset.New(0, 1), // best-effort across uncore cache 0 and 1
 		},
+		{
+			// odd integer cpu required on smt-disabled processor
+			description:     "odd integer cpu required on smt-disabled",
+			topo:            topoSmallSingleSocketSingleNumaPerSocketNoSMTUncore, // 8 cpus per uncore
+			numReservedCPUs: 4,
+			reserved:        cpuset.New(0, 1, 2, 3), // note 4 cpus taken from uncore 0
+			cpuPolicyOptions: map[string]string{
+				FullPCPUsOnlyOption:            "true",
+				PreferAlignByUnCoreCacheOption: "true",
+			},
+			stAssignments:   state.ContainerCPUAssignments{},
+			stDefaultCPUSet: topoSmallSingleSocketSingleNumaPerSocketNoSMTUncore.CPUDetails.CPUs(),
+			pod: WithPodUID(
+				makeMultiContainerPod(
+					[]struct{ request, limit string }{}, // init container
+					[]struct{ request, limit string }{ // app container
+						{"5000m", "5000m"}, // more than the remaining free cpus of uncore cache 0
+					},
+				),
+				"with-single-container",
+			),
+			expCPUAlloc: true,
+			expCSet:     cpuset.New(8, 9, 10, 11, 12),
+		},
+		{
+			// odd integer cpu requirement on smt-enabled
+			description:     "odd integer required on smt-enabled",
+			topo:            topoSingleSocketSingleNumaPerSocketSMTSmallUncore, // 8 cpus per uncore
+			numReservedCPUs: 4,
+			reserved:        cpuset.New(0, 1, 64, 65), // note 4 cpus taken from uncore 0
+			cpuPolicyOptions: map[string]string{
+				PreferAlignByUnCoreCacheOption: "true",
+			},
+			stAssignments:   state.ContainerCPUAssignments{},
+			stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketSMTSmallUncore.CPUDetails.CPUs(),
+			pod: WithPodUID(
+				makeMultiContainerPod(
+					[]struct{ request, limit string }{}, // init container
+					[]struct{ request, limit string }{ // app container
+						{"3000m", "3000m"},
+					},
+				),
+				"with-single-container",
+			),
+			expCPUAlloc: true,
+			expCSet:     cpuset.New(2, 3, 66),
+		},
+		{
+			// odd integer cpu required on smt-enabled and odd integer free cpus available on uncore
+			description:     "odd integer required on odd integer partial uncore",
+			topo:            topoSingleSocketSingleNumaPerSocketSMTSmallUncore, // 8 cpus per uncore
+			numReservedCPUs: 3,
+			reserved:        cpuset.New(0, 1, 64), // note 3 cpus taken from uncore 0
+			cpuPolicyOptions: map[string]string{
+				PreferAlignByUnCoreCacheOption: "true",
+			},
+			stAssignments:   state.ContainerCPUAssignments{},
+			stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketSMTSmallUncore.CPUDetails.CPUs(),
+			pod: WithPodUID(
+				makeMultiContainerPod(
+					[]struct{ request, limit string }{}, // init container
+					[]struct{ request, limit string }{ // app container
+						{"3000m", "3000m"},
+					},
+				),
+				"with-single-container",
+			),
+			expCPUAlloc: true,
+			expCSet:     cpuset.New(2, 65, 66),
+		},
+		{
+			// even integer requested on smt-enabled processor with odd integer available cpus on uncore
+			// even integer cpu containers will not be placed on uncore caches with odd integer free cpus
+			description:     "even integer required on odd integer partial uncore",
+			topo:            topoSingleSocketSingleNumaPerSocketSMTSmallUncore, // 8 cpus per uncore
+			numReservedCPUs: 3,
+			reserved:        cpuset.New(0, 1, 64), // note 3 cpus taken from uncore 0
+			cpuPolicyOptions: map[string]string{
+				PreferAlignByUnCoreCacheOption: "true",
+			},
+			stAssignments:   state.ContainerCPUAssignments{},
+			stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketSMTSmallUncore.CPUDetails.CPUs(),
+			pod: WithPodUID(
+				makeMultiContainerPod(
+					[]struct{ request, limit string }{}, // init container
+					[]struct{ request, limit string }{ // app container
+						{"4000m", "4000m"},
+					},
+				),
+				"with-single-container",
+			),
+			expCPUAlloc: true,
+			expCSet:     cpuset.New(4, 5, 68, 69),
+		},
+		{
+			// large odd integer cpu required on smt-enabled
+			description:     "large odd integer required on smt-enabled",
+			topo:            topoSingleSocketSingleNumaPerSocketSMTSmallUncore, // 8 cpus per uncore
+			numReservedCPUs: 3,
+			reserved:        cpuset.New(0, 1, 64), // note 3 cpus taken from uncore 0
+			cpuPolicyOptions: map[string]string{
+				PreferAlignByUnCoreCacheOption: "true",
+			},
+			stAssignments:   state.ContainerCPUAssignments{},
+			stDefaultCPUSet: topoSingleSocketSingleNumaPerSocketSMTSmallUncore.CPUDetails.CPUs(),
+			pod: WithPodUID(
+				makeMultiContainerPod(
+					[]struct{ request, limit string }{}, // init container
+					[]struct{ request, limit string }{ // app container
+						{"11000m", "11000m"},
+					},
+				),
+				"with-single-container",
+			),
+			expCPUAlloc: true,
+			expCSet:     cpuset.New(2, 65, 66, 4, 5, 6, 7, 68, 69, 70, 71), // full uncore 1 and partial uncore 0
+		},
+		{
+			// odd integer cpu required on hyperthread-enabled and monolithic uncore cache
+			description:     "odd integer required on HT monolithic uncore",
+			topo:            topoDualSocketSubNumaPerSocketHTMonolithicUncore,
+			numReservedCPUs: 3,
+			reserved:        cpuset.New(0, 1, 120), // note 3 cpus taken from uncore 0
+			cpuPolicyOptions: map[string]string{
+				PreferAlignByUnCoreCacheOption: "true",
+			},
+			stAssignments:   state.ContainerCPUAssignments{},
+			stDefaultCPUSet: topoDualSocketSubNumaPerSocketHTMonolithicUncore.CPUDetails.CPUs(),
+			pod: WithPodUID(
+				makeMultiContainerPod(
+					[]struct{ request, limit string }{}, // init container
+					[]struct{ request, limit string }{ // app container
+						{"5000m", "5000m"},
+					},
+				),
+				"with-single-container",
+			),
+			expCPUAlloc: true,
+			expCSet:     cpuset.New(2, 3, 121, 122, 123),
+		},
+		{
+			// even integer cpu required on hyperthread-enabled and monolithic uncore cache
+			description:     "even integer required on HT monolithic uncore",
+			topo:            topoDualSocketSubNumaPerSocketHTMonolithicUncore,
+			numReservedCPUs: 3,
+			reserved:        cpuset.New(0, 1, 120), // note 3 cpus taken from uncore 0
+			cpuPolicyOptions: map[string]string{
+				PreferAlignByUnCoreCacheOption: "true",
+			},
+			stAssignments:   state.ContainerCPUAssignments{},
+			stDefaultCPUSet: topoDualSocketSubNumaPerSocketHTMonolithicUncore.CPUDetails.CPUs(),
+			pod: WithPodUID(
+				makeMultiContainerPod(
+					[]struct{ request, limit string }{}, // init container
+					[]struct{ request, limit string }{ // app container
+						{"4000m", "4000m"},
+					},
+				),
+				"with-single-container",
+			),
+			expCPUAlloc: true,
+			expCSet:     cpuset.New(2, 3, 122, 123), // takeFullCores
+		},
 	}
 
 	for _, testCase := range testCases {
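Reviewer note (not part of the applied patch): the core of the change above is the switch from plain integer division to a ceiling division when computing how many whole cores an uncore-cache allocation needs, plus trimming one thread when the request is odd on an SMT system. The standalone Go sketch below illustrates that intent; the names (coresNeeded, trimOddRequest, the plain []int standing in for the accumulator's sorted freeCPUs.List()) are illustrative, not the actual cpuAccumulator fields.

package main

import "fmt"

// coresNeeded mirrors the new rounding in takePartialUncore: the number of
// physical cores to consider is the ceiling of numCPUsNeeded / cpusPerCore,
// so an odd cpu request on an SMT system still pulls in enough whole cores.
// The old integer division under-counted by one core for odd requests.
func coresNeeded(numCPUsNeeded, cpusPerCore int) int {
	return (numCPUsNeeded + cpusPerCore - 1) / cpusPerCore
}

// trimOddRequest mirrors dropping the last free virtual cpu when the request
// is odd and SMT is enabled, so only one thread of the final core is claimed.
func trimOddRequest(freeCPUs []int, numCPUsNeeded, cpusPerCore int) []int {
	if numCPUsNeeded%2 != 0 && cpusPerCore > 1 && len(freeCPUs) > numCPUsNeeded {
		return freeCPUs[:len(freeCPUs)-1]
	}
	return freeCPUs
}

func main() {
	// 3 cpus requested on an SMT-2 topology: two whole cores are considered,
	// and their four threads {2, 3, 66, 67} are trimmed back to the 3 requested,
	// matching the unit test expectation of cpuset.New(2, 3, 66).
	fmt.Println(coresNeeded(3, 2))                         // 2
	fmt.Println(trimOddRequest([]int{2, 3, 66, 67}, 3, 2)) // [2 3 66]
}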
From 98c4514eae2148c43470d4d971968b56a06112aa Mon Sep 17 00:00:00 2001
From: Charles Wong
Date: Thu, 26 Jun 2025 17:28:10 -0500
Subject: [PATCH 2/2] add e2e_node tests for uncore alignment

---
 pkg/kubelet/cm/cpumanager/cpu_assignment.go |   5 +-
 test/e2e_node/cpumanager_test.go            | 277 ++++++++++++++++++++
 2 files changed, 281 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/cm/cpumanager/cpu_assignment.go b/pkg/kubelet/cm/cpumanager/cpu_assignment.go
index 37168c1f621..4838056c0de 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_assignment.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_assignment.go
@@ -570,9 +570,12 @@ func (a *cpuAccumulator) takePartialUncore(uncoreID int) {
 	// since the odd integer request will only require one out of the two free cpus that
 	// correspond to the last core
 	if a.numCPUsNeeded%2 != 0 && a.topo.CPUsPerCore() > 1 {
+		// we sort freeCPUs to ensure we pack virtual cpu allocations, meaning we allocate
+		// whole cores' worth of cpus as much as possible to reduce smt-misalignment
 		sortFreeCPUs := freeCPUs.List()
 		if len(sortFreeCPUs) > a.numCPUsNeeded {
-			// amount of freeCPUs cannot be < 1
+			// if we are in takePartialUncore, the accumulator is not satisfied after
+			// takeFullUncore, so freeCPUs.Size() can't be < 1
 			sortFreeCPUs = sortFreeCPUs[:freeCPUs.Size()-1]
 		}
 		freeCPUs = cpuset.New(sortFreeCPUs...)
diff --git a/test/e2e_node/cpumanager_test.go b/test/e2e_node/cpumanager_test.go
index d8b96de8e61..e1651caf7ad 100644
--- a/test/e2e_node/cpumanager_test.go
+++ b/test/e2e_node/cpumanager_test.go
@@ -29,6 +29,7 @@ import (
 	"path/filepath"
 	"reflect"
 	"regexp"
+	"strconv"
 	"strings"
 	"time"
 
@@ -126,6 +127,7 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, fra
 	var reservedCPUs cpuset.CPUSet
 	var onlineCPUs cpuset.CPUSet
 	var smtLevel int
+	var uncoreGroupSize int
 	// tracks all the pods created by a It() block. Best would be a namespace per It block
 	// TODO: move to a namespace per It block?
 	var podMap map[string]*v1.Pod
@@ -145,6 +147,9 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, fra
 		smtLevel = smtLevelFromSysFS() // this should not change at all, at least during this suite lifetime
 		framework.Logf("SMT level: %d", smtLevel)
 
+		uncoreGroupSize = getUncoreCPUGroupSize()
+		framework.Logf("Uncore Group Size: %d", uncoreGroupSize)
+
 		e2enodeCgroupV2Enabled = IsCgroup2UnifiedMode()
 		framework.Logf("cgroup V2 enabled: %v", e2enodeCgroupV2Enabled)
 
@@ -759,6 +764,90 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, fra
 		})
 	})
 
+	ginkgo.When("running with Uncore Cache Alignment", ginkgo.Label("prefer-align-cpus-by-uncore-cache"), func() {
+		ginkgo.BeforeEach(func(ctx context.Context) {
+
+			reservedCPUs = cpuset.New(0)
+
+			updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
+				policyName:              string(cpumanager.PolicyStatic),
+				reservedSystemCPUs:      reservedCPUs,
+				enableCPUManagerOptions: true,
+				options: map[string]string{
+					cpumanager.PreferAlignByUnCoreCacheOption: "true",
+				},
+			}))
+		})
+
+		ginkgo.It("should admit container asking odd integer amount of cpus", func(ctx context.Context) {
+			// assume an uncore cache's worth of cpus will always be an even integer value
+			// smallest odd integer cpu request can be 1 cpu
+			// for a meaningful test, the minimum allocatable cpu requirement should be:
+			// minCPUCapacity + reservedCPUs.Size() + 1 CPU allocated
+			cpuCount := minCPUCapacity + reservedCPUs.Size() + 1
+			skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
+
+			// check if the node processor architecture has split or monolithic uncore cache.
+			// prefer-align-cpus-by-uncore-cache can be enabled on non-split uncore cache processors
+			// with no change to default static behavior
+			allocatableCPUs := cpuDetailsFromNode(getLocalNode(ctx, f)).Allocatable
+			hasSplitUncore := (allocatableCPUs > int64(uncoreGroupSize))
+
+			if hasSplitUncore {
+				// create a container that requires one less cpu than a full uncore cache's worth of cpus
+				// assume total shared CPUs of a single uncore cache will always be an even integer
+				cpuRequest := fmt.Sprintf("%d000m", (uncoreGroupSize - 1))
+				ginkgo.By(fmt.Sprintf("creating the testing pod cpuRequest=%v", cpuRequest))
+				pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
+					{
+						ctnName:    "gu-container-pos",
+						cpuRequest: cpuRequest,
+						cpuLimit:   cpuRequest,
+					},
+				})
+				ginkgo.By("creating the test pod")
+				pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+				podMap[string(pod.UID)] = pod
+
+				ginkgo.By("validating each container in the testing pod")
+				for _, cnt := range pod.Spec.Containers {
+					ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
+
+					// expect allocated CPUs to be able to fit on uncore cache ID equal to 0
+					expUncoreCPUSet, err := uncoreCPUSetFromSysFS(0)
+					framework.ExpectNoError(err, "cannot determine shared cpus for uncore cache on node")
+					gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(cnt.Name, expUncoreCPUSet))
+				}
+			} else {
+				// for node with monolithic uncore cache processor
+				// uncoreGroupSize will be a socket's worth of CPUs
+				// subtract (minCPUCapacity + 1) CPU resource constraint
+				cpuRequest := fmt.Sprintf("%d000m", (uncoreGroupSize - (minCPUCapacity + 1)))
+				ginkgo.By(fmt.Sprintf("creating the testing pod cpuRequest=%v", cpuRequest))
+				pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
+					{
+						ctnName:    "gu-container-pos",
+						cpuRequest: cpuRequest,
+						cpuLimit:   cpuRequest,
+					},
+				})
ginkgo.By("creating the test pod") + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + podMap[string(pod.UID)] = pod + + ginkgo.By("validating each container in the testing pod") + for _, cnt := range pod.Spec.Containers { + ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name)) + + // expect allocated CPUs to be able to fit on uncore cache ID equal to 0 + expUncoreCPUSet, err := uncoreCPUSetFromSysFS(0) + framework.ExpectNoError(err, "cannot determine shared cpus for uncore cache on node") + gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(cnt.Name, expUncoreCPUSet)) + } + } + }) + }) + ginkgo.When("checking the compatibility between options", func() { // please avoid nesting `BeforeEach` as much as possible. Ideally avoid completely. ginkgo.Context("SMT Alignment and strict CPU reservation", ginkgo.Label("smt-alignment", "strict-cpu-reservation"), func() { @@ -862,6 +951,135 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, fra }) }) + // please avoid nesting `BeforeEach` as much as possible. Ideally avoid completely. + ginkgo.Context("SMT Alignment and Uncore Cache Alignment", ginkgo.Label("smt-alignment", "prefer-align-cpus-by-uncore-cache"), func() { + ginkgo.BeforeEach(func(ctx context.Context) { + // strict SMT alignment is trivially verified and granted on non-SMT systems + if smtLevel < minSMTLevel { + e2eskipper.Skipf("Skipping CPU Manager %q tests since SMT disabled", cpumanager.FullPCPUsOnlyOption) + } + reservedCPUs = cpuset.New(0) + }) + + ginkgo.It("should assign packed CPUs with prefer-align-cpus-by-uncore-cache disabled and pcpu-only policy options enabled", func(ctx context.Context) { + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), smtLevel) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, + enableCPUManagerOptions: true, + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.PreferAlignByUnCoreCacheOption: "false", + }, + })) + + ctnAttrs := []ctnAttribute{ + { + ctnName: "test-gu-container-uncore-cache-alignment-disabled", + cpuRequest: "2000m", + cpuLimit: "2000m", + }, + } + pod := makeCPUManagerPod("test-pod-uncore-cache-alignment-disabled", ctnAttrs) + ginkgo.By("creating the test pod") + pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) + podMap[string(pod.UID)] = pod + + ginkgo.By("validating each container in the testing pod") + for _, cnt := range pod.Spec.Containers { + ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name)) + + gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel)) + gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings(cnt.Name)) + } + }) + + ginkgo.It("should assign CPUs aligned to uncore caches with prefer-align-cpus-by-uncore-cache and pcpu-only policy options enabled", func(ctx context.Context) { + + skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), smtLevel) + + updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + reservedSystemCPUs: reservedCPUs, + enableCPUManagerOptions: true, + options: map[string]string{ + cpumanager.FullPCPUsOnlyOption: "true", + cpumanager.PreferAlignByUnCoreCacheOption: "true", + }, + })) + + // check if the node processor architecture has split or monolithic uncore cache. 
+			// prefer-align-cpus-by-uncore-cache can be enabled on non-split uncore cache processors
+			// with no change to default static behavior
+			allocatableCPUs := cpuDetailsFromNode(getLocalNode(ctx, f)).Allocatable
+			hasSplitUncore := (allocatableCPUs > int64(uncoreGroupSize))
+
+			if hasSplitUncore {
+				// for node with split uncore cache processor
+				// create a pod that requires a full uncore cache worth of CPUs
+				ctnAttrs := []ctnAttribute{
+					{
+						ctnName:    "test-gu-container-align-cpus-by-uncore-cache-on-split-uncore",
+						cpuRequest: fmt.Sprintf("%d", uncoreGroupSize),
+						cpuLimit:   fmt.Sprintf("%d", uncoreGroupSize),
+					},
+				}
+				pod := makeCPUManagerPod("test-pod-align-cpus-by-uncore-cache", ctnAttrs)
+				ginkgo.By("creating the test pod")
+				pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+				podMap[string(pod.UID)] = pod
+
+				// the 'prefer-align-cpus-by-uncore-cache' policy option will attempt, on a best-effort basis, to allocate cpus
+				// so that distribution across uncore caches is minimized. Since the test container is requesting a full
+				// uncore cache worth of cpus and CPU0 is part of the reserved CPUset and not allocatable, the policy will attempt
+				// to allocate cpus from the next available uncore cache in numerical order (uncore cache ID equal to 1)
+
+				for _, cnt := range pod.Spec.Containers {
+					ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
+
+					gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
+					cpus, err := getContainerAllowedCPUs(pod, cnt.Name, false)
+					framework.ExpectNoError(err, "cannot get cpus allocated to pod %s/%s cnt %s", pod.Namespace, pod.Name, cnt.Name)
+
+					siblingsCPUs := makeThreadSiblingCPUSet(cpus)
+					gomega.Expect(pod).To(HaveContainerCPUsEqualTo(cnt.Name, siblingsCPUs))
+
+					// expect full uncore cache worth of cpus to be assigned to uncoreCacheID equal to 1
+					// since CPU0 is part of reserved CPUset, resulting in insufficient CPUs from
+					// uncoreCacheID equal to 0
+					expUncoreCPUSet, err := uncoreCPUSetFromSysFS(1)
+					framework.ExpectNoError(err, "cannot determine shared cpus for uncore cache on node")
+					gomega.Expect(pod).To(HaveContainerCPUsEqualTo(cnt.Name, expUncoreCPUSet))
+				}
+			} else {
+				// for node with monolithic uncore cache processor
+				// expect default static behavior with pcpu-only policy enabled
+				// and prefer-align-cpus-by-uncore-cache enabled
+				ctnAttrs := []ctnAttribute{
+					{
+						ctnName:    "test-gu-container-align-cpus-by-uncore-cache-on-mono-uncore",
+						cpuRequest: "2000m",
+						cpuLimit:   "2000m",
+					},
+				}
+				pod := makeCPUManagerPod("test-pod-align-cpus-by-uncore-cache", ctnAttrs)
+				ginkgo.By("creating the test pod")
+				pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+				podMap[string(pod.UID)] = pod
+
+				ginkgo.By("validating each container in the testing pod")
+				for _, cnt := range pod.Spec.Containers {
+					ginkgo.By(fmt.Sprintf("validating the container %s on pod %s", cnt.Name, pod.Name))
+
+					gomega.Expect(pod).To(HaveContainerCPUsAlignedTo(cnt.Name, smtLevel))
+					gomega.Expect(pod).To(HaveContainerCPUsThreadSiblings(cnt.Name))
+				}
+			}
+		})
+	})
+
 	// please avoid nesting `BeforeEach` as much as possible. Ideally avoid completely.
ginkgo.Context("SMT Alignment and distribution across NUMA", ginkgo.Label("smt-alignment", "distribute-cpus-across-numa"), func() { ginkgo.BeforeEach(func(ctx context.Context) { @@ -1771,6 +1989,65 @@ func cpuSiblingListFromSysFS(cpuID int64) cpuset.CPUSet { return cpus } +func uncoreCPUSetFromSysFS(uncoreID int64) (cpuset.CPUSet, error) { + basePath := "/sys/devices/system/cpu" + result := cpuset.New() + entries, err := os.ReadDir(basePath) + // return error if base path directory does not exist + if err != nil { + return result, fmt.Errorf("failed to read %s: %w", basePath, err) + } + // scan each CPU in sysfs for the following path: + // /sys/devices/system/cpu/cpu# + for _, entry := range entries { + // expect sysfs path for each CPU to be /sys/devices/system/cpu/cpu# + // ignore directories that do not match this format + if !entry.IsDir() || !strings.HasPrefix(entry.Name(), "cpu") { + continue + } + + // skip non-numeric 'cpu' directories meaning there is not a trailing + // cpu ID for the directory (example: skip 'cpufreq') + cpuNumStr := strings.TrimPrefix(entry.Name(), "cpu") + if _, err := strconv.Atoi(cpuNumStr); err != nil { + continue + } + + // determine if the input uncoreID matches the cpu's index3 cache ID found at: + // /sys/devices/system/cpu/cpu#/cache/index3/id + uncoreCacheIDPath := filepath.Join(basePath, entry.Name(), "cache", "index3", "id") + sysFSUncoreIDByte, err := os.ReadFile(uncoreCacheIDPath) + // return error if sysfs does not contain index3 cache ID + if err != nil { + return result, fmt.Errorf("failed to read %s: %w", uncoreCacheIDPath, err) + } + sysFSUncoreIDStr := strings.TrimSpace(string(sysFSUncoreIDByte)) + sysFSUncoreID, err := strconv.ParseInt(sysFSUncoreIDStr, 10, 64) + // if output of /sys/devices/system/cpu/cpu#/cache/index3/id does not exist or + // does not match uncoreID input, skip the cpu + if err != nil || sysFSUncoreID != uncoreID { + continue + } + + // once a cpu's index3 cache ID is matched to the input uncoreID + // parse the shared cpus for uncoreID (sysfs index3 cache ID) from + // /sys/devices/system/cpu/cpu#/cache/index3/shared_cpu_list + // and return the cpuset + uncoreSharedCPUListPath := filepath.Join(basePath, entry.Name(), "cache", "index3", "shared_cpu_list") + uncoreSharedCPUBytes, err := os.ReadFile(uncoreSharedCPUListPath) + if err != nil { + return result, fmt.Errorf("failed to read shared_cpu_list: %w", err) + } + uncoreSharedCPUStr := strings.TrimSpace(string(uncoreSharedCPUBytes)) + uncoreSharedCPU, err := cpuset.Parse(uncoreSharedCPUStr) + if err != nil { + return result, fmt.Errorf("failed to parse CPUSet from %s: %w", uncoreSharedCPUStr, err) + } + return uncoreSharedCPU, nil + } + return result, fmt.Errorf("no CPUs found with cache ID %d", uncoreID) +} + func makeCPUManagerBEPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod { var containers []v1.Container for _, ctnAttr := range ctnAttributes {