	Merge pull request #129950 from ffromani/alignment-error-detail-metrics
node: metrics for alignment failures
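This change adds a new kubelet counter vector, kubelet_container_aligned_compute_resources_failure_count, labelled by scope (container or pod) and boundary (physical_cpu or numa_node). The CPU manager static policy increments it when an exclusive CPU allocation fails while the full-pcpus-only option is active, and the topology manager container and pod admission scopes increment it when admission fails under a policy for which alignment is guaranteed (IsAlignmentGuaranteed); the relevant label combinations are pre-initialized to zero so the series exist right after kubelet start. As an illustration only (not part of the diff), on a freshly restarted kubelet running the static CPU manager policy together with the topology manager, the new series on the /metrics endpoint might look like:

	# HELP kubelet_container_aligned_compute_resources_failure_count Cumulative number of failures to allocate aligned compute resources to containers by alignment type.
	# TYPE kubelet_container_aligned_compute_resources_failure_count counter
	kubelet_container_aligned_compute_resources_failure_count{boundary="physical_cpu",scope="container"} 0
	kubelet_container_aligned_compute_resources_failure_count{boundary="numa_node",scope="container"} 0
	kubelet_container_aligned_compute_resources_failure_count{boundary="numa_node",scope="pod"} 0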
@@ -325,13 +325,15 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
 	defer func() {
 		if rerr != nil {
 			metrics.CPUManagerPinningErrorsTotal.Inc()
+			if p.options.FullPhysicalCPUsOnly {
+				metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc()
+			}
 			return
 		}
-		if !p.options.FullPhysicalCPUsOnly {
+		if p.options.FullPhysicalCPUsOnly {
 			// increment only if we know we allocate aligned resources
-			return
+			metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc()
 		}
-		metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc()
 	}()
 
 	if p.options.FullPhysicalCPUsOnly {
@@ -752,6 +754,7 @@ func (p *staticPolicy) getAlignedCPUs(numaAffinity bitmask.BitMask, allocatableC
 func (p *staticPolicy) initializeMetrics(s state.State) {
 	metrics.CPUManagerSharedPoolSizeMilliCores.Set(float64(p.GetAvailableCPUs(s).Size() * 1000))
 	metrics.CPUManagerExclusiveCPUsAllocationCount.Set(float64(countExclusiveCPUs(s)))
+	metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0) // ensure the value exists
 }
 
 func (p *staticPolicy) updateMetricsOnAllocate(cset cpuset.CPUSet) {
@@ -50,6 +50,9 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
 		klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
 
 		if !admit {
+			if IsAlignmentGuaranteed(s.policy) {
+				metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedNUMANode).Inc()
+			}
 			metrics.TopologyManagerAdmissionErrorsTotal.Inc()
 			return admission.GetPodAdmitResult(&TopologyAffinityError{})
 		}
@@ -48,6 +48,10 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
 	bestHint, admit := s.calculateAffinity(pod)
 	klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod))
 	if !admit {
+		if IsAlignmentGuaranteed(s.policy) {
+			// increment only if we know we allocate aligned resources.
+			metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopePod, metrics.AlignedNUMANode).Inc()
+		}
 		metrics.TopologyManagerAdmissionErrorsTotal.Inc()
 		return admission.GetPodAdmitResult(&TopologyAffinityError{})
 	}
@@ -188,9 +188,19 @@ func NewManager(topology []cadvisorapi.Node, topologyPolicyName string, topology
 		scope: scope,
 	}
 
+	manager.initializeMetrics()
+
 	return manager, nil
 }
 
+func (m *manager) initializeMetrics() {
+	// ensure the values exist
+	metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedNUMANode).Add(0)
+	metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopePod, metrics.AlignedNUMANode).Add(0)
+	metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedNUMANode).Add(0)
+	metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopePod, metrics.AlignedNUMANode).Add(0)
+}
+
 func (m *manager) GetAffinity(podUID string, containerName string) TopologyHint {
 	return m.scope.GetAffinity(podUID, containerName)
 }
@@ -132,6 +132,7 @@ const (
 
 	// Metric for tracking aligment of compute resources
 	ContainerAlignedComputeResourcesNameKey          = "container_aligned_compute_resources_count"
+	ContainerAlignedComputeResourcesFailureNameKey   = "container_aligned_compute_resources_failure_count"
 	ContainerAlignedComputeResourcesScopeLabelKey    = "scope"
 	ContainerAlignedComputeResourcesBoundaryLabelKey = "boundary"
 
@@ -818,7 +819,18 @@ var (
 		},
 		[]string{ContainerAlignedComputeResourcesScopeLabelKey, ContainerAlignedComputeResourcesBoundaryLabelKey},
 	)
-	// MemoryManagerPinningRequestTotal tracks the number of times the pod spec required the memory manager to pin memory pages
+
+	// ContainerAlignedComputeResourcesFailure reports the count of resources allocation attempts which failed to align resources, per alignment boundary
+	ContainerAlignedComputeResourcesFailure = metrics.NewCounterVec(
+		&metrics.CounterOpts{
+			Subsystem:      KubeletSubsystem,
+			Name:           ContainerAlignedComputeResourcesFailureNameKey,
+			Help:           "Cumulative number of failures to allocate aligned compute resources to containers by alignment type.",
+			StabilityLevel: metrics.ALPHA,
+		},
+		[]string{ContainerAlignedComputeResourcesScopeLabelKey, ContainerAlignedComputeResourcesBoundaryLabelKey},
+	)
+
 	MemoryManagerPinningRequestTotal = metrics.NewCounter(
 		&metrics.CounterOpts{
 			Subsystem:      KubeletSubsystem,
@@ -1079,6 +1091,7 @@ func Register(collectors ...metrics.StableCollector) {
 		legacyregistry.MustRegister(CPUManagerSharedPoolSizeMilliCores)
 		legacyregistry.MustRegister(CPUManagerExclusiveCPUsAllocationCount)
 		legacyregistry.MustRegister(ContainerAlignedComputeResources)
+		legacyregistry.MustRegister(ContainerAlignedComputeResourcesFailure)
 		legacyregistry.MustRegister(MemoryManagerPinningRequestTotal)
 		legacyregistry.MustRegister(MemoryManagerPinningErrorsTotal)
 		legacyregistry.MustRegister(TopologyManagerAdmissionRequestsTotal)
@@ -104,6 +104,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
 			// being [Serial], we can also assume noone else but us is running pods.
 			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with no pods running")
 
+			idFn := makeCustomPairID("scope", "boundary")
 			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
 				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(0),
@@ -111,6 +112,9 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
 				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(0),
 				}),
+				"kubelet_container_aligned_compute_resources_failure_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
+					"container::physical_cpu": timelessSample(0),
+				}),
 			})
 
 			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
@@ -127,6 +131,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
 			// being [Serial], we can also assume noone else but us is running pods.
 			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with pod failed to admit")
 
+			idFn := makeCustomPairID("scope", "boundary")
 			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
 				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(1),
@@ -134,6 +139,9 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
 				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(1),
 				}),
+				"kubelet_container_aligned_compute_resources_failure_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
+					"container::physical_cpu": timelessSample(1),
+				}),
 			})
 
 			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
@@ -150,6 +158,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
 			// being [Serial], we can also assume noone else but us is running pods.
 			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with pod should be admitted")
 
+			idFn := makeCustomPairID("scope", "boundary")
 			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
 				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(1),
@@ -157,6 +166,9 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
 				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(0),
 				}),
+				"kubelet_container_aligned_compute_resources_failure_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
+					"container::physical_cpu": timelessSample(0),
+				}),
 			})
 
 			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
@@ -178,6 +190,9 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
 				"kubelet_container_aligned_compute_resources_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
 					"container::physical_cpu": timelessSample(1),
 				}),
+				"kubelet_container_aligned_compute_resources_failure_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
+					"container::physical_cpu": timelessSample(0),
+				}),
 			})
 
 			ginkgo.By("Giving the Kubelet time to update the alignment metrics")
@@ -28,7 +28,6 @@ import (
 	v1 "k8s.io/api/core/v1"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
-	"k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/test/e2e/feature"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -84,6 +83,7 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
 			// being [Serial], we can also assume noone else but us is running pods.
 			ginkgo.By("Checking the topologymanager metrics right after the kubelet restart, with no pods running")
 
+			idFn := makeCustomPairID("scope", "boundary")
 			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
 				"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(0),
@@ -91,6 +91,10 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
 				"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(0),
 				}),
+				"kubelet_container_aligned_compute_resources_failure_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
+					"container::numa_node": timelessSample(0),
+					"pod::numa_node":       timelessSample(0),
+				}),
 				"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
 					"": timelessSample(0),
 				}),
@@ -110,6 +114,7 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
 			// being [Serial], we can also assume noone else but us is running pods.
 			ginkgo.By("Checking the topologymanager metrics right after the kubelet restart, with pod failed to admit")
 
+			idFn := makeCustomPairID("scope", "boundary")
 			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
 				"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(1),
@@ -117,6 +122,10 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
 				"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(1),
 				}),
+				"kubelet_container_aligned_compute_resources_failure_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
+					"container::numa_node": timelessSample(0),
+					"pod::numa_node":       timelessSample(1),
+				}),
 				"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
 					"": checkMetricValueGreaterThan(0),
 				}),
@@ -136,6 +145,7 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
 			// being [Serial], we can also assume noone else but us is running pods.
 			ginkgo.By("Checking the topologymanager metrics right after the kubelet restart, with pod should be admitted")
 
+			idFn := makeCustomPairID("scope", "boundary")
 			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
 				"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(1),
@@ -143,6 +153,10 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
 				"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 					"": timelessSample(0),
 				}),
+				"kubelet_container_aligned_compute_resources_failure_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
+					"container::numa_node": timelessSample(0),
+					"pod::numa_node":       timelessSample(0),
+				}),
 				"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
 					"": checkMetricValueGreaterThan(0),
 				}),
@@ -162,9 +176,15 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
 			// being [Serial], we can also assume noone else but us is running pods.
 			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with pod should be admitted")
 
+			idFn := makeCustomPairID("scope", "boundary")
 			matchAlignmentMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
-				"kubelet_container_aligned_compute_resources_count": gstruct.MatchAllElements(nodeID, gstruct.Elements{
-					metrics.AlignedNUMANode: timelessSample(1),
+				"kubelet_container_aligned_compute_resources_count": gstruct.MatchAllElements(idFn, gstruct.Elements{
+					"container::numa_node": timelessSample(0),
+					"pod::numa_node":       timelessSample(1),
+				}),
+				"kubelet_container_aligned_compute_resources_failure_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
+					"container::numa_node": timelessSample(0),
+					"pod::numa_node":       timelessSample(0),
 				}),
 			})
 