mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 04:08:16 +00:00 
			
		
		
		
	Add kubelet managed pod metrics
This commit is contained in:
		@@ -48,6 +48,7 @@ import (
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/images"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/logs"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/metrics"
 | 
			
		||||
	proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/runtimeclass"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/types"
 | 
			
		||||
@@ -774,6 +775,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
 | 
			
		||||
		var err error
 | 
			
		||||
 | 
			
		||||
		klog.V(4).InfoS("Creating PodSandbox for pod", "pod", klog.KObj(pod))
 | 
			
		||||
		metrics.StartedPodsTotal.Inc()
 | 
			
		||||
		createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
 | 
			
		||||
		result.AddSyncResult(createSandboxResult)
 | 
			
		||||
		podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
 | 
			
		||||
@@ -786,6 +788,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
 | 
			
		||||
				klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
 | 
			
		||||
				return
 | 
			
		||||
			}
 | 
			
		||||
			metrics.StartedPodsErrorsTotal.WithLabelValues(err.Error()).Inc()
 | 
			
		||||
			createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
 | 
			
		||||
			klog.ErrorS(err, "CreatePodSandbox for pod failed", "pod", klog.KObj(pod))
 | 
			
		||||
			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
 | 
			
		||||
@@ -838,9 +841,11 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Helper containing boilerplate common to starting all types of containers.
 | 
			
		||||
	// typeName is a label used to describe this type of container in log messages,
 | 
			
		||||
	// typeName is a description used to describe this type of container in log messages,
 | 
			
		||||
	// currently: "container", "init container" or "ephemeral container"
 | 
			
		||||
	start := func(typeName string, spec *startSpec) error {
 | 
			
		||||
	// metricLabel is the label used to describe this type of container in monitoring metrics.
 | 
			
		||||
	// currently: "container", "init_container" or "ephemeral_container"
 | 
			
		||||
	start := func(typeName, metricLabel string, spec *startSpec) error {
 | 
			
		||||
		startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, spec.container.Name)
 | 
			
		||||
		result.AddSyncResult(startContainerResult)
 | 
			
		||||
 | 
			
		||||
@@ -851,9 +856,13 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		metrics.StartedContainersTotal.WithLabelValues(metricLabel).Inc()
 | 
			
		||||
		klog.V(4).InfoS("Creating container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
 | 
			
		||||
		// NOTE (aramase) podIPs are populated for single stack and dual stack clusters. Send only podIPs.
 | 
			
		||||
		if msg, err := m.startContainer(podSandboxID, podSandboxConfig, spec, pod, podStatus, pullSecrets, podIP, podIPs); err != nil {
 | 
			
		||||
			// startContainer() returns well-defined error codes that have reasonable cardinality for metrics and are
 | 
			
		||||
			// useful to cluster administrators to distinguish "server errors" from "user errors".
 | 
			
		||||
			metrics.StartedContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
 | 
			
		||||
			startContainerResult.Fail(err, msg)
 | 
			
		||||
			// known errors that are logged in other places are logged at higher levels here to avoid
 | 
			
		||||
			// repetitive log spam
 | 
			
		||||
@@ -875,14 +884,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
 | 
			
		||||
	// containers cannot be specified on pod creation.
 | 
			
		||||
	if utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
 | 
			
		||||
		for _, idx := range podContainerChanges.EphemeralContainersToStart {
 | 
			
		||||
			start("ephemeral container", ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
 | 
			
		||||
			start("ephemeral container", metrics.EphemeralContainer, ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Step 6: start the init container.
 | 
			
		||||
	if container := podContainerChanges.NextInitContainerToStart; container != nil {
 | 
			
		||||
		// Start the next init container.
 | 
			
		||||
		if err := start("init container", containerStartSpec(container)); err != nil {
 | 
			
		||||
		if err := start("init container", metrics.InitContainer, containerStartSpec(container)); err != nil {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
@@ -892,7 +901,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
 | 
			
		||||
 | 
			
		||||
	// Step 7: start containers in podContainerChanges.ContainersToStart.
 | 
			
		||||
	for _, idx := range podContainerChanges.ContainersToStart {
 | 
			
		||||
		start("container", containerStartSpec(&pod.Spec.Containers[idx]))
 | 
			
		||||
		start("container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx]))
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return
 | 
			
		||||
 
 | 
			
		||||
@@ -83,6 +83,19 @@ const (
 | 
			
		||||
	// Metrics keys for RuntimeClass
 | 
			
		||||
	RunPodSandboxDurationKey = "run_podsandbox_duration_seconds"
 | 
			
		||||
	RunPodSandboxErrorsKey   = "run_podsandbox_errors_total"
 | 
			
		||||
 | 
			
		||||
	// Metrics to keep track of objects under management
 | 
			
		||||
	ManagedPodsKey                  = "managed_pods"
 | 
			
		||||
	ManagedContainersKey            = "managed_containers"
 | 
			
		||||
	StartedPodsTotalKey             = "started_pods_total"
 | 
			
		||||
	StartedPodsErrorsTotalKey       = "started_pods_errors_total"
 | 
			
		||||
	StartedContainersTotalKey       = "started_containers_total"
 | 
			
		||||
	StartedContainersErrorsTotalKey = "started_containers_errors_total"
 | 
			
		||||
 | 
			
		||||
	// Values used in metric labels
 | 
			
		||||
	Container          = "container"
 | 
			
		||||
	InitContainer      = "init_container"
 | 
			
		||||
	EphemeralContainer = "ephemeral_container"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
var (
 | 
			
		||||
@@ -431,6 +444,64 @@ var (
 | 
			
		||||
		},
 | 
			
		||||
		[]string{"container_state"},
 | 
			
		||||
	)
 | 
			
		||||
	// StartedPodsTotal is a counter that tracks pod sandbox creation operations
 | 
			
		||||
	StartedPodsTotal = metrics.NewCounter(
 | 
			
		||||
		&metrics.CounterOpts{
 | 
			
		||||
			Subsystem:      KubeletSubsystem,
 | 
			
		||||
			Name:           StartedPodsTotalKey,
 | 
			
		||||
			Help:           "Cumulative number of pods started",
 | 
			
		||||
			StabilityLevel: metrics.ALPHA,
 | 
			
		||||
		},
 | 
			
		||||
	)
 | 
			
		||||
	// StartedPodsErrorsTotal is a counter that tracks the number of errors creating pod sandboxes
 | 
			
		||||
	StartedPodsErrorsTotal = metrics.NewCounterVec(
 | 
			
		||||
		&metrics.CounterOpts{
 | 
			
		||||
			Subsystem:      KubeletSubsystem,
 | 
			
		||||
			Name:           StartedPodsErrorsTotalKey,
 | 
			
		||||
			Help:           "Cumulative number of errors when starting pods",
 | 
			
		||||
			StabilityLevel: metrics.ALPHA,
 | 
			
		||||
		},
 | 
			
		||||
		[]string{"message"},
 | 
			
		||||
	)
 | 
			
		||||
	// StartedContainersTotal is a counter that tracks the number of container creation operations
 | 
			
		||||
	StartedContainersTotal = metrics.NewCounterVec(
 | 
			
		||||
		&metrics.CounterOpts{
 | 
			
		||||
			Subsystem:      KubeletSubsystem,
 | 
			
		||||
			Name:           StartedContainersTotalKey,
 | 
			
		||||
			Help:           "Cumulative number of containers started",
 | 
			
		||||
			StabilityLevel: metrics.ALPHA,
 | 
			
		||||
		},
 | 
			
		||||
		[]string{"container_type"},
 | 
			
		||||
	)
 | 
			
		||||
	// StartedContainersErrorsTotal is a counter that tracks the number of errors creating containers
 | 
			
		||||
	StartedContainersErrorsTotal = metrics.NewCounterVec(
 | 
			
		||||
		&metrics.CounterOpts{
 | 
			
		||||
			Subsystem:      KubeletSubsystem,
 | 
			
		||||
			Name:           StartedContainersErrorsTotalKey,
 | 
			
		||||
			Help:           "Cumulative number of errors when starting containers",
 | 
			
		||||
			StabilityLevel: metrics.ALPHA,
 | 
			
		||||
		},
 | 
			
		||||
		[]string{"container_type", "code"},
 | 
			
		||||
	)
 | 
			
		||||
	// ManagedPods is a gauge that tracks how many pods are managed by this kubelet
 | 
			
		||||
	ManagedPods = metrics.NewGauge(
 | 
			
		||||
		&metrics.GaugeOpts{
 | 
			
		||||
			Subsystem:      KubeletSubsystem,
 | 
			
		||||
			Name:           ManagedPodsKey,
 | 
			
		||||
			Help:           "Number of pods managed by this kubelet",
 | 
			
		||||
			StabilityLevel: metrics.ALPHA,
 | 
			
		||||
		},
 | 
			
		||||
	)
 | 
			
		||||
	// ManagedContainers is a gauge that tracks how many containers are managed by this kubelet
 | 
			
		||||
	ManagedContainers = metrics.NewGaugeVec(
 | 
			
		||||
		&metrics.GaugeOpts{
 | 
			
		||||
			Subsystem:      KubeletSubsystem,
 | 
			
		||||
			Name:           ManagedContainersKey,
 | 
			
		||||
			Help:           "Number of containers managed by this kubelet",
 | 
			
		||||
			StabilityLevel: metrics.ALPHA,
 | 
			
		||||
		},
 | 
			
		||||
		[]string{"container_type"},
 | 
			
		||||
	)
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
var registerMetrics sync.Once
 | 
			
		||||
@@ -459,6 +530,12 @@ func Register(collectors ...metrics.StableCollector) {
 | 
			
		||||
		legacyregistry.MustRegister(DevicePluginAllocationDuration)
 | 
			
		||||
		legacyregistry.MustRegister(RunningContainerCount)
 | 
			
		||||
		legacyregistry.MustRegister(RunningPodCount)
 | 
			
		||||
		legacyregistry.MustRegister(ManagedPods)
 | 
			
		||||
		legacyregistry.MustRegister(ManagedContainers)
 | 
			
		||||
		legacyregistry.MustRegister(StartedPodsTotal)
 | 
			
		||||
		legacyregistry.MustRegister(StartedPodsErrorsTotal)
 | 
			
		||||
		legacyregistry.MustRegister(StartedContainersTotal)
 | 
			
		||||
		legacyregistry.MustRegister(StartedContainersErrorsTotal)
 | 
			
		||||
		legacyregistry.MustRegister(RunPodSandboxDuration)
 | 
			
		||||
		legacyregistry.MustRegister(RunPodSandboxErrors)
 | 
			
		||||
		if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
 | 
			
		||||
 
 | 
			
		||||
@@ -19,10 +19,13 @@ package pod
 | 
			
		||||
import (
 | 
			
		||||
	"sync"
 | 
			
		||||
 | 
			
		||||
	"k8s.io/api/core/v1"
 | 
			
		||||
	v1 "k8s.io/api/core/v1"
 | 
			
		||||
	"k8s.io/apimachinery/pkg/types"
 | 
			
		||||
	utilfeature "k8s.io/apiserver/pkg/util/feature"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/features"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/configmap"
 | 
			
		||||
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/metrics"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/secret"
 | 
			
		||||
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
 | 
			
		||||
)
 | 
			
		||||
@@ -159,6 +162,45 @@ func isPodInTerminatedState(pod *v1.Pod) bool {
 | 
			
		||||
	return pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// updateMetrics updates the gauge metrics that track how many pods and containers this kubelet manages.
 | 
			
		||||
// oldPod or newPod may be nil to signify creation or deletion, respectively.
 | 
			
		||||
func updateMetrics(oldPod, newPod *v1.Pod) {
 | 
			
		||||
	var numC, numIC, numEC int
 | 
			
		||||
	countEC := utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers)
 | 
			
		||||
 | 
			
		||||
	if oldPod != nil {
 | 
			
		||||
		if newPod == nil {
 | 
			
		||||
			metrics.ManagedPods.Dec()
 | 
			
		||||
		}
 | 
			
		||||
		numC -= len(oldPod.Spec.Containers)
 | 
			
		||||
		numIC -= len(oldPod.Spec.InitContainers)
 | 
			
		||||
		if countEC {
 | 
			
		||||
			numEC -= len(oldPod.Spec.EphemeralContainers)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if newPod != nil {
 | 
			
		||||
		if oldPod == nil {
 | 
			
		||||
			metrics.ManagedPods.Inc()
 | 
			
		||||
		}
 | 
			
		||||
		numC += len(newPod.Spec.Containers)
 | 
			
		||||
		numIC += len(newPod.Spec.InitContainers)
 | 
			
		||||
		if countEC {
 | 
			
		||||
			numEC += len(newPod.Spec.EphemeralContainers)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if numC != 0 {
 | 
			
		||||
		metrics.ManagedContainers.WithLabelValues(metrics.Container).Add(float64(numC))
 | 
			
		||||
	}
 | 
			
		||||
	if numIC != 0 {
 | 
			
		||||
		metrics.ManagedContainers.WithLabelValues(metrics.InitContainer).Add(float64(numIC))
 | 
			
		||||
	}
 | 
			
		||||
	if countEC && numEC != 0 {
 | 
			
		||||
		metrics.ManagedContainers.WithLabelValues(metrics.EphemeralContainer).Add(float64(numEC))
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// updatePodsInternal replaces the given pods in the current state of the
 | 
			
		||||
// manager, updating the various indices. The caller is assumed to hold the
 | 
			
		||||
// lock.
 | 
			
		||||
@@ -202,6 +244,7 @@ func (pm *basicManager) updatePodsInternal(pods ...*v1.Pod) {
 | 
			
		||||
			}
 | 
			
		||||
		} else {
 | 
			
		||||
			resolvedPodUID := kubetypes.ResolvedPodUID(pod.UID)
 | 
			
		||||
			updateMetrics(pm.podByUID[resolvedPodUID], pod)
 | 
			
		||||
			pm.podByUID[resolvedPodUID] = pod
 | 
			
		||||
			pm.podByFullName[podFullName] = pod
 | 
			
		||||
			if mirror, ok := pm.mirrorPodByFullName[podFullName]; ok {
 | 
			
		||||
@@ -212,6 +255,7 @@ func (pm *basicManager) updatePodsInternal(pods ...*v1.Pod) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (pm *basicManager) DeletePod(pod *v1.Pod) {
 | 
			
		||||
	updateMetrics(pod, nil)
 | 
			
		||||
	pm.lock.Lock()
 | 
			
		||||
	defer pm.lock.Unlock()
 | 
			
		||||
	if pm.secretManager != nil {
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user