	Merge pull request #116897 from Richabanker/kubelete-resource-metrics-ga
Graduate kubelet resource metrics to GA

@@ -31,14 +31,14 @@ var (
 		"Cumulative cpu time consumed by the node in core-seconds",
 		nil,
 		nil,
-		metrics.ALPHA,
+		metrics.STABLE,
 		"")
 
 	nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
 		"Current working set of the node in bytes",
 		nil,
 		nil,
-		metrics.ALPHA,
+		metrics.STABLE,
 		"")
 
 	nodeSwapUsageDesc = metrics.NewDesc("node_swap_usage_bytes",
@@ -52,14 +52,14 @@ var (
 		"Cumulative cpu time consumed by the container in core-seconds",
 		[]string{"container", "pod", "namespace"},
 		nil,
-		metrics.ALPHA,
+		metrics.STABLE,
 		"")
 
 	containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
 		"Current working set of the container in bytes",
 		[]string{"container", "pod", "namespace"},
 		nil,
-		metrics.ALPHA,
+		metrics.STABLE,
 		"")
 
 	containerSwapUsageDesc = metrics.NewDesc("container_swap_usage_bytes",
@@ -73,14 +73,14 @@ var (
 		"Cumulative cpu time consumed by the pod in core-seconds",
 		[]string{"pod", "namespace"},
 		nil,
-		metrics.ALPHA,
+		metrics.STABLE,
 		"")
 
 	podMemoryUsageDesc = metrics.NewDesc("pod_memory_working_set_bytes",
 		"Current working set of the pod in bytes",
 		[]string{"pod", "namespace"},
 		nil,
-		metrics.ALPHA,
+		metrics.STABLE,
 		"")
 
 	podSwapUsageDesc = metrics.NewDesc("pod_swap_usage_bytes",
@@ -95,13 +95,20 @@ var (
 		nil,
 		nil,
 		metrics.ALPHA,
-		"")
+		"1.29.0")
+
+	resourceScrapeErrorResultDesc = metrics.NewDesc("resource_scrape_error",
+		"1 if there was an error while getting container metrics, 0 otherwise",
+		nil,
+		nil,
+		metrics.STABLE,
+		"")
 
 	containerStartTimeDesc = metrics.NewDesc("container_start_time_seconds",
 		"Start time of the container since unix epoch in seconds",
 		[]string{"container", "pod", "namespace"},
 		nil,
-		metrics.ALPHA,
+		metrics.STABLE,
 		"")
 )
 
@@ -134,6 +141,7 @@ func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Des
 	ch <- podMemoryUsageDesc
 	ch <- podSwapUsageDesc
 	ch <- resourceScrapeResultDesc
+	ch <- resourceScrapeErrorResultDesc
 }
 
 // CollectWithStability implements metrics.StableCollector
@@ -145,6 +153,7 @@ func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metri
 	var errorCount float64
 	defer func() {
 		ch <- metrics.NewLazyConstMetric(resourceScrapeResultDesc, metrics.GaugeValue, errorCount)
+		ch <- metrics.NewLazyConstMetric(resourceScrapeErrorResultDesc, metrics.GaugeValue, errorCount)
 	}()
 	statsSummary, err := rc.provider.GetCPUAndMemoryStats(ctx)
 	if err != nil {
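
For orientation: the descriptor constructor used throughout these hunks is metrics.NewDesc from k8s.io/component-base/metrics, whose arguments are the metric name, help text, variable labels, const labels, stability level, and deprecated-since version. A minimal sketch of the replacement pattern from the @@ -95,13 +95,20 @@ hunk, using only names that appear in the diff (the package name "example" is illustrative, not part of this PR):

package example

import "k8s.io/component-base/metrics"

var (
	// The old ALPHA scrape_error descriptor is kept, but its last argument now
	// marks it as deprecated since 1.29.0.
	resourceScrapeResultDesc = metrics.NewDesc("scrape_error",
		"1 if there was an error while getting container metrics, 0 otherwise",
		nil,
		nil,
		metrics.ALPHA,
		"1.29.0")

	// Its STABLE replacement carries no deprecation version; during the
	// deprecation window CollectWithStability emits both gauges, as the
	// @@ -145,6 +153,7 @@ hunk above shows.
	resourceScrapeErrorResultDesc = metrics.NewDesc("resource_scrape_error",
		"1 if there was an error while getting container metrics, 0 otherwise",
		nil,
		nil,
		metrics.STABLE,
		"")
)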

@@ -36,6 +36,7 @@ func TestCollectResourceMetrics(t *testing.T) {
 	testTime := metav1.NewTime(staticTimestamp)
 	interestedMetrics := []string{
 		"scrape_error",
+		"resource_scrape_error",
 		"node_cpu_usage_seconds_total",
 		"node_memory_working_set_bytes",
 		"node_swap_usage_bytes",
@@ -64,6 +65,9 @@ func TestCollectResourceMetrics(t *testing.T) {
 				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
 				# TYPE scrape_error gauge
 				scrape_error 1
+				# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE resource_scrape_error gauge
+				resource_scrape_error 1
 			`,
 		},
 		{
@@ -86,10 +90,10 @@ func TestCollectResourceMetrics(t *testing.T) {
 			},
 			summaryErr: nil,
 			expectedMetrics: `
-				# HELP node_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the node in core-seconds
+				# HELP node_cpu_usage_seconds_total [STABLE] Cumulative cpu time consumed by the node in core-seconds
 				# TYPE node_cpu_usage_seconds_total counter
 				node_cpu_usage_seconds_total 10 1624396278302
-				# HELP node_memory_working_set_bytes [ALPHA] Current working set of the node in bytes
+				# HELP node_memory_working_set_bytes [STABLE] Current working set of the node in bytes
 				# TYPE node_memory_working_set_bytes gauge
 				node_memory_working_set_bytes 1000 1624396278302
 				# HELP node_swap_usage_bytes [ALPHA] Current swap usage of the node in bytes. Reported only on non-windows systems
@@ -98,6 +102,9 @@ func TestCollectResourceMetrics(t *testing.T) {
 				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
 				# TYPE scrape_error gauge
 				scrape_error 0
+				# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE resource_scrape_error gauge
+				resource_scrape_error 0
 			`,
 		},
 		{
@@ -119,6 +126,9 @@ func TestCollectResourceMetrics(t *testing.T) {
 				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
 				# TYPE scrape_error gauge
 				scrape_error 0
+				# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE resource_scrape_error gauge
+				resource_scrape_error 0
 			`,
 		},
 		{
@@ -188,17 +198,20 @@ func TestCollectResourceMetrics(t *testing.T) {
 				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
 				# TYPE scrape_error gauge
 				scrape_error 0
-				# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
+				# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE resource_scrape_error gauge
+				resource_scrape_error 0
+				# HELP container_cpu_usage_seconds_total [STABLE] Cumulative cpu time consumed by the container in core-seconds
 				# TYPE container_cpu_usage_seconds_total counter
 				container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 1624396278302
 				container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 1624396278302
 				container_cpu_usage_seconds_total{container="container_b",namespace="namespace_a",pod="pod_a"} 10 1624396278302
-				# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
+				# HELP container_memory_working_set_bytes [STABLE] Current working set of the container in bytes
 				# TYPE container_memory_working_set_bytes gauge
 				container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
 				container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 1624396278302
 				container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
-				# HELP container_start_time_seconds [ALPHA] Start time of the container since unix epoch in seconds
+				# HELP container_start_time_seconds [STABLE] Start time of the container since unix epoch in seconds
 				# TYPE container_start_time_seconds gauge
 				container_start_time_seconds{container="container_a",namespace="namespace_a",pod="pod_a"} 1.6243962483020916e+09 1624396248302
 				container_start_time_seconds{container="container_a",namespace="namespace_b",pod="pod_b"} 1.6243956783020916e+09 1624395678302
@@ -239,10 +252,13 @@ func TestCollectResourceMetrics(t *testing.T) {
 				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
 				# TYPE scrape_error gauge
 				scrape_error 0
-				# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
+				# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE resource_scrape_error gauge
+				resource_scrape_error 0
+				# HELP container_cpu_usage_seconds_total [STABLE] Cumulative cpu time consumed by the container in core-seconds
 				# TYPE container_cpu_usage_seconds_total counter
 				container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 1624396278302
-				# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
+				# HELP container_memory_working_set_bytes [STABLE] Current working set of the container in bytes
 				# TYPE container_memory_working_set_bytes gauge
 				container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
 			`,
@@ -295,19 +311,22 @@ func TestCollectResourceMetrics(t *testing.T) {
 			},
 			summaryErr: nil,
 			expectedMetrics: `
-				# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
+				# HELP container_cpu_usage_seconds_total [STABLE] Cumulative cpu time consumed by the container in core-seconds
 				# TYPE container_cpu_usage_seconds_total counter
 				container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 1624396278302
-				# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
+				# HELP container_memory_working_set_bytes [STABLE] Current working set of the container in bytes
 				# TYPE container_memory_working_set_bytes gauge
 				container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 1624396278302
-				# HELP container_start_time_seconds [ALPHA] Start time of the container since unix epoch in seconds
+				# HELP container_start_time_seconds [STABLE] Start time of the container since unix epoch in seconds
 				# TYPE container_start_time_seconds gauge
 				container_start_time_seconds{container="container_a",namespace="namespace_a",pod="pod_a"} 1.6243962483020916e+09 1624396248302
 				container_start_time_seconds{container="container_a",namespace="namespace_b",pod="pod_b"} 1.6243956783020916e+09 1624395678302
 				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
 				# TYPE scrape_error gauge
 				scrape_error 0
+				# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE resource_scrape_error gauge
+				resource_scrape_error 0
 			`,
 		},
 		{
@@ -339,10 +358,13 @@ func TestCollectResourceMetrics(t *testing.T) {
 				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
 				# TYPE scrape_error gauge
 				scrape_error 0
-				# HELP pod_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the pod in core-seconds
+				# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE resource_scrape_error gauge
+				resource_scrape_error 0
+				# HELP pod_cpu_usage_seconds_total [STABLE] Cumulative cpu time consumed by the pod in core-seconds
 				# TYPE pod_cpu_usage_seconds_total counter
 				pod_cpu_usage_seconds_total{namespace="namespace_a",pod="pod_a"} 10 1624396278302
-				# HELP pod_memory_working_set_bytes [ALPHA] Current working set of the pod in bytes
+				# HELP pod_memory_working_set_bytes [STABLE] Current working set of the pod in bytes
 				# TYPE pod_memory_working_set_bytes gauge
 				pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 1624396278302
 				# HELP pod_swap_usage_bytes [ALPHA] Current amount of the pod swap usage in bytes. Reported only on non-windows systems
@@ -375,6 +397,9 @@ func TestCollectResourceMetrics(t *testing.T) {
 				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
 				# TYPE scrape_error gauge
 				scrape_error 0
+				# HELP resource_scrape_error [STABLE] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE resource_scrape_error gauge
+				resource_scrape_error 0
 			`,
 		},
 	}

@@ -181,21 +181,34 @@ func (g *Grabber) GrabFromKubelet(ctx context.Context, nodeName string) (Kubelet
 		return KubeletMetrics{}, fmt.Errorf("Error listing nodes with name %v, got %v", nodeName, nodes.Items)
 	}
 	kubeletPort := nodes.Items[0].Status.DaemonEndpoints.KubeletEndpoint.Port
-	return g.grabFromKubeletInternal(ctx, nodeName, int(kubeletPort))
+	return g.grabFromKubeletInternal(ctx, nodeName, int(kubeletPort), "metrics")
 }
 
-func (g *Grabber) grabFromKubeletInternal(ctx context.Context, nodeName string, kubeletPort int) (KubeletMetrics, error) {
+// GrabresourceMetricsFromKubelet returns resource metrics from kubelet
+func (g *Grabber) GrabResourceMetricsFromKubelet(ctx context.Context, nodeName string) (KubeletMetrics, error) {
+	nodes, err := g.client.CoreV1().Nodes().List(ctx, metav1.ListOptions{FieldSelector: fields.Set{"metadata.name": nodeName}.AsSelector().String()})
+	if err != nil {
+		return KubeletMetrics{}, err
+	}
+	if len(nodes.Items) != 1 {
+		return KubeletMetrics{}, fmt.Errorf("Error listing nodes with name %v, got %v", nodeName, nodes.Items)
+	}
+	kubeletPort := nodes.Items[0].Status.DaemonEndpoints.KubeletEndpoint.Port
+	return g.grabFromKubeletInternal(ctx, nodeName, int(kubeletPort), "metrics/resource")
+}
+
+func (g *Grabber) grabFromKubeletInternal(ctx context.Context, nodeName string, kubeletPort int, pathSuffix string) (KubeletMetrics, error) {
 	if kubeletPort <= 0 || kubeletPort > 65535 {
 		return KubeletMetrics{}, fmt.Errorf("Invalid Kubelet port %v. Skipping Kubelet's metrics gathering", kubeletPort)
 	}
-	output, err := g.getMetricsFromNode(ctx, nodeName, int(kubeletPort))
+	output, err := g.getMetricsFromNode(ctx, nodeName, int(kubeletPort), pathSuffix)
 	if err != nil {
 		return KubeletMetrics{}, err
 	}
 	return parseKubeletMetrics(output)
 }
 
-func (g *Grabber) getMetricsFromNode(ctx context.Context, nodeName string, kubeletPort int) (string, error) {
+func (g *Grabber) getMetricsFromNode(ctx context.Context, nodeName string, kubeletPort int, pathSuffix string) (string, error) {
 	// There's a problem with timing out during proxy. Wrapping this in a goroutine to prevent deadlock.
 	finished := make(chan struct{}, 1)
 	var err error
@@ -205,7 +218,7 @@ func (g *Grabber) getMetricsFromNode(ctx context.Context, nodeName string, kubel
 			Resource("nodes").
 			SubResource("proxy").
 			Name(fmt.Sprintf("%v:%v", nodeName, kubeletPort)).
-			Suffix("metrics").
+			Suffix(pathSuffix).
 			Do(ctx).Raw()
 		finished <- struct{}{}
 	}()
@@ -432,7 +445,7 @@ func (g *Grabber) Grab(ctx context.Context) (Collection, error) {
 		} else {
 			for _, node := range nodes.Items {
 				kubeletPort := node.Status.DaemonEndpoints.KubeletEndpoint.Port
-				metrics, err := g.grabFromKubeletInternal(ctx, node.Name, int(kubeletPort))
+				metrics, err := g.grabFromKubeletInternal(ctx, node.Name, int(kubeletPort), "metrics")
 				if err != nil {
 					errs = append(errs, err)
 				}
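
The grabber change above parameterizes the kubelet path so the same API-server node-proxy request can hit either /metrics or /metrics/resource. A standalone sketch of that request, using the same client-go call chain as grabFromKubeletInternal and getMetricsFromNode (the helper name, kubeconfig loading, node name "node-1", and port 10250 are assumptions for illustration, not part of this PR):

package main

import (
	"context"
	"fmt"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// fetchKubeletResourceMetrics mirrors what the grabber does above: proxy through
// the API server to a node's kubelet and return the raw Prometheus text exposition
// from /metrics/resource.
func fetchKubeletResourceMetrics(ctx context.Context, c kubernetes.Interface, nodeName string, kubeletPort int) (string, error) {
	raw, err := c.CoreV1().RESTClient().Get().
		Resource("nodes").
		SubResource("proxy").
		Name(fmt.Sprintf("%v:%v", nodeName, kubeletPort)).
		Suffix("metrics/resource").
		Do(ctx).Raw()
	if err != nil {
		return "", err
	}
	return string(raw), nil
}

func main() {
	// Kubeconfig at the default location; "node-1" and 10250 are placeholders.
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(config)
	out, err := fetchKubeletResourceMetrics(context.Background(), client, "node-1", 10250)
	if err != nil {
		panic(err)
	}
	// Expect STABLE metrics such as node_cpu_usage_seconds_total and resource_scrape_error.
	fmt.Println(out)
}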
							
								
								
									

test/e2e/instrumentation/metrics.go (new file, 70 lines)
@@ -0,0 +1,70 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package instrumentation
+
+import (
+	"context"
+	"errors"
+	"time"
+
+	"github.com/onsi/gomega"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/kubernetes/test/e2e/framework"
+	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
+	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
+	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
+	"k8s.io/kubernetes/test/e2e/instrumentation/common"
+	admissionapi "k8s.io/pod-security-admission/api"
+
+	"github.com/onsi/ginkgo/v2"
+)
+
+var _ = common.SIGDescribe("Metrics", func() {
+	f := framework.NewDefaultFramework("metrics")
+	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
+	var c, ec clientset.Interface
+	var grabber *e2emetrics.Grabber
+	ginkgo.BeforeEach(func(ctx context.Context) {
+		var err error
+		c = f.ClientSet
+		ec = f.KubemarkExternalClusterClientSet
+		gomega.Eventually(ctx, func() error {
+			grabber, err = e2emetrics.NewMetricsGrabber(ctx, c, ec, f.ClientConfig(), true, true, true, true, true, true)
+			if err != nil {
+				framework.ExpectNoError(err, "failed to create metrics grabber")
+			}
+			return nil
+		}, 5*time.Minute, 10*time.Second).Should(gomega.BeNil())
+	})
+
+	/*
+	   Release: v1.29
+	   Testname: Kubelet resource metrics
+	   Description: Should attempt to grab all resource metrics from kubelet metrics/resource endpoint.
+	*/
+	ginkgo.It("should grab all metrics from kubelet /metrics/resource endpoint", func(ctx context.Context) {
+		ginkgo.By("Connecting to kubelet's /metrics/resource endpoint")
+		node, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet)
+		if errors.Is(err, e2emetrics.MetricsGrabbingDisabledError) {
+			e2eskipper.Skipf("%v", err)
+		}
+		framework.ExpectNoError(err)
+		response, err := grabber.GrabResourceMetricsFromKubelet(ctx, node.Name)
+		framework.ExpectNoError(err)
+		gomega.Expect(response).NotTo(gomega.BeEmpty())
+	})
+})

@@ -74,6 +74,56 @@
   stabilityLevel: STABLE
   labels:
   - zone
+- name: container_cpu_usage_seconds_total
+  help: Cumulative cpu time consumed by the container in core-seconds
+  type: Custom
+  stabilityLevel: STABLE
+  labels:
+  - container
+  - pod
+  - namespace
+- name: container_memory_working_set_bytes
+  help: Current working set of the container in bytes
+  type: Custom
+  stabilityLevel: STABLE
+  labels:
+  - container
+  - pod
+  - namespace
+- name: container_start_time_seconds
+  help: Start time of the container since unix epoch in seconds
+  type: Custom
+  stabilityLevel: STABLE
+  labels:
+  - container
+  - pod
+  - namespace
+- name: node_cpu_usage_seconds_total
+  help: Cumulative cpu time consumed by the node in core-seconds
+  type: Custom
+  stabilityLevel: STABLE
+- name: node_memory_working_set_bytes
+  help: Current working set of the node in bytes
+  type: Custom
+  stabilityLevel: STABLE
+- name: pod_cpu_usage_seconds_total
+  help: Cumulative cpu time consumed by the pod in core-seconds
+  type: Custom
+  stabilityLevel: STABLE
+  labels:
+  - pod
+  - namespace
+- name: pod_memory_working_set_bytes
+  help: Current working set of the pod in bytes
+  type: Custom
+  stabilityLevel: STABLE
+  labels:
+  - pod
+  - namespace
+- name: resource_scrape_error
+  help: 1 if there was an error while getting container metrics, 0 otherwise
+  type: Custom
+  stabilityLevel: STABLE
 - name: pod_scheduling_sli_duration_seconds
   subsystem: scheduler
   help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling