mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 04:08:16 +00:00 
			
		
		
		
	rootcacertpublisher: drop the namespace label from metrics to reduce their cardinality
The `root_ca_cert_publisher_sync_duration_seconds` metric tracks the sync duration in the root CA cert publisher per code and namespace. In clusters with a high namespace turnover (like CI clusters), this may cause the kube-controller-manager to expose over 100k series to Prometheus, which may degrade that service. Drop the `namespace` label to reduce the metrics' cardinality; tracking this metric by namespace does not justify the impact of keeping it.
This commit is contained in:
		@@ -37,7 +37,7 @@ var (
 | 
				
			|||||||
			Help:           "Number of namespace syncs happened in root ca cert publisher.",
 | 
								Help:           "Number of namespace syncs happened in root ca cert publisher.",
 | 
				
			||||||
			StabilityLevel: metrics.ALPHA,
 | 
								StabilityLevel: metrics.ALPHA,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
		[]string{"namespace", "code"},
 | 
							[]string{"code"},
 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
	syncLatency = metrics.NewHistogramVec(
 | 
						syncLatency = metrics.NewHistogramVec(
 | 
				
			||||||
		&metrics.HistogramOpts{
 | 
							&metrics.HistogramOpts{
 | 
				
			||||||
@@ -47,19 +47,19 @@ var (
 | 
				
			|||||||
			Buckets:        metrics.ExponentialBuckets(0.001, 2, 15),
 | 
								Buckets:        metrics.ExponentialBuckets(0.001, 2, 15),
 | 
				
			||||||
			StabilityLevel: metrics.ALPHA,
 | 
								StabilityLevel: metrics.ALPHA,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
		[]string{"namespace", "code"},
 | 
							[]string{"code"},
 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func recordMetrics(start time.Time, ns string, err error) {
 | 
					func recordMetrics(start time.Time, err error) {
 | 
				
			||||||
	code := "500"
 | 
						code := "500"
 | 
				
			||||||
	if err == nil {
 | 
						if err == nil {
 | 
				
			||||||
		code = "200"
 | 
							code = "200"
 | 
				
			||||||
	} else if se, ok := err.(*apierrors.StatusError); ok && se.Status().Code != 0 {
 | 
						} else if se, ok := err.(*apierrors.StatusError); ok && se.Status().Code != 0 {
 | 
				
			||||||
		code = strconv.Itoa(int(se.Status().Code))
 | 
							code = strconv.Itoa(int(se.Status().Code))
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	syncLatency.WithLabelValues(ns, code).Observe(time.Since(start).Seconds())
 | 
						syncLatency.WithLabelValues(code).Observe(time.Since(start).Seconds())
 | 
				
			||||||
	syncCounter.WithLabelValues(ns, code).Inc()
 | 
						syncCounter.WithLabelValues(code).Inc()
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
var once sync.Once
 | 
					var once sync.Once
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -44,7 +44,7 @@ func TestSyncCounter(t *testing.T) {
 | 
				
			|||||||
			want: `
 | 
								want: `
 | 
				
			||||||
# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
					# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
				
			||||||
# TYPE root_ca_cert_publisher_sync_total counter
 | 
					# TYPE root_ca_cert_publisher_sync_total counter
 | 
				
			||||||
root_ca_cert_publisher_sync_total{code="200",namespace="test-ns"} 1
 | 
					root_ca_cert_publisher_sync_total{code="200"} 1
 | 
				
			||||||
				`,
 | 
									`,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
@@ -56,7 +56,7 @@ root_ca_cert_publisher_sync_total{code="200",namespace="test-ns"} 1
 | 
				
			|||||||
			want: `
 | 
								want: `
 | 
				
			||||||
# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
					# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
				
			||||||
# TYPE root_ca_cert_publisher_sync_total counter
 | 
					# TYPE root_ca_cert_publisher_sync_total counter
 | 
				
			||||||
root_ca_cert_publisher_sync_total{code="404",namespace="test-ns"} 1
 | 
					root_ca_cert_publisher_sync_total{code="404"} 1
 | 
				
			||||||
				`,
 | 
									`,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
@@ -68,7 +68,7 @@ root_ca_cert_publisher_sync_total{code="404",namespace="test-ns"} 1
 | 
				
			|||||||
			want: `
 | 
								want: `
 | 
				
			||||||
# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
					# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
				
			||||||
# TYPE root_ca_cert_publisher_sync_total counter
 | 
					# TYPE root_ca_cert_publisher_sync_total counter
 | 
				
			||||||
root_ca_cert_publisher_sync_total{code="500",namespace="test-ns"} 1
 | 
					root_ca_cert_publisher_sync_total{code="500"} 1
 | 
				
			||||||
				`,
 | 
									`,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
@@ -80,14 +80,14 @@ root_ca_cert_publisher_sync_total{code="500",namespace="test-ns"} 1
 | 
				
			|||||||
			want: `
 | 
								want: `
 | 
				
			||||||
# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
					# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
				
			||||||
# TYPE root_ca_cert_publisher_sync_total counter
 | 
					# TYPE root_ca_cert_publisher_sync_total counter
 | 
				
			||||||
root_ca_cert_publisher_sync_total{code="500",namespace="test-ns"} 1
 | 
					root_ca_cert_publisher_sync_total{code="500"} 1
 | 
				
			||||||
				`,
 | 
									`,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for _, tc := range testCases {
 | 
						for _, tc := range testCases {
 | 
				
			||||||
		t.Run(tc.desc, func(t *testing.T) {
 | 
							t.Run(tc.desc, func(t *testing.T) {
 | 
				
			||||||
			recordMetrics(time.Now(), "test-ns", tc.err)
 | 
								recordMetrics(time.Now(), tc.err)
 | 
				
			||||||
			defer syncCounter.Reset()
 | 
								defer syncCounter.Reset()
 | 
				
			||||||
			if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tc.want), tc.metrics...); err != nil {
 | 
								if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tc.want), tc.metrics...); err != nil {
 | 
				
			||||||
				t.Fatal(err)
 | 
									t.Fatal(err)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -182,7 +182,7 @@ func (c *Publisher) processNextWorkItem() bool {
 | 
				
			|||||||
func (c *Publisher) syncNamespace(ns string) (err error) {
 | 
					func (c *Publisher) syncNamespace(ns string) (err error) {
 | 
				
			||||||
	startTime := time.Now()
 | 
						startTime := time.Now()
 | 
				
			||||||
	defer func() {
 | 
						defer func() {
 | 
				
			||||||
		recordMetrics(startTime, ns, err)
 | 
							recordMetrics(startTime, err)
 | 
				
			||||||
		klog.V(4).Infof("Finished syncing namespace %q (%v)", ns, time.Since(startTime))
 | 
							klog.V(4).Infof("Finished syncing namespace %q (%v)", ns, time.Since(startTime))
 | 
				
			||||||
	}()
 | 
						}()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user