mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-03 19:58:17 +00:00 
			
		
		
		
	rootcacertpublisher: drop the namespace label from metrics to reduce its cardinality
The `root_ca_cert_publisher_sync_duration_seconds` metric tracks the sync duration in the root CA cert publisher per code and namespace. In clusters with a high namespace turnover (like CI clusters), this may cause the kube-controller-manager to expose over 100k series to Prometheus, which may degrade that service. Drop the `namespace` label to reduce the metrics' cardinality; tracking this metric by namespace does not justify the impact of keeping the label.
This commit is contained in:
		@@ -37,7 +37,7 @@ var (
 | 
			
		||||
			Help:           "Number of namespace syncs happened in root ca cert publisher.",
 | 
			
		||||
			StabilityLevel: metrics.ALPHA,
 | 
			
		||||
		},
 | 
			
		||||
		[]string{"namespace", "code"},
 | 
			
		||||
		[]string{"code"},
 | 
			
		||||
	)
 | 
			
		||||
	syncLatency = metrics.NewHistogramVec(
 | 
			
		||||
		&metrics.HistogramOpts{
 | 
			
		||||
@@ -47,19 +47,19 @@ var (
 | 
			
		||||
			Buckets:        metrics.ExponentialBuckets(0.001, 2, 15),
 | 
			
		||||
			StabilityLevel: metrics.ALPHA,
 | 
			
		||||
		},
 | 
			
		||||
		[]string{"namespace", "code"},
 | 
			
		||||
		[]string{"code"},
 | 
			
		||||
	)
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func recordMetrics(start time.Time, ns string, err error) {
 | 
			
		||||
func recordMetrics(start time.Time, err error) {
 | 
			
		||||
	code := "500"
 | 
			
		||||
	if err == nil {
 | 
			
		||||
		code = "200"
 | 
			
		||||
	} else if se, ok := err.(*apierrors.StatusError); ok && se.Status().Code != 0 {
 | 
			
		||||
		code = strconv.Itoa(int(se.Status().Code))
 | 
			
		||||
	}
 | 
			
		||||
	syncLatency.WithLabelValues(ns, code).Observe(time.Since(start).Seconds())
 | 
			
		||||
	syncCounter.WithLabelValues(ns, code).Inc()
 | 
			
		||||
	syncLatency.WithLabelValues(code).Observe(time.Since(start).Seconds())
 | 
			
		||||
	syncCounter.WithLabelValues(code).Inc()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var once sync.Once
 | 
			
		||||
 
 | 
			
		||||
@@ -44,7 +44,7 @@ func TestSyncCounter(t *testing.T) {
 | 
			
		||||
			want: `
 | 
			
		||||
# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
			
		||||
# TYPE root_ca_cert_publisher_sync_total counter
 | 
			
		||||
root_ca_cert_publisher_sync_total{code="200",namespace="test-ns"} 1
 | 
			
		||||
root_ca_cert_publisher_sync_total{code="200"} 1
 | 
			
		||||
				`,
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
@@ -56,7 +56,7 @@ root_ca_cert_publisher_sync_total{code="200",namespace="test-ns"} 1
 | 
			
		||||
			want: `
 | 
			
		||||
# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
			
		||||
# TYPE root_ca_cert_publisher_sync_total counter
 | 
			
		||||
root_ca_cert_publisher_sync_total{code="404",namespace="test-ns"} 1
 | 
			
		||||
root_ca_cert_publisher_sync_total{code="404"} 1
 | 
			
		||||
				`,
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
@@ -68,7 +68,7 @@ root_ca_cert_publisher_sync_total{code="404",namespace="test-ns"} 1
 | 
			
		||||
			want: `
 | 
			
		||||
# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
			
		||||
# TYPE root_ca_cert_publisher_sync_total counter
 | 
			
		||||
root_ca_cert_publisher_sync_total{code="500",namespace="test-ns"} 1
 | 
			
		||||
root_ca_cert_publisher_sync_total{code="500"} 1
 | 
			
		||||
				`,
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
@@ -80,14 +80,14 @@ root_ca_cert_publisher_sync_total{code="500",namespace="test-ns"} 1
 | 
			
		||||
			want: `
 | 
			
		||||
# HELP root_ca_cert_publisher_sync_total [ALPHA] Number of namespace syncs happened in root ca cert publisher.
 | 
			
		||||
# TYPE root_ca_cert_publisher_sync_total counter
 | 
			
		||||
root_ca_cert_publisher_sync_total{code="500",namespace="test-ns"} 1
 | 
			
		||||
root_ca_cert_publisher_sync_total{code="500"} 1
 | 
			
		||||
				`,
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, tc := range testCases {
 | 
			
		||||
		t.Run(tc.desc, func(t *testing.T) {
 | 
			
		||||
			recordMetrics(time.Now(), "test-ns", tc.err)
 | 
			
		||||
			recordMetrics(time.Now(), tc.err)
 | 
			
		||||
			defer syncCounter.Reset()
 | 
			
		||||
			if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tc.want), tc.metrics...); err != nil {
 | 
			
		||||
				t.Fatal(err)
 | 
			
		||||
 
 | 
			
		||||
@@ -182,7 +182,7 @@ func (c *Publisher) processNextWorkItem() bool {
 | 
			
		||||
func (c *Publisher) syncNamespace(ns string) (err error) {
 | 
			
		||||
	startTime := time.Now()
 | 
			
		||||
	defer func() {
 | 
			
		||||
		recordMetrics(startTime, ns, err)
 | 
			
		||||
		recordMetrics(startTime, err)
 | 
			
		||||
		klog.V(4).Infof("Finished syncing namespace %q (%v)", ns, time.Since(startTime))
 | 
			
		||||
	}()
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user