Mirror of https://github.com/optim-enterprises-bv/kubernetes.git (synced 2025-11-03 19:58:17 +00:00)
			
		
		
		
	Merge pull request #86251 from bboreham/pleg-last-seen-metric
Kubelet: add a metric to observe time since PLEG last seen
This commit is contained in:
		@@ -44,6 +44,7 @@ const (
 | 
				
			|||||||
	PLEGRelistDurationKey                = "pleg_relist_duration_seconds"
 | 
						PLEGRelistDurationKey                = "pleg_relist_duration_seconds"
 | 
				
			||||||
	PLEGDiscardEventsKey                 = "pleg_discard_events"
 | 
						PLEGDiscardEventsKey                 = "pleg_discard_events"
 | 
				
			||||||
	PLEGRelistIntervalKey                = "pleg_relist_interval_seconds"
 | 
						PLEGRelistIntervalKey                = "pleg_relist_interval_seconds"
 | 
				
			||||||
 | 
						PLEGLastSeenKey                      = "pleg_last_seen_seconds"
 | 
				
			||||||
	EvictionsKey                         = "evictions"
 | 
						EvictionsKey                         = "evictions"
 | 
				
			||||||
	EvictionStatsAgeKey                  = "eviction_stats_age_seconds"
 | 
						EvictionStatsAgeKey                  = "eviction_stats_age_seconds"
 | 
				
			||||||
	PreemptionsKey                       = "preemptions"
 | 
						PreemptionsKey                       = "preemptions"
 | 
				
			||||||
@@ -186,6 +187,16 @@ var (
 | 
				
			|||||||
			StabilityLevel: metrics.ALPHA,
 | 
								StabilityLevel: metrics.ALPHA,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
 | 
						// PLEGLastSeen is a Gauge giving the Unix timestamp when the Kubelet's
 | 
				
			||||||
 | 
						// Pod Lifecycle Event Generator (PLEG) was last seen active.
 | 
				
			||||||
 | 
						PLEGLastSeen = metrics.NewGauge(
 | 
				
			||||||
 | 
							&metrics.GaugeOpts{
 | 
				
			||||||
 | 
								Subsystem:      KubeletSubsystem,
 | 
				
			||||||
 | 
								Name:           PLEGLastSeenKey,
 | 
				
			||||||
 | 
								Help:           "Timestamp in seconds when PLEG was last seen active.",
 | 
				
			||||||
 | 
								StabilityLevel: metrics.ALPHA,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
	// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
 | 
						// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
 | 
				
			||||||
	// Broken down by operation type.
 | 
						// Broken down by operation type.
 | 
				
			||||||
	RuntimeOperations = metrics.NewCounterVec(
 | 
						RuntimeOperations = metrics.NewCounterVec(
 | 
				
			||||||
@@ -522,6 +533,7 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...metrics.S
 | 
				
			|||||||
		legacyregistry.MustRegister(PLEGRelistDuration)
 | 
							legacyregistry.MustRegister(PLEGRelistDuration)
 | 
				
			||||||
		legacyregistry.MustRegister(PLEGDiscardEvents)
 | 
							legacyregistry.MustRegister(PLEGDiscardEvents)
 | 
				
			||||||
		legacyregistry.MustRegister(PLEGRelistInterval)
 | 
							legacyregistry.MustRegister(PLEGRelistInterval)
 | 
				
			||||||
 | 
							legacyregistry.MustRegister(PLEGLastSeen)
 | 
				
			||||||
		legacyregistry.MustRegister(RuntimeOperations)
 | 
							legacyregistry.MustRegister(RuntimeOperations)
 | 
				
			||||||
		legacyregistry.MustRegister(RuntimeOperationsDuration)
 | 
							legacyregistry.MustRegister(RuntimeOperationsDuration)
 | 
				
			||||||
		legacyregistry.MustRegister(RuntimeOperationsErrors)
 | 
							legacyregistry.MustRegister(RuntimeOperationsErrors)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -138,6 +138,8 @@ func (g *GenericPLEG) Healthy() (bool, error) {
 | 
				
			|||||||
	if relistTime.IsZero() {
 | 
						if relistTime.IsZero() {
 | 
				
			||||||
		return false, fmt.Errorf("pleg has yet to be successful")
 | 
							return false, fmt.Errorf("pleg has yet to be successful")
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						// Expose as metric so you can alert on `time() - kubelet_pleg_last_seen_seconds > nn`
 | 
				
			||||||
 | 
						metrics.PLEGLastSeen.Set(float64(relistTime.Unix()))
 | 
				
			||||||
	elapsed := g.clock.Since(relistTime)
 | 
						elapsed := g.clock.Since(relistTime)
 | 
				
			||||||
	if elapsed > relistThreshold {
 | 
						if elapsed > relistThreshold {
 | 
				
			||||||
		return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, relistThreshold)
 | 
							return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, relistThreshold)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user