mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-03 19:58:17 +00:00 
			
		
		
		
	Support metrics for node shutdown
This commit is contained in:
		@@ -832,6 +832,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
 | 
				
			|||||||
		ShutdownGracePeriodRequested:     kubeCfg.ShutdownGracePeriod.Duration,
 | 
							ShutdownGracePeriodRequested:     kubeCfg.ShutdownGracePeriod.Duration,
 | 
				
			||||||
		ShutdownGracePeriodCriticalPods:  kubeCfg.ShutdownGracePeriodCriticalPods.Duration,
 | 
							ShutdownGracePeriodCriticalPods:  kubeCfg.ShutdownGracePeriodCriticalPods.Duration,
 | 
				
			||||||
		ShutdownGracePeriodByPodPriority: kubeCfg.ShutdownGracePeriodByPodPriority,
 | 
							ShutdownGracePeriodByPodPriority: kubeCfg.ShutdownGracePeriodByPodPriority,
 | 
				
			||||||
 | 
							StateDirectory:                   rootDirectory,
 | 
				
			||||||
	})
 | 
						})
 | 
				
			||||||
	klet.shutdownManager = shutdownManager
 | 
						klet.shutdownManager = shutdownManager
 | 
				
			||||||
	klet.admitHandlers.AddPodAdmitHandler(shutdownAdmitHandler)
 | 
						klet.admitHandlers.AddPodAdmitHandler(shutdownAdmitHandler)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -462,6 +462,26 @@ var (
 | 
				
			|||||||
			StabilityLevel: metrics.ALPHA,
 | 
								StabilityLevel: metrics.ALPHA,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// GracefulShutdownStartTime is a gauge that records the time at which the kubelet started graceful shutdown.
 | 
				
			||||||
 | 
						GracefulShutdownStartTime = metrics.NewGauge(
 | 
				
			||||||
 | 
							&metrics.GaugeOpts{
 | 
				
			||||||
 | 
								Subsystem:      KubeletSubsystem,
 | 
				
			||||||
 | 
								Name:           "graceful_shutdown_start_time_seconds",
 | 
				
			||||||
 | 
								Help:           "Last graceful shutdown start time since unix epoch in seconds",
 | 
				
			||||||
 | 
								StabilityLevel: metrics.ALPHA,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// GracefulShutdownEndTime is a gauge that records the time at which the kubelet completed graceful shutdown.
 | 
				
			||||||
 | 
						GracefulShutdownEndTime = metrics.NewGauge(
 | 
				
			||||||
 | 
							&metrics.GaugeOpts{
 | 
				
			||||||
 | 
								Subsystem:      KubeletSubsystem,
 | 
				
			||||||
 | 
								Name:           "graceful_shutdown_end_time_seconds",
 | 
				
			||||||
 | 
								Help:           "Last graceful shutdown start time since unix epoch in seconds",
 | 
				
			||||||
 | 
								StabilityLevel: metrics.ALPHA,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
var registerMetrics sync.Once
 | 
					var registerMetrics sync.Once
 | 
				
			||||||
@@ -504,6 +524,13 @@ func Register(collectors ...metrics.StableCollector) {
 | 
				
			|||||||
		for _, collector := range collectors {
 | 
							for _, collector := range collectors {
 | 
				
			||||||
			legacyregistry.CustomMustRegister(collector)
 | 
								legacyregistry.CustomMustRegister(collector)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) &&
 | 
				
			||||||
 | 
								utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdownBasedOnPodPriority) {
 | 
				
			||||||
 | 
								legacyregistry.MustRegister(GracefulShutdownStartTime)
 | 
				
			||||||
 | 
								legacyregistry.MustRegister(GracefulShutdownEndTime)
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	})
 | 
						})
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -46,6 +46,7 @@ type Config struct {
 | 
				
			|||||||
	ShutdownGracePeriodRequested     time.Duration
 | 
						ShutdownGracePeriodRequested     time.Duration
 | 
				
			||||||
	ShutdownGracePeriodCriticalPods  time.Duration
 | 
						ShutdownGracePeriodCriticalPods  time.Duration
 | 
				
			||||||
	ShutdownGracePeriodByPodPriority []kubeletconfig.ShutdownGracePeriodByPodPriority
 | 
						ShutdownGracePeriodByPodPriority []kubeletconfig.ShutdownGracePeriodByPodPriority
 | 
				
			||||||
 | 
						StateDirectory                   string
 | 
				
			||||||
	Clock                            clock.Clock
 | 
						Clock                            clock.Clock
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -22,6 +22,7 @@ package nodeshutdown
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
	"fmt"
 | 
						"fmt"
 | 
				
			||||||
 | 
						"path/filepath"
 | 
				
			||||||
	"sort"
 | 
						"sort"
 | 
				
			||||||
	"sync"
 | 
						"sync"
 | 
				
			||||||
	"time"
 | 
						"time"
 | 
				
			||||||
@@ -36,6 +37,7 @@ import (
 | 
				
			|||||||
	kubeletevents "k8s.io/kubernetes/pkg/kubelet/events"
 | 
						kubeletevents "k8s.io/kubernetes/pkg/kubelet/events"
 | 
				
			||||||
	"k8s.io/kubernetes/pkg/kubelet/eviction"
 | 
						"k8s.io/kubernetes/pkg/kubelet/eviction"
 | 
				
			||||||
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
 | 
						"k8s.io/kubernetes/pkg/kubelet/lifecycle"
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/pkg/kubelet/metrics"
 | 
				
			||||||
	"k8s.io/kubernetes/pkg/kubelet/nodeshutdown/systemd"
 | 
						"k8s.io/kubernetes/pkg/kubelet/nodeshutdown/systemd"
 | 
				
			||||||
	"k8s.io/kubernetes/pkg/kubelet/prober"
 | 
						"k8s.io/kubernetes/pkg/kubelet/prober"
 | 
				
			||||||
	"k8s.io/utils/clock"
 | 
						"k8s.io/utils/clock"
 | 
				
			||||||
@@ -47,6 +49,7 @@ const (
 | 
				
			|||||||
	nodeShutdownNotAdmittedReason  = "NodeShutdown"
 | 
						nodeShutdownNotAdmittedReason  = "NodeShutdown"
 | 
				
			||||||
	nodeShutdownNotAdmittedMessage = "Pod was rejected as the node is shutting down."
 | 
						nodeShutdownNotAdmittedMessage = "Pod was rejected as the node is shutting down."
 | 
				
			||||||
	dbusReconnectPeriod            = 1 * time.Second
 | 
						dbusReconnectPeriod            = 1 * time.Second
 | 
				
			||||||
 | 
						localStorageStateFile          = "graceful_node_shutdown_state"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
var systemDbus = func() (dbusInhibiter, error) {
 | 
					var systemDbus = func() (dbusInhibiter, error) {
 | 
				
			||||||
@@ -81,6 +84,9 @@ type managerImpl struct {
 | 
				
			|||||||
	nodeShuttingDownNow   bool
 | 
						nodeShuttingDownNow   bool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	clock clock.Clock
 | 
						clock clock.Clock
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						enableMetrics bool
 | 
				
			||||||
 | 
						storage       storage
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// NewManager returns a new node shutdown manager.
 | 
					// NewManager returns a new node shutdown manager.
 | 
				
			||||||
@@ -120,6 +126,10 @@ func NewManager(conf *Config) (Manager, lifecycle.PodAdmitHandler) {
 | 
				
			|||||||
		syncNodeStatus:                   conf.SyncNodeStatusFunc,
 | 
							syncNodeStatus:                   conf.SyncNodeStatusFunc,
 | 
				
			||||||
		shutdownGracePeriodByPodPriority: shutdownGracePeriodByPodPriority,
 | 
							shutdownGracePeriodByPodPriority: shutdownGracePeriodByPodPriority,
 | 
				
			||||||
		clock:                            conf.Clock,
 | 
							clock:                            conf.Clock,
 | 
				
			||||||
 | 
							enableMetrics:                    utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdownBasedOnPodPriority),
 | 
				
			||||||
 | 
							storage: localStorage{
 | 
				
			||||||
 | 
								Path: filepath.Join(conf.StateDirectory, localStorageStateFile),
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	klog.InfoS("Creating node shutdown manager",
 | 
						klog.InfoS("Creating node shutdown manager",
 | 
				
			||||||
		"shutdownGracePeriodRequested", conf.ShutdownGracePeriodRequested,
 | 
							"shutdownGracePeriodRequested", conf.ShutdownGracePeriodRequested,
 | 
				
			||||||
@@ -143,6 +153,24 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
 | 
				
			|||||||
	return lifecycle.PodAdmitResult{Admit: true}
 | 
						return lifecycle.PodAdmitResult{Admit: true}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// setMetrics sets the metrics for the node shutdown manager.
 | 
				
			||||||
 | 
					func (m *managerImpl) setMetrics() {
 | 
				
			||||||
 | 
						if m.enableMetrics && m.storage != nil {
 | 
				
			||||||
 | 
							sta := state{}
 | 
				
			||||||
 | 
							err := m.storage.Load(&sta)
 | 
				
			||||||
 | 
							if err != nil {
 | 
				
			||||||
 | 
								klog.ErrorS(err, "Failed to load graceful shutdown state")
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								if !sta.StartTime.IsZero() {
 | 
				
			||||||
 | 
									metrics.GracefulShutdownStartTime.Set(timestamp(sta.StartTime))
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								if !sta.EndTime.IsZero() {
 | 
				
			||||||
 | 
									metrics.GracefulShutdownEndTime.Set(timestamp(sta.EndTime))
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Start starts the node shutdown manager and will start watching the node for shutdown events.
 | 
					// Start starts the node shutdown manager and will start watching the node for shutdown events.
 | 
				
			||||||
func (m *managerImpl) Start() error {
 | 
					func (m *managerImpl) Start() error {
 | 
				
			||||||
	stop, err := m.start()
 | 
						stop, err := m.start()
 | 
				
			||||||
@@ -163,6 +191,8 @@ func (m *managerImpl) Start() error {
 | 
				
			|||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}()
 | 
						}()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						m.setMetrics()
 | 
				
			||||||
	return nil
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -289,6 +319,32 @@ func (m *managerImpl) processShutdownEvent() error {
 | 
				
			|||||||
	klog.V(1).InfoS("Shutdown manager processing shutdown event")
 | 
						klog.V(1).InfoS("Shutdown manager processing shutdown event")
 | 
				
			||||||
	activePods := m.getPods()
 | 
						activePods := m.getPods()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						defer func() {
 | 
				
			||||||
 | 
							m.dbusCon.ReleaseInhibitLock(m.inhibitLock)
 | 
				
			||||||
 | 
							klog.V(1).InfoS("Shutdown manager completed processing shutdown event, node will shutdown shortly")
 | 
				
			||||||
 | 
						}()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if m.enableMetrics && m.storage != nil {
 | 
				
			||||||
 | 
							startTime := time.Now()
 | 
				
			||||||
 | 
							err := m.storage.Store(state{
 | 
				
			||||||
 | 
								StartTime: startTime,
 | 
				
			||||||
 | 
							})
 | 
				
			||||||
 | 
							if err != nil {
 | 
				
			||||||
 | 
								klog.ErrorS(err, "Failed to store graceful shutdown state")
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							defer func() {
 | 
				
			||||||
 | 
								endTime := time.Now()
 | 
				
			||||||
 | 
								err := m.storage.Store(state{
 | 
				
			||||||
 | 
									StartTime: startTime,
 | 
				
			||||||
 | 
									EndTime:   endTime,
 | 
				
			||||||
 | 
								})
 | 
				
			||||||
 | 
								if err != nil {
 | 
				
			||||||
 | 
									klog.ErrorS(err, "Failed to store graceful shutdown state")
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}()
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	groups := groupByPriority(m.shutdownGracePeriodByPodPriority, activePods)
 | 
						groups := groupByPriority(m.shutdownGracePeriodByPodPriority, activePods)
 | 
				
			||||||
	for _, group := range groups {
 | 
						for _, group := range groups {
 | 
				
			||||||
		// If there are no pods in a particular range,
 | 
							// If there are no pods in a particular range,
 | 
				
			||||||
@@ -347,9 +403,6 @@ func (m *managerImpl) processShutdownEvent() error {
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	m.dbusCon.ReleaseInhibitLock(m.inhibitLock)
 | 
					 | 
				
			||||||
	klog.V(1).InfoS("Shutdown manager completed processing shutdown event, node will shutdown shortly")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return nil
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										63
									
								
								pkg/kubelet/nodeshutdown/storage.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								pkg/kubelet/nodeshutdown/storage.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,63 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					Copyright 2022 The Kubernetes Authors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package nodeshutdown
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"encoding/json"
 | 
				
			||||||
 | 
						"os"
 | 
				
			||||||
 | 
						"time"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// storage is the persistence seam used by the shutdown manager to save and
					// restore its state.
					type storage interface {
						// Store persists data and reports any failure to do so.
						Store(data interface{}) error
						// Load restores previously stored content into data, which callers
						// pass as a pointer, and reports any failure to do so.
						Load(data interface{}) error
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// localStorage is a storage implementation that keeps the data as a JSON
					// document in a single file on the local filesystem.
					type localStorage struct {
						// Path is the location of the file that holds the JSON document.
						Path string
					}

					// Store marshals data to JSON and writes it to l.Path.
					//
					// The document is first written to a sibling "<Path>.tmp" file, flushed to
					// disk, and then renamed over l.Path. Writing in place would truncate the
					// existing file before the new content is durable, so an interruption (this
					// code runs while the node is shutting down) could leave an empty or partial
					// file that Load can no longer decode. With the rename, readers see either
					// the previous document or the new one.
					//
					// It returns the marshalling error or the first filesystem error encountered;
					// on failure the previous content of l.Path is left untouched and the
					// temporary file is removed on a best-effort basis.
					func (l localStorage) Store(data interface{}) (err error) {
						b, err := json.Marshal(data)
						if err != nil {
							return err
						}

						tmpPath := l.Path + ".tmp"
						f, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
						if err != nil {
							return err
						}
						defer func() {
							if err != nil {
								// Best-effort cleanup; the original error is what matters to the
								// caller. Closing an already closed file only yields an error.
								_ = f.Close()
								_ = os.Remove(tmpPath)
							}
						}()

						if _, err = f.Write(b); err != nil {
							return err
						}
						// Flush to stable storage before the rename makes the new content visible.
						if err = f.Sync(); err != nil {
							return err
						}
						if err = f.Close(); err != nil {
							return err
						}
						return os.Rename(tmpPath, l.Path)
					}

					// Load reads l.Path and unmarshals its JSON content into data, which must be
					// a pointer. A missing file is not an error: Load returns nil and leaves data
					// unchanged. Any other read error or a JSON decoding error is returned.
					func (l localStorage) Load(data interface{}) (err error) {
						b, err := os.ReadFile(l.Path)
						if err != nil {
							if os.IsNotExist(err) {
								return nil
							}
							return err
						}
						return json.Unmarshal(b, data)
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// timestamp converts t to whole seconds since the unix epoch as a float64.
					// The zero time.Time maps to 0 rather than to its (large negative) unix value.
					func timestamp(t time.Time) float64 {
						var seconds float64
						if !t.IsZero() {
							seconds = float64(t.Unix())
						}
						return seconds
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// state is the graceful shutdown record that is serialized to JSON by the
					// shutdown manager's storage.
					type state struct {
						// StartTime is when processing of the shutdown event began.
						StartTime time.Time `json:"startTime"`
						// EndTime is when processing of the shutdown event finished; it is the
						// zero time while a shutdown is still in progress.
						EndTime time.Time `json:"endTime"`
					}
 | 
				
			||||||
		Reference in New Issue
	
	Block a user