mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 04:08:16 +00:00 
			
		
		
		
	Merge pull request #126287 from devppratik/121793-update-node-monitor-grace-period
node: Update Node Monitor Grace Period default duration to 50s
This commit is contained in:
		@@ -41,7 +41,8 @@ func (o *NodeLifecycleControllerOptions) AddFlags(fs *pflag.FlagSet) {
 | 
			
		||||
	fs.DurationVar(&o.NodeMonitorGracePeriod.Duration, "node-monitor-grace-period", o.NodeMonitorGracePeriod.Duration,
 | 
			
		||||
		"Amount of time which we allow running Node to be unresponsive before marking it unhealthy. "+
 | 
			
		||||
			"Must be N times more than kubelet's nodeStatusUpdateFrequency, "+
 | 
			
		||||
			"where N means number of retries allowed for kubelet to post node status.")
 | 
			
		||||
			"where N means number of retries allowed for kubelet to post node status. "+
 | 
			
		||||
			"This value should also be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS")
 | 
			
		||||
	fs.Float32Var(&o.NodeEvictionRate, "node-eviction-rate", 0.1, "Number of nodes per second on which pods are deleted in case of node failure when a zone is healthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters.")
 | 
			
		||||
	fs.Float32Var(&o.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.")
 | 
			
		||||
	fs.Int32Var(&o.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, fmt.Sprintf("Number of nodes from which %s treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller. Notice: If nodes reside in multiple zones, this threshold will be considered as zone node size threshold for each zone to determine node eviction rate independently.", names.NodeLifecycleController))
 | 
			
		||||
 
 | 
			
		||||
@@ -32,7 +32,8 @@ type NodeLifecycleControllerConfiguration struct {
 | 
			
		||||
	// NodeMonitorGracePeriod is the amount of time which we allow a running node to be
 | 
			
		||||
	// unresponsive before marking it unhealthy. Must be N times more than kubelet's
 | 
			
		||||
	// nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet
 | 
			
		||||
	// to post node status.
 | 
			
		||||
	// to post node status. This value should also be greater than the sum of
 | 
			
		||||
	// HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS.
 | 
			
		||||
	NodeMonitorGracePeriod metav1.Duration
 | 
			
		||||
	// secondaryNodeEvictionRate is implicitly overridden to 0 for clusters smaller than or equal to largeClusterSizeThreshold
 | 
			
		||||
	LargeClusterSizeThreshold int32
 | 
			
		||||
 
 | 
			
		||||
@@ -37,8 +37,13 @@ func RecommendedDefaultNodeLifecycleControllerConfiguration(obj *kubectrlmgrconf
 | 
			
		||||
	if obj.PodEvictionTimeout == zero {
 | 
			
		||||
		obj.PodEvictionTimeout = metav1.Duration{Duration: 5 * time.Minute}
 | 
			
		||||
	}
 | 
			
		||||
	// NodeMonitorGracePeriod is set to a default value of 50 seconds.
 | 
			
		||||
	// This value should be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS (15s)
 | 
			
		||||
	// and HTTP2_READ_IDLE_TIMEOUT_SECONDS (30s) from the http2 health check
 | 
			
		||||
	// to ensure that the server has adequate time to handle slow or idle connections
 | 
			
		||||
	// properly before marking a node as unhealthy.
 | 
			
		||||
	if obj.NodeMonitorGracePeriod == zero {
 | 
			
		||||
		obj.NodeMonitorGracePeriod = metav1.Duration{Duration: 40 * time.Second}
 | 
			
		||||
		obj.NodeMonitorGracePeriod = metav1.Duration{Duration: 50 * time.Second}
 | 
			
		||||
	}
 | 
			
		||||
	if obj.NodeStartupGracePeriod == zero {
 | 
			
		||||
		obj.NodeStartupGracePeriod = metav1.Duration{Duration: 60 * time.Second}
 | 
			
		||||
 
 | 
			
		||||
@@ -284,7 +284,11 @@ type Controller struct {
 | 
			
		||||
	//    be less than the node health signal update frequency, since there will
 | 
			
		||||
	//    only be fresh values from Kubelet at an interval of node health signal
 | 
			
		||||
	//    update frequency.
 | 
			
		||||
	// 2. nodeMonitorGracePeriod can't be too large for user experience - larger
 | 
			
		||||
	// 2. nodeMonitorGracePeriod should be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS (15s)
 | 
			
		||||
	//    and HTTP2_READ_IDLE_TIMEOUT_SECONDS (30s) from the http2 health check
 | 
			
		||||
	//    to ensure that the server has adequate time to handle slow or idle connections
 | 
			
		||||
	//    properly before marking a node as unhealthy.
 | 
			
		||||
	// 3. nodeMonitorGracePeriod can't be too large for user experience - larger
 | 
			
		||||
	//    value takes longer for user to see up-to-date node health.
 | 
			
		||||
	nodeMonitorGracePeriod time.Duration
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -52,7 +52,7 @@ import (
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	testNodeMonitorGracePeriod = 40 * time.Second
 | 
			
		||||
	testNodeMonitorGracePeriod = 50 * time.Second
 | 
			
		||||
	testNodeStartupGracePeriod = 60 * time.Second
 | 
			
		||||
	testNodeMonitorPeriod      = 5 * time.Second
 | 
			
		||||
	testRateLimiterQPS         = float32(100000)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										2
									
								
								pkg/generated/openapi/zz_generated.openapi.go
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										2
									
								
								pkg/generated/openapi/zz_generated.openapi.go
									
									
									
										generated
									
									
									
								
							@@ -59237,7 +59237,7 @@ func schema_k8sio_kube_controller_manager_config_v1alpha1_NodeLifecycleControlle
 | 
			
		||||
					},
 | 
			
		||||
					"NodeMonitorGracePeriod": {
 | 
			
		||||
						SchemaProps: spec.SchemaProps{
 | 
			
		||||
							Description: "nodeMontiorGracePeriod is the amount of time which we allow a running node to be unresponsive before marking it unhealthy. Must be N times more than kubelet's nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet to post node status.",
 | 
			
		||||
							Description: "nodeMontiorGracePeriod is the amount of time which we allow a running node to be unresponsive before marking it unhealthy. Must be N times more than kubelet's nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet to post node status. This value should also be greater than the sum of HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS.",
 | 
			
		||||
							Ref:         ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"),
 | 
			
		||||
						},
 | 
			
		||||
					},
 | 
			
		||||
 
 | 
			
		||||
@@ -403,7 +403,8 @@ type NodeLifecycleControllerConfiguration struct {
 | 
			
		||||
	// nodeMonitorGracePeriod is the amount of time which we allow a running node to be
 | 
			
		||||
	// unresponsive before marking it unhealthy. Must be N times more than kubelet's
 | 
			
		||||
	// nodeStatusUpdateFrequency, where N means number of retries allowed for kubelet
 | 
			
		||||
	// to post node status.
 | 
			
		||||
	// to post node status. This value should also be greater than the sum of
 | 
			
		||||
	// HTTP2_PING_TIMEOUT_SECONDS and HTTP2_READ_IDLE_TIMEOUT_SECONDS.
 | 
			
		||||
	NodeMonitorGracePeriod metav1.Duration
 | 
			
		||||
	// podEvictionTimeout is the grace period for deleting pods on failed nodes.
 | 
			
		||||
	PodEvictionTimeout metav1.Duration
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user