	Add activeDeadlineSeconds to kubeadm upgrade-health-check job
With https://github.com/kubernetes/kubernetes/pull/122079, kubeadm relies on `ttlSecondsAfterFinished` to clean up the `upgrade-health-check` job once its pod reaches a terminal state. However, the pod may never reach a terminal state, in which case the job never finishes and is never garbage collected. For example, if the pause image is not present, `ErrImagePull` keeps the pod retrying the image pull, so the pod never terminates on its own and the job waits indefinitely for it.

Set `activeDeadlineSeconds` so the job cannot wait forever for the pod to reach a terminal state. Without this, users invoking `kubeadm upgrade plan` have to clean up the job outside of kubeadm even when they ignore the preflight result, because the job keeps running after the result is ignored via the `--ignore-preflight-errors=CreateJob` flag. Since the polling timeout of the `CreateJob` step in kubeadm is 15 seconds, set `activeDeadlineSeconds` to the same timeout (plus a small margin of error).
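For illustration, here is a minimal sketch of the Job spec that results from this change. The constants and field values mirror the diff below; the helper function itself is hypothetical, not the kubeadm implementation, and the pod's containers are omitted for brevity:

package upgrade

import (
	"time"

	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/ptr"
)

// healthCheckJob sketches the relevant parts of the upgrade-health-check
// Job spec after this change (hypothetical helper, name per the commit title).
func healthCheckJob() *batchv1.Job {
	const (
		timeout       = 15 * time.Second // polling timeout of the CreateJob step
		timeoutMargin = 5 * time.Second  // margin of error on top of the timeout
	)
	// 20 seconds in total, used for both fields below.
	timeoutWithMargin := timeout.Seconds() + timeoutMargin.Seconds()

	return &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "upgrade-health-check",
			Namespace: metav1.NamespaceSystem,
		},
		Spec: batchv1.JobSpec{
			BackoffLimit: ptr.To[int32](0),
			// The TTL controller only deletes the Job once it reaches a
			// terminal state.
			TTLSecondsAfterFinished: ptr.To[int32](int32(timeoutWithMargin)),
			// ActiveDeadlineSeconds guarantees a terminal (Failed) state even
			// if the pod is stuck, e.g. in ErrImagePull, so the TTL-based
			// cleanup above can always kick in.
			ActiveDeadlineSeconds: ptr.To[int64](int64(timeoutWithMargin)),
			Template: v1.PodTemplateSpec{
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					// Containers omitted for brevity.
				},
			},
		},
	}
}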
@@ -98,6 +98,7 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 		fieldSelector = "spec.unschedulable=false"
 		ns            = metav1.NamespaceSystem
 		timeout       = 15 * time.Second
+		timeoutMargin = 5 * time.Second
 	)
 	var (
 		err, lastError error
@@ -132,6 +133,9 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 		return nil
 	}
 
+	// Adding a margin of error to the polling timeout.
+	timeoutWithMargin := timeout.Seconds() + timeoutMargin.Seconds()
+
 	// Prepare Job
 	job := &batchv1.Job{
 		ObjectMeta: metav1.ObjectMeta{
@@ -140,7 +144,8 @@ func createJob(client clientset.Interface, cfg *kubeadmapi.ClusterConfiguration)
 		},
 		Spec: batchv1.JobSpec{
 			BackoffLimit:            ptr.To[int32](0),
-			TTLSecondsAfterFinished: ptr.To[int32](int32(timeout.Seconds()) + 5), // Make sure it's more than 'timeout'.
+			TTLSecondsAfterFinished: ptr.To[int32](int32(timeoutWithMargin)),
+			ActiveDeadlineSeconds:   ptr.To[int64](int64(timeoutWithMargin)),
 			Template: v1.PodTemplateSpec{
 				Spec: v1.PodSpec{
 					RestartPolicy: v1.RestartPolicyNever,
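A note on the mechanism: once `activeDeadlineSeconds` expires, the Job controller terminates the pod and marks the Job `Failed` with reason `DeadlineExceeded`; that terminal state is what allows the TTL-after-finished controller to delete the job. A caller could observe this with a hypothetical helper like the following (not part of kubeadm):

package upgrade

import (
	"context"

	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
)

// jobHitDeadline is a hypothetical check: it reports whether the
// upgrade-health-check Job was failed by the Job controller because it
// exceeded activeDeadlineSeconds.
func jobHitDeadline(ctx context.Context, client clientset.Interface) (bool, error) {
	job, err := client.BatchV1().Jobs(metav1.NamespaceSystem).
		Get(ctx, "upgrade-health-check", metav1.GetOptions{})
	if err != nil {
		return false, err
	}
	for _, cond := range job.Status.Conditions {
		// The controller adds a Failed condition with reason
		// "DeadlineExceeded" once the deadline has passed.
		if cond.Type == batchv1.JobFailed && cond.Status == v1.ConditionTrue {
			return cond.Reason == "DeadlineExceeded", nil
		}
	}
	return false, nil
}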