mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 04:08:16 +00:00 
			
		
		
		
	Include ignored pods when computing backoff delay for Job pod failures
This commit is contained in:
		@@ -794,7 +794,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
 | 
				
			|||||||
	active := int32(len(jobCtx.activePods))
 | 
						active := int32(len(jobCtx.activePods))
 | 
				
			||||||
	newSucceededPods, newFailedPods := getNewFinishedPods(jobCtx)
 | 
						newSucceededPods, newFailedPods := getNewFinishedPods(jobCtx)
 | 
				
			||||||
	jobCtx.succeeded = job.Status.Succeeded + int32(len(newSucceededPods)) + int32(len(jobCtx.uncounted.succeeded))
 | 
						jobCtx.succeeded = job.Status.Succeeded + int32(len(newSucceededPods)) + int32(len(jobCtx.uncounted.succeeded))
 | 
				
			||||||
	failed := job.Status.Failed + int32(len(newFailedPods)) + int32(len(jobCtx.uncounted.failed))
 | 
						failed := job.Status.Failed + int32(nonIgnoredFailedPodsCount(jobCtx, newFailedPods)) + int32(len(jobCtx.uncounted.failed))
 | 
				
			||||||
	var ready *int32
 | 
						var ready *int32
 | 
				
			||||||
	if feature.DefaultFeatureGate.Enabled(features.JobReadyPods) {
 | 
						if feature.DefaultFeatureGate.Enabled(features.JobReadyPods) {
 | 
				
			||||||
		ready = pointer.Int32(countReadyPods(jobCtx.activePods))
 | 
							ready = pointer.Int32(countReadyPods(jobCtx.activePods))
 | 
				
			||||||
@@ -951,6 +951,19 @@ func (jm *Controller) deleteActivePods(ctx context.Context, job *batch.Job, pods
 | 
				
			|||||||
	return successfulDeletes, errorFromChannel(errCh)
 | 
						return successfulDeletes, errorFromChannel(errCh)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func nonIgnoredFailedPodsCount(jobCtx *syncJobCtx, failedPods []*v1.Pod) int {
 | 
				
			||||||
 | 
						result := len(failedPods)
 | 
				
			||||||
 | 
						if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
 | 
				
			||||||
 | 
							for _, p := range failedPods {
 | 
				
			||||||
 | 
								_, countFailed, _ := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, p)
 | 
				
			||||||
 | 
								if !countFailed {
 | 
				
			||||||
 | 
									result--
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return result
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// deleteJobPods deletes the pods, returns the number of successful removals
 | 
					// deleteJobPods deletes the pods, returns the number of successful removals
 | 
				
			||||||
// and any error.
 | 
					// and any error.
 | 
				
			||||||
func (jm *Controller) deleteJobPods(ctx context.Context, job *batch.Job, jobKey string, pods []*v1.Pod) (int32, error) {
 | 
					func (jm *Controller) deleteJobPods(ctx context.Context, job *batch.Job, jobKey string, pods []*v1.Pod) (int32, error) {
 | 
				
			||||||
@@ -1406,15 +1419,7 @@ func getNewFinishedPods(jobCtx *syncJobCtx) (succeededPods, failedPods []*v1.Pod
 | 
				
			|||||||
		return p.Status.Phase == v1.PodSucceeded
 | 
							return p.Status.Phase == v1.PodSucceeded
 | 
				
			||||||
	})
 | 
						})
 | 
				
			||||||
	failedPods = getValidPodsWithFilter(jobCtx, jobCtx.uncounted.Failed(), func(p *v1.Pod) bool {
 | 
						failedPods = getValidPodsWithFilter(jobCtx, jobCtx.uncounted.Failed(), func(p *v1.Pod) bool {
 | 
				
			||||||
		if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
 | 
							return isPodFailed(p, jobCtx.job)
 | 
				
			||||||
			if !isPodFailed(p, jobCtx.job) {
 | 
					 | 
				
			||||||
				return false
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			_, countFailed, _ := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, p)
 | 
					 | 
				
			||||||
			return countFailed
 | 
					 | 
				
			||||||
		} else {
 | 
					 | 
				
			||||||
			return isPodFailed(p, jobCtx.job)
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	})
 | 
						})
 | 
				
			||||||
	return succeededPods, failedPods
 | 
						return succeededPods, failedPods
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3019,6 +3019,53 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
 | 
				
			|||||||
			wantStatusFailed:    0,
 | 
								wantStatusFailed:    0,
 | 
				
			||||||
			wantStatusSucceeded: 0,
 | 
								wantStatusSucceeded: 0,
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
 | 
							"ignore pod failure based on OnPodConditions, ignored failures delays pod recreation": {
 | 
				
			||||||
 | 
								enableJobPodFailurePolicy: true,
 | 
				
			||||||
 | 
								job: batch.Job{
 | 
				
			||||||
 | 
									TypeMeta:   metav1.TypeMeta{Kind: "Job"},
 | 
				
			||||||
 | 
									ObjectMeta: validObjectMeta,
 | 
				
			||||||
 | 
									Spec: batch.JobSpec{
 | 
				
			||||||
 | 
										Selector:     validSelector,
 | 
				
			||||||
 | 
										Template:     validTemplate,
 | 
				
			||||||
 | 
										Parallelism:  pointer.Int32(1),
 | 
				
			||||||
 | 
										Completions:  pointer.Int32(1),
 | 
				
			||||||
 | 
										BackoffLimit: pointer.Int32(0),
 | 
				
			||||||
 | 
										PodFailurePolicy: &batch.PodFailurePolicy{
 | 
				
			||||||
 | 
											Rules: []batch.PodFailurePolicyRule{
 | 
				
			||||||
 | 
												{
 | 
				
			||||||
 | 
													Action: batch.PodFailurePolicyActionIgnore,
 | 
				
			||||||
 | 
													OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
 | 
				
			||||||
 | 
														{
 | 
				
			||||||
 | 
															Type:   v1.DisruptionTarget,
 | 
				
			||||||
 | 
															Status: v1.ConditionTrue,
 | 
				
			||||||
 | 
														},
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
								pods: []v1.Pod{
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										ObjectMeta: metav1.ObjectMeta{
 | 
				
			||||||
 | 
											DeletionTimestamp: &now,
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										Status: v1.PodStatus{
 | 
				
			||||||
 | 
											Phase: v1.PodFailed,
 | 
				
			||||||
 | 
											Conditions: []v1.PodCondition{
 | 
				
			||||||
 | 
												{
 | 
				
			||||||
 | 
													Type:   v1.DisruptionTarget,
 | 
				
			||||||
 | 
													Status: v1.ConditionTrue,
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
								wantConditions:      nil,
 | 
				
			||||||
 | 
								wantStatusActive:    0,
 | 
				
			||||||
 | 
								wantStatusFailed:    0,
 | 
				
			||||||
 | 
								wantStatusSucceeded: 0,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
		"fail job based on OnPodConditions": {
 | 
							"fail job based on OnPodConditions": {
 | 
				
			||||||
			enableJobPodFailurePolicy: true,
 | 
								enableJobPodFailurePolicy: true,
 | 
				
			||||||
			job: batch.Job{
 | 
								job: batch.Job{
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user