mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 12:18:16 +00:00 
			
		
		
		
	Include ignored pods when computing backoff delay for Job pod failures
This commit is contained in:
		@@ -794,7 +794,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
 | 
			
		||||
	active := int32(len(jobCtx.activePods))
 | 
			
		||||
	newSucceededPods, newFailedPods := getNewFinishedPods(jobCtx)
 | 
			
		||||
	jobCtx.succeeded = job.Status.Succeeded + int32(len(newSucceededPods)) + int32(len(jobCtx.uncounted.succeeded))
 | 
			
		||||
	failed := job.Status.Failed + int32(len(newFailedPods)) + int32(len(jobCtx.uncounted.failed))
 | 
			
		||||
	failed := job.Status.Failed + int32(nonIgnoredFailedPodsCount(jobCtx, newFailedPods)) + int32(len(jobCtx.uncounted.failed))
 | 
			
		||||
	var ready *int32
 | 
			
		||||
	if feature.DefaultFeatureGate.Enabled(features.JobReadyPods) {
 | 
			
		||||
		ready = pointer.Int32(countReadyPods(jobCtx.activePods))
 | 
			
		||||
@@ -951,6 +951,19 @@ func (jm *Controller) deleteActivePods(ctx context.Context, job *batch.Job, pods
 | 
			
		||||
	return successfulDeletes, errorFromChannel(errCh)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func nonIgnoredFailedPodsCount(jobCtx *syncJobCtx, failedPods []*v1.Pod) int {
 | 
			
		||||
	result := len(failedPods)
 | 
			
		||||
	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
 | 
			
		||||
		for _, p := range failedPods {
 | 
			
		||||
			_, countFailed, _ := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, p)
 | 
			
		||||
			if !countFailed {
 | 
			
		||||
				result--
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return result
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// deleteJobPods deletes the pods, returns the number of successful removals
 | 
			
		||||
// and any error.
 | 
			
		||||
func (jm *Controller) deleteJobPods(ctx context.Context, job *batch.Job, jobKey string, pods []*v1.Pod) (int32, error) {
 | 
			
		||||
@@ -1406,15 +1419,7 @@ func getNewFinishedPods(jobCtx *syncJobCtx) (succeededPods, failedPods []*v1.Pod
 | 
			
		||||
		return p.Status.Phase == v1.PodSucceeded
 | 
			
		||||
	})
 | 
			
		||||
	failedPods = getValidPodsWithFilter(jobCtx, jobCtx.uncounted.Failed(), func(p *v1.Pod) bool {
 | 
			
		||||
		if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
 | 
			
		||||
			if !isPodFailed(p, jobCtx.job) {
 | 
			
		||||
				return false
 | 
			
		||||
			}
 | 
			
		||||
			_, countFailed, _ := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, p)
 | 
			
		||||
			return countFailed
 | 
			
		||||
		} else {
 | 
			
		||||
		return isPodFailed(p, jobCtx.job)
 | 
			
		||||
		}
 | 
			
		||||
	})
 | 
			
		||||
	return succeededPods, failedPods
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -3019,6 +3019,53 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
 | 
			
		||||
			wantStatusFailed:    0,
 | 
			
		||||
			wantStatusSucceeded: 0,
 | 
			
		||||
		},
 | 
			
		||||
		"ignore pod failure based on OnPodConditions, ignored failures delays pod recreation": {
 | 
			
		||||
			enableJobPodFailurePolicy: true,
 | 
			
		||||
			job: batch.Job{
 | 
			
		||||
				TypeMeta:   metav1.TypeMeta{Kind: "Job"},
 | 
			
		||||
				ObjectMeta: validObjectMeta,
 | 
			
		||||
				Spec: batch.JobSpec{
 | 
			
		||||
					Selector:     validSelector,
 | 
			
		||||
					Template:     validTemplate,
 | 
			
		||||
					Parallelism:  pointer.Int32(1),
 | 
			
		||||
					Completions:  pointer.Int32(1),
 | 
			
		||||
					BackoffLimit: pointer.Int32(0),
 | 
			
		||||
					PodFailurePolicy: &batch.PodFailurePolicy{
 | 
			
		||||
						Rules: []batch.PodFailurePolicyRule{
 | 
			
		||||
							{
 | 
			
		||||
								Action: batch.PodFailurePolicyActionIgnore,
 | 
			
		||||
								OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
 | 
			
		||||
									{
 | 
			
		||||
										Type:   v1.DisruptionTarget,
 | 
			
		||||
										Status: v1.ConditionTrue,
 | 
			
		||||
									},
 | 
			
		||||
								},
 | 
			
		||||
							},
 | 
			
		||||
						},
 | 
			
		||||
					},
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
			pods: []v1.Pod{
 | 
			
		||||
				{
 | 
			
		||||
					ObjectMeta: metav1.ObjectMeta{
 | 
			
		||||
						DeletionTimestamp: &now,
 | 
			
		||||
					},
 | 
			
		||||
					Status: v1.PodStatus{
 | 
			
		||||
						Phase: v1.PodFailed,
 | 
			
		||||
						Conditions: []v1.PodCondition{
 | 
			
		||||
							{
 | 
			
		||||
								Type:   v1.DisruptionTarget,
 | 
			
		||||
								Status: v1.ConditionTrue,
 | 
			
		||||
							},
 | 
			
		||||
						},
 | 
			
		||||
					},
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
			wantConditions:      nil,
 | 
			
		||||
			wantStatusActive:    0,
 | 
			
		||||
			wantStatusFailed:    0,
 | 
			
		||||
			wantStatusSucceeded: 0,
 | 
			
		||||
		},
 | 
			
		||||
		"fail job based on OnPodConditions": {
 | 
			
		||||
			enableJobPodFailurePolicy: true,
 | 
			
		||||
			job: batch.Job{
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user