mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-03 19:58:17 +00:00 
			
		
		
		
	Add Job e2e for tracking failure count per index (#130390)
* Add Job e2e for tracking failure count per index * Review remarks
This commit is contained in:
		@@ -660,6 +660,57 @@ done`}
 | 
			
		||||
		gomega.Expect(job.Status.Failed).Should(gomega.Equal(int32(1)))
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
		Testname: Track the failure count per index in Pod annotation when backoffLimitPerIndex is used
 | 
			
		||||
		Description: Create an indexed job and ensure that the Pods are
 | 
			
		||||
		re-created with the failure-count Pod annotation set properly to
 | 
			
		||||
		indicate the number of so-far failures per index.
 | 
			
		||||
	*/
 | 
			
		||||
	ginkgo.It("should record the failure-count in the Pod annotation when using backoffLimitPerIndex", func(ctx context.Context) {
 | 
			
		||||
		jobName := "e2e-backofflimitperindex-" + utilrand.String(5)
 | 
			
		||||
		label := map[string]string{batchv1.JobNameLabel: jobName}
 | 
			
		||||
		labelSelector := labels.SelectorFromSet(label).String()
 | 
			
		||||
 | 
			
		||||
		parallelism := int32(2)
 | 
			
		||||
		completions := int32(2)
 | 
			
		||||
		backoffLimit := int32(6) // default value
 | 
			
		||||
 | 
			
		||||
		job := e2ejob.NewTestJob("fail", jobName, v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit)
 | 
			
		||||
		job.Spec.BackoffLimit = nil
 | 
			
		||||
		job.Spec.BackoffLimitPerIndex = ptr.To[int32](1)
 | 
			
		||||
		job.Spec.CompletionMode = ptr.To(batchv1.IndexedCompletion)
 | 
			
		||||
 | 
			
		||||
		tracker := NewIndexedPodAnnotationTracker(jobName, f.Namespace.Name, labelSelector, batchv1.JobCompletionIndexAnnotation, batchv1.JobIndexFailureCountAnnotation)
 | 
			
		||||
		trackerCancel := tracker.Start(ctx, f.ClientSet)
 | 
			
		||||
		ginkgo.DeferCleanup(trackerCancel)
 | 
			
		||||
 | 
			
		||||
		ginkgo.By("Creating an indexed job with backoffLimit per index and failing pods")
 | 
			
		||||
		job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
 | 
			
		||||
		framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
 | 
			
		||||
 | 
			
		||||
		ginkgo.By("Awaiting for the job to fail as there are failed indexes")
 | 
			
		||||
		err = e2ejob.WaitForJobFailed(ctx, f.ClientSet, f.Namespace.Name, job.Name)
 | 
			
		||||
		framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
 | 
			
		||||
 | 
			
		||||
		ginkgo.By("Verify the failure-count annotation on Pods")
 | 
			
		||||
		// Since the Job is already failed all the relevant Pod events are
 | 
			
		||||
		// already being distributed. Still, there might be a little bit of lag
 | 
			
		||||
		// between the events being receiced by the Job controller and the test
 | 
			
		||||
		// code so we need to wait a little bit.
 | 
			
		||||
		gomega.Eventually(ctx, tracker.cloneTrackedAnnotations).
 | 
			
		||||
			WithTimeout(15 * time.Second).
 | 
			
		||||
			WithPolling(500 * time.Millisecond).
 | 
			
		||||
			Should(gomega.Equal(map[int][]string{0: {"0", "1"}, 1: {"0", "1"}}))
 | 
			
		||||
 | 
			
		||||
		ginkgo.By("Verifying the Job status fields")
 | 
			
		||||
		job, err = e2ejob.GetJob(ctx, f.ClientSet, f.Namespace.Name, job.Name)
 | 
			
		||||
		framework.ExpectNoError(err, "failed to retrieve latest job object")
 | 
			
		||||
		gomega.Expect(job.Status.FailedIndexes).Should(gomega.HaveValue(gomega.Equal("0,1")))
 | 
			
		||||
		gomega.Expect(job.Status.CompletedIndexes).Should(gomega.Equal(""))
 | 
			
		||||
		gomega.Expect(job.Status.Failed).Should(gomega.Equal(int32(4)))
 | 
			
		||||
		gomega.Expect(job.Status.Succeeded).Should(gomega.Equal(int32(0)))
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
		Testcase: Mark indexes as failed when the FailIndex action is matched in podFailurePolicy
 | 
			
		||||
		Description: Create an indexed job with backoffLimitPerIndex, and podFailurePolicy
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										114
									
								
								test/e2e/apps/util.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								test/e2e/apps/util.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,114 @@
 | 
			
		||||
/*
 | 
			
		||||
Copyright 2025 The Kubernetes Authors.
 | 
			
		||||
 | 
			
		||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
you may not use this file except in compliance with the License.
 | 
			
		||||
You may obtain a copy of the License at
 | 
			
		||||
 | 
			
		||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
 | 
			
		||||
Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
See the License for the specific language governing permissions and
 | 
			
		||||
limitations under the License.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package apps
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"context"
 | 
			
		||||
	"maps"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"sync"
 | 
			
		||||
 | 
			
		||||
	"github.com/onsi/ginkgo/v2"
 | 
			
		||||
	v1 "k8s.io/api/core/v1"
 | 
			
		||||
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 | 
			
		||||
	"k8s.io/apimachinery/pkg/runtime"
 | 
			
		||||
	"k8s.io/apimachinery/pkg/watch"
 | 
			
		||||
	clientset "k8s.io/client-go/kubernetes"
 | 
			
		||||
	"k8s.io/client-go/tools/cache"
 | 
			
		||||
	"k8s.io/klog/v2"
 | 
			
		||||
	"k8s.io/kubernetes/test/e2e/framework"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// IndexedPodAnnotationTracker records, per completion index, the history of a
// chosen annotation's values across the Pods created for an indexed Job.
// The embedded mutex guards trackedAnnotations, which is written from informer
// event handlers and read by the test code.
type IndexedPodAnnotationTracker struct {
	sync.Mutex
	// ownerName is the name of the Job whose Pods are tracked.
	ownerName string
	// ownerNs is the namespace the Pods are listed/watched in.
	ownerNs string
	// labelSelector restricts the list/watch to the owner's Pods.
	labelSelector string
	// podIndexAnnotation is the annotation holding the Pod's completion index.
	podIndexAnnotation string
	// podTrackedAnnotation is the annotation whose values are collected.
	podTrackedAnnotation string
	// trackedAnnotations maps completion index -> observed annotation values,
	// in the order the Pods were observed.
	trackedAnnotations map[int][]string
}
			
		||||
 | 
			
		||||
func NewIndexedPodAnnotationTracker(ownerName, ownerNs, labelSelector, podIndexAnnotation, podTrackedAnnotation string) *IndexedPodAnnotationTracker {
 | 
			
		||||
	return &IndexedPodAnnotationTracker{
 | 
			
		||||
		ownerName:            ownerName,
 | 
			
		||||
		ownerNs:              ownerNs,
 | 
			
		||||
		labelSelector:        labelSelector,
 | 
			
		||||
		podIndexAnnotation:   podIndexAnnotation,
 | 
			
		||||
		podTrackedAnnotation: podTrackedAnnotation,
 | 
			
		||||
		trackedAnnotations:   make(map[int][]string),
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (t *IndexedPodAnnotationTracker) Start(ctx context.Context, c clientset.Interface) context.CancelFunc {
 | 
			
		||||
	trackerCtx, trackerCancel := context.WithCancel(ctx)
 | 
			
		||||
	_, podTracker := cache.NewInformerWithOptions(cache.InformerOptions{
 | 
			
		||||
		ListerWatcher: &cache.ListWatch{
 | 
			
		||||
			ListWithContextFunc: func(ctx context.Context, options metav1.ListOptions) (runtime.Object, error) {
 | 
			
		||||
				options.LabelSelector = t.labelSelector
 | 
			
		||||
				obj, err := c.CoreV1().Pods(t.ownerNs).List(ctx, options)
 | 
			
		||||
				return runtime.Object(obj), err
 | 
			
		||||
			},
 | 
			
		||||
			WatchFuncWithContext: func(ctx context.Context, options metav1.ListOptions) (watch.Interface, error) {
 | 
			
		||||
				options.LabelSelector = t.labelSelector
 | 
			
		||||
				return c.CoreV1().Pods(t.ownerNs).Watch(ctx, options)
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
		ObjectType: &v1.Pod{},
 | 
			
		||||
		Handler: cache.ResourceEventHandlerFuncs{
 | 
			
		||||
			AddFunc: func(obj interface{}) {
 | 
			
		||||
				defer ginkgo.GinkgoRecover()
 | 
			
		||||
				if pod, ok := obj.(*v1.Pod); ok {
 | 
			
		||||
					framework.Logf("Observed event for Pod %q with index=%v, annotation value=%v",
 | 
			
		||||
						klog.KObj(pod), pod.Annotations[t.podIndexAnnotation], pod.Annotations[t.podTrackedAnnotation])
 | 
			
		||||
					podIndex, err := strconv.Atoi(pod.Annotations[t.podIndexAnnotation])
 | 
			
		||||
					if err != nil {
 | 
			
		||||
						framework.Failf("failed to parse pod index for Pod %q: %v", klog.KObj(pod), err.Error())
 | 
			
		||||
					} else {
 | 
			
		||||
						t.Lock()
 | 
			
		||||
						defer t.Unlock()
 | 
			
		||||
						t.trackedAnnotations[podIndex] = append(t.trackedAnnotations[podIndex], pod.Annotations[t.podTrackedAnnotation])
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			},
 | 
			
		||||
			UpdateFunc: func(old, new interface{}) {
 | 
			
		||||
				defer ginkgo.GinkgoRecover()
 | 
			
		||||
				oldPod, oldOk := old.(*v1.Pod)
 | 
			
		||||
				newPod, newOk := new.(*v1.Pod)
 | 
			
		||||
				if !oldOk || !newOk {
 | 
			
		||||
					return
 | 
			
		||||
				}
 | 
			
		||||
				if oldPod.Annotations[t.podTrackedAnnotation] != newPod.Annotations[t.podTrackedAnnotation] {
 | 
			
		||||
					framework.Failf("Unexepected mutation of the annotation %q for Pod %q, old=%q, new=%q",
 | 
			
		||||
						t.podTrackedAnnotation,
 | 
			
		||||
						klog.KObj(newPod),
 | 
			
		||||
						oldPod.Annotations[t.podTrackedAnnotation],
 | 
			
		||||
						newPod.Annotations[t.podTrackedAnnotation],
 | 
			
		||||
					)
 | 
			
		||||
				}
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	})
 | 
			
		||||
	go podTracker.RunWithContext(trackerCtx)
 | 
			
		||||
	return trackerCancel
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (t *IndexedPodAnnotationTracker) cloneTrackedAnnotations() map[int][]string {
 | 
			
		||||
	t.Lock()
 | 
			
		||||
	defer t.Unlock()
 | 
			
		||||
	return maps.Clone(t.trackedAnnotations)
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user