mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 04:08:16 +00:00 
			
		
		
		
	Allow for not-ready pods in large clusters
This commit is contained in:
		@@ -137,7 +137,11 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
 | 
			
		||||
	// test pods from running, and tests that ensure all pods are running and
 | 
			
		||||
	// ready will fail).
 | 
			
		||||
	podStartupTimeout := framework.TestContext.SystemPodsStartupTimeout
 | 
			
		||||
	if err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, int32(framework.TestContext.MinStartupPods), podStartupTimeout, framework.ImagePullerLabels, true); err != nil {
 | 
			
		||||
	// TODO: In large clusters, we often observe non-starting pods due to
 | 
			
		||||
	// #41007. To avoid those pods blocking the whole test run (and just
 | 
			
		||||
	// wasting the whole run), we allow for some not-ready pods (with the
 | 
			
		||||
	// number equal to the number of allowed not-ready nodes).
 | 
			
		||||
	if err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, int32(framework.TestContext.MinStartupPods), int32(framework.TestContext.AllowedNotReadyNodes), podStartupTimeout, framework.ImagePullerLabels, true); err != nil {
 | 
			
		||||
		framework.DumpAllNamespaceInfo(c, metav1.NamespaceSystem)
 | 
			
		||||
		framework.LogFailedContainers(c, metav1.NamespaceSystem, framework.Logf)
 | 
			
		||||
		runKubernetesServiceTestContainer(c, metav1.NamespaceDefault)
 | 
			
		||||
 
 | 
			
		||||
@@ -495,8 +495,7 @@ func WaitForPodsSuccess(c clientset.Interface, ns string, successPodLabels map[s
 | 
			
		||||
// and some in Success. This is to allow the client to decide if "Success"
 | 
			
		||||
// means "Ready" or not.
 | 
			
		||||
// If skipSucceeded is true, any pods that are Succeeded are not counted.
 | 
			
		||||
func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods int32, timeout time.Duration, ignoreLabels map[string]string, skipSucceeded bool) error {
 | 
			
		||||
 | 
			
		||||
func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration, ignoreLabels map[string]string, skipSucceeded bool) error {
 | 
			
		||||
	ignoreSelector := labels.SelectorFromSet(ignoreLabels)
 | 
			
		||||
	start := time.Now()
 | 
			
		||||
	Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready",
 | 
			
		||||
@@ -504,6 +503,7 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods int32, ti
 | 
			
		||||
	wg := sync.WaitGroup{}
 | 
			
		||||
	wg.Add(1)
 | 
			
		||||
	var waitForSuccessError error
 | 
			
		||||
	var ignoreNotReady bool
 | 
			
		||||
	badPods := []v1.Pod{}
 | 
			
		||||
	desiredPods := 0
 | 
			
		||||
	go func() {
 | 
			
		||||
@@ -544,6 +544,7 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods int32, ti
 | 
			
		||||
			return false, nil
 | 
			
		||||
		}
 | 
			
		||||
		nOk := int32(0)
 | 
			
		||||
		notReady := int32(0)
 | 
			
		||||
		badPods = []v1.Pod{}
 | 
			
		||||
		desiredPods = len(podList.Items)
 | 
			
		||||
		for _, pod := range podList.Items {
 | 
			
		||||
@@ -564,6 +565,7 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods int32, ti
 | 
			
		||||
				return false, errors.New("unexpected Succeeded pod state")
 | 
			
		||||
			case pod.Status.Phase != v1.PodFailed:
 | 
			
		||||
				Logf("The status of Pod %s is %s (Ready = false), waiting for it to be either Running (with Ready = true) or Failed", pod.ObjectMeta.Name, pod.Status.Phase)
 | 
			
		||||
				notReady++
 | 
			
		||||
				badPods = append(badPods, pod)
 | 
			
		||||
			default:
 | 
			
		||||
				if _, ok := pod.Annotations[v1.CreatedByAnnotation]; !ok {
 | 
			
		||||
@@ -581,11 +583,15 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods int32, ti
 | 
			
		||||
		if replicaOk == replicas && nOk >= minPods && len(badPods) == 0 {
 | 
			
		||||
			return true, nil
 | 
			
		||||
		}
 | 
			
		||||
		ignoreNotReady = (notReady <= allowedNotReadyPods)
 | 
			
		||||
		logPodStates(badPods)
 | 
			
		||||
		return false, nil
 | 
			
		||||
	}) != nil {
 | 
			
		||||
		if !ignoreNotReady {
 | 
			
		||||
			return errors.New(errorBadPodsStates(badPods, desiredPods, ns, "RUNNING and READY", timeout))
 | 
			
		||||
		}
 | 
			
		||||
		Logf("Number of not-ready pods is allowed.")
 | 
			
		||||
	}
 | 
			
		||||
	wg.Wait()
 | 
			
		||||
	if waitForSuccessError != nil {
 | 
			
		||||
		return waitForSuccessError
 | 
			
		||||
 
 | 
			
		||||
@@ -68,7 +68,7 @@ var _ = framework.KubeDescribe("Mesos", func() {
 | 
			
		||||
		nodelist := framework.GetReadySchedulableNodesOrDie(client)
 | 
			
		||||
		const ns = "static-pods"
 | 
			
		||||
		numpods := int32(len(nodelist.Items))
 | 
			
		||||
		framework.ExpectNoError(framework.WaitForPodsRunningReady(client, ns, numpods, wait.ForeverTestTimeout, map[string]string{}, false),
 | 
			
		||||
		framework.ExpectNoError(framework.WaitForPodsRunningReady(client, ns, numpods, 0, wait.ForeverTestTimeout, map[string]string{}, false),
 | 
			
		||||
			fmt.Sprintf("number of static pods in namespace %s is %d", ns, numpods))
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -237,7 +237,7 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
 | 
			
		||||
			// Many e2e tests assume that the cluster is fully healthy before they start.  Wait until
 | 
			
		||||
			// the cluster is restored to health.
 | 
			
		||||
			By("waiting for system pods to successfully restart")
 | 
			
		||||
			err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout, ignoreLabels, true)
 | 
			
		||||
			err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout, ignoreLabels, true)
 | 
			
		||||
			Expect(err).NotTo(HaveOccurred())
 | 
			
		||||
			By("waiting for image prepulling pods to complete")
 | 
			
		||||
			framework.WaitForPodsSuccess(c, metav1.NamespaceSystem, framework.ImagePullerLabels, imagePrePullingTimeout)
 | 
			
		||||
 
 | 
			
		||||
@@ -90,7 +90,7 @@ var _ = framework.KubeDescribe("SchedulerPredicates [Serial]", func() {
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		err = framework.WaitForPodsRunningReady(cs, metav1.NamespaceSystem, int32(systemPodsNo), framework.PodReadyBeforeTimeout, ignoreLabels, true)
 | 
			
		||||
		err = framework.WaitForPodsRunningReady(cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout, ignoreLabels, true)
 | 
			
		||||
		Expect(err).NotTo(HaveOccurred())
 | 
			
		||||
 | 
			
		||||
		for _, node := range nodeList.Items {
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user