Mirror of https://github.com/optim-enterprises-bv/kubernetes.git (synced 2025-11-03 19:58:17 +00:00)

	Merge pull request #8442 from mbforbes/e2ePodsReady
Ensure pods both running and ready before starting e2e tests
@@ -21,7 +21,7 @@ set -o pipefail
 KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
 
 : ${KUBE_VERSION_ROOT:=${KUBE_ROOT}}
-: ${KUBECTL:="${KUBE_VERSION_ROOT}/cluster/kubectl.sh"}
+: ${KUBECTL:=${KUBE_VERSION_ROOT}/cluster/kubectl.sh}
 : ${KUBE_CONFIG_FILE:="config-test.sh"}
 
 export KUBECTL KUBE_CONFIG_FILE

@@ -21,7 +21,7 @@ set -o pipefail
 KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
 
 : ${KUBE_VERSION_ROOT:=${KUBE_ROOT}}
-: ${KUBECTL:="${KUBE_VERSION_ROOT}/cluster/kubectl.sh"}
+: ${KUBECTL:=${KUBE_VERSION_ROOT}/cluster/kubectl.sh}
 : ${KUBE_CONFIG_FILE:="config-test.sh"}
 
 export KUBECTL KUBE_CONFIG_FILE

@@ -21,7 +21,7 @@ set -o pipefail
 KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
 
 : ${KUBE_VERSION_ROOT:=${KUBE_ROOT}}
-: ${KUBECTL:="${KUBE_VERSION_ROOT}/cluster/kubectl.sh"}
+: ${KUBECTL:=${KUBE_VERSION_ROOT}/cluster/kubectl.sh}
 : ${KUBE_CONFIG_FILE:="config-test.sh"}
 
 export KUBECTL KUBE_CONFIG_FILE

@@ -21,7 +21,7 @@ set -o pipefail
 KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
 
 : ${KUBE_VERSION_ROOT:=${KUBE_ROOT}}
-: ${KUBECTL:="${KUBE_VERSION_ROOT}/cluster/kubectl.sh"}
+: ${KUBECTL:=${KUBE_VERSION_ROOT}/cluster/kubectl.sh}
 : ${KUBE_CONFIG_FILE:="config-test.sh"}
 
 export KUBECTL KUBE_CONFIG_FILE

@@ -14,8 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# e2e-status checks that the status of a cluster is acceptable for running
-# e2e tests.
 set -o errexit
 set -o nounset
 set -o pipefail

@@ -23,7 +21,7 @@ set -o pipefail
 KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
 
 : ${KUBE_VERSION_ROOT:=${KUBE_ROOT}}
-: ${KUBECTL:="${KUBE_VERSION_ROOT}/cluster/kubectl.sh"}
+: ${KUBECTL:=${KUBE_VERSION_ROOT}/cluster/kubectl.sh}
 : ${KUBE_CONFIG_FILE:="config-test.sh"}
 
 export KUBECTL KUBE_CONFIG_FILE

@@ -34,49 +32,3 @@ source "${KUBE_VERSION_ROOT}/cluster/${KUBERNETES_PROVIDER}/util.sh"
 prepare-e2e
 
 ${KUBECTL} version
-
-# Before running tests, ensure that all pods are 'Running'. Tests can timeout
-# and fail because the test pods don't run in time. The problem is that the pods
-# that a cluster runs on startup take too long to start running, with sequential
-# Docker pulls of large images being the culprit. These startup pods block the
-# test pods from running.
-
-# Settings:
-# timeout is in seconds; 1200 = 20 minutes.
-timeout=1200
-# pause is how many seconds to sleep between pod get calls.
-pause=5
-# min_pods is the minimum number of pods we require.
-min_pods=1
-
-# Check pod statuses.
-deadline=$(($(date '+%s')+${timeout}))
-echo "Waiting at most ${timeout} seconds for all pods to be 'Running'" >&2
-all_running=0
-until [[ ${all_running} == 1 ]]; do
-  if [[ "$(date '+%s')" -ge "${deadline}" ]]; then
-    echo "All pods never 'Running' in time." >&2
-    exit 1
-  fi
-  statuses=($(${KUBECTL} get pods --template='{{range.items}}{{.status.phase}} {{end}}' --api-version=v1beta3))
-
-  # Ensure that we have enough pods.
-  echo "Found ${#statuses[@]} pods with statuses: ${statuses[@]}" >&2
-  if [[ ${#statuses[@]} -lt ${min_pods} ]]; then
-    continue
-  fi
-
-  # Then, ensure all pods found are 'Running'.
-  found_running=1
-  for status in "${statuses[@]}"; do
-    if [[ "${status}" != "Running" ]]; then
-      # If we find a pod that isn't 'Running', sleep here to avoid delaying
-      # other code paths (where all pods are 'Running').
-      found_running=0
-      sleep ${pause}
-      break
-    fi
-  done
-  all_running=${found_running}
-done
-echo "All pods are 'Running'" >&2

@@ -21,7 +21,7 @@ set -o pipefail
 KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
 
 : ${KUBE_VERSION_ROOT:=${KUBE_ROOT}}
-: ${KUBECTL:="${KUBE_VERSION_ROOT}/cluster/kubectl.sh"}
+: ${KUBECTL:=${KUBE_VERSION_ROOT}/cluster/kubectl.sh}
 : ${KUBE_CONFIG_FILE:="config-test.sh"}
 
 export KUBECTL KUBE_CONFIG_FILE

@@ -22,7 +22,9 @@ import (
 	"path"
 	"strings"
 	"testing"
+	"time"
 
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/clientcmd"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"

@@ -33,6 +35,28 @@ import (
 	"github.com/onsi/gomega"
 )
 
+const (
+	// podStartupTimeout is the time to allow all pods in the cluster to become
+	// running and ready before any e2e tests run. It includes pulling all of
+	// the pods (as of 5/18/15 this is 8 pods).
+	podStartupTimeout = 10 * time.Minute
+
+	// minStartupPods is the minimum number of pods that will allow
+	// waitForPodsRunningReady(...) to succeed. More verbosely, that function
+	// checks that all pods in the cluster are both in a phase of "running" and
+	// have a condition of "ready": "true". It aims to ensure that the cluster's
+	// pods are fully healthy before beginning e2e tests. However, if there were
+	// only 0 pods, it would technically pass if there wasn't a required minimum
+	// number of pods. We expect every cluster to come up with some number of
+	// pods (which in practice is more than this number), so we have this
+	// minimum here as a sanity check to make sure that there are actually pods
+	// on the cluster (i.e. preventing a possible race with kube-addons). This
+	// does *not* mean that the function will succeed as soon as minStartupPods
+	// are found to be running and ready; it ensures that *all* pods it finds
+	// are running and ready. This is the minimum number it must find.
+	minStartupPods = 1
+)
+
 var (
 	cloudConfig = &testContext.CloudConfig

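Why minStartupPods matters, as an aside that is not part of this diff: with zero pods, a check that only asks whether every pod it found is running and ready passes vacuously, so the wait could end before kube-addons has created anything. A minimal Go sketch of that reasoning, reusing the api.Pod type and the podRunningReady helper added elsewhere in this PR; the allPodsHealthy name is hypothetical:

// allPodsHealthy is a hypothetical helper, shown only as an illustration: if
// pods is empty the loop body never runs, allOk stays true, and the vacuous
// "all running and ready" answer would end the wait too early unless the
// len(pods) >= minPods requirement is also enforced.
func allPodsHealthy(pods []api.Pod, minPods int) bool {
	allOk := true
	for i := range pods {
		if ok, _ := podRunningReady(&pods[i]); !ok {
			allOk = false
		}
	}
	return allOk && len(pods) >= minPods
}
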
@@ -92,6 +116,15 @@ func TestE2E(t *testing.T) {
 	}
 
 	gomega.RegisterFailHandler(ginkgo.Fail)
+
+	// Ensure all pods are running and ready before starting tests (otherwise,
+	// cluster infrastructure pods that are being pulled or started can block
+	// test pods from running, and tests that ensure all pods are running and
+	// ready will fail).
+	if err := waitForPodsRunningReady(api.NamespaceDefault, minStartupPods, podStartupTimeout); err != nil {
+		glog.Fatalf("Error waiting for all pods to be running and ready: %v", err)
+	}
+
 	// Run tests through the Ginkgo runner with output to console + JUnit for Jenkins
 	var r []ginkgo.Reporter
 	if *reportDir != "" {

@@ -190,25 +190,6 @@ func rebootNode(c *client.Client, provider, name string, result chan bool) {
 	result <- true
 }
 
-// podRunningReady is the checker function passed to waitForPodCondition(...)
-// (found in util.go). It ensures that the pods' phase is running and that the
-// ready condition is true.
-func podRunningReady(p *api.Pod) (bool, error) {
-	// Check the phase is running.
-	if p.Status.Phase != api.PodRunning {
-		return false, fmt.Errorf("want pod %s on %s to be %v but was %v",
-			p.ObjectMeta.Name, p.Spec.Host, api.PodRunning, p.Status.Phase)
-	}
-	// Check the ready condition is true.
-	for _, cond := range p.Status.Conditions {
-		if cond.Type == api.PodReady && cond.Status == api.ConditionTrue {
-			return true, nil
-		}
-	}
-	return false, fmt.Errorf("pod %s on %s didn't have condition %v, %v; conditions: %v",
-		p.ObjectMeta.Name, p.Spec.Host, api.PodReady, api.ConditionTrue, p.Status.Conditions)
-}
-
 // checkPodsRunning returns whether all pods whose names are listed in podNames
 // are running.
 func checkPodsRunning(c *client.Client, podNames []string, timeout time.Duration) bool {

@@ -105,6 +105,83 @@ func providerIs(providers ...string) bool {
 
 type podCondition func(pod *api.Pod) (bool, error)
 
+// podReady returns whether pod has a condition of Ready with a status of true.
+func podReady(pod *api.Pod) bool {
+	for _, cond := range pod.Status.Conditions {
+		if cond.Type == api.PodReady && cond.Status == api.ConditionTrue {
+			return true
+		}
+	}
+	return false
+}
+
+// logPodStates logs all pod states for debugging.
+func logPodStates(c *client.Client, ns string) {
+	podList, err := c.Pods(ns).List(labels.Everything(), fields.Everything())
+	if err != nil {
+		Logf("Error getting pods for logPodStates(...): %v", err)
+		return
+	}
+	Logf("Phase and conditions for all pods in namespace '%s':", ns)
+	for _, pod := range podList.Items {
+		Logf("- pod '%s' on '%s' has phase '%v' and conditions %v",
+			pod.ObjectMeta.Name, pod.Spec.Host, pod.Status.Phase, pod.Status.Conditions)
+	}
+}
+
+// podRunningReady checks whether pod p's phase is running and it has a ready
+// condition of status true.
+func podRunningReady(p *api.Pod) (bool, error) {
+	// Check the phase is running.
+	if p.Status.Phase != api.PodRunning {
+		return false, fmt.Errorf("want pod '%s' on '%s' to be '%v' but was '%v'",
+			p.ObjectMeta.Name, p.Spec.Host, api.PodRunning, p.Status.Phase)
+	}
+	// Check the ready condition is true.
+	if !podReady(p) {
+		return false, fmt.Errorf("pod '%s' on '%s' didn't have condition {%v %v}; conditions: %v",
+			p.ObjectMeta.Name, p.Spec.Host, api.PodReady, api.ConditionTrue, p.Status.Conditions)
+
+	}
+	return true, nil
+}
+
+// waitForPodsRunningReady waits up to timeout to ensure that all pods in
+// namespace ns are running and ready, requiring that it finds at least minPods.
+// It has separate behavior from other 'wait for' pods functions in that it re-
+// queries the list of pods on every iteration. This is useful, for example, in
+// cluster startup, because the number of pods increases while waiting.
+func waitForPodsRunningReady(ns string, minPods int, timeout time.Duration) error {
+	c, err := loadClient()
+	if err != nil {
+		return err
+	}
+	Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready",
+		timeout, minPods, ns)
+	for start := time.Now(); time.Since(start) < timeout; time.Sleep(podPoll) {
+		// We get the new list of pods in every iteration because more pods come
+		// online during startup and we want to ensure they are also checked.
+		podList, err := c.Pods(ns).List(labels.Everything(), fields.Everything())
+		if err != nil {
+			Logf("Error getting pods in namespace '%s': %v", ns, err)
+			continue
+		}
+		nOk := 0
+		for _, pod := range podList.Items {
+			if res, err := podRunningReady(&pod); res && err == nil {
+				nOk++
+			}
+		}
+		Logf("%d / %d pods in namespace '%s' are running and ready (%v elapsed)",
+			nOk, len(podList.Items), ns, time.Since(start))
+		if nOk == len(podList.Items) && nOk >= minPods {
+			return nil
+		}
+	}
+	logPodStates(c, ns)
+	return fmt.Errorf("Not all pods in namespace '%s' running and ready within %v", ns, timeout)
+}
+
 func waitForPodCondition(c *client.Client, ns, podName, desc string, poll, timeout time.Duration, condition podCondition) error {
 	Logf("Waiting up to %v for pod %s status to be %s", timeout, podName, desc)
 	for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {

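A usage sketch, not part of this diff: podRunningReady satisfies the podCondition signature, so a test that wants to block on a single named pod could pass it straight to waitForPodCondition, together with the podPoll and podStartTimeout constants that util.go already uses for its other waits; the wrapper name below is hypothetical.

// waitForPodRunningReadyByName is a hypothetical wrapper, shown only as an
// illustration: it reuses podRunningReady as the podCondition argument so one
// pod is awaited with the same running-and-ready predicate that
// waitForPodsRunningReady applies to a whole namespace.
func waitForPodRunningReadyByName(c *client.Client, ns, podName string) error {
	return waitForPodCondition(c, ns, podName, "running and ready",
		podPoll, podStartTimeout, podRunningReady)
}
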
@@ -117,9 +194,10 @@ func waitForPodCondition(c *client.Client, ns, podName, desc string, poll, timeo
 		if done {
 			return err
 		}
-		Logf("Waiting for pod %s in namespace %s status to be %q (found %q) (%v)", podName, ns, desc, pod.Status.Phase, time.Since(start))
+		Logf("Waiting for pod '%s' in namespace '%s' status to be '%q' (found phase: '%q', readiness: %t) (%v)",
+			podName, ns, desc, pod.Status.Phase, podReady(pod), time.Since(start))
 	}
-	return fmt.Errorf("gave up waiting for pod %s to be %s after %v", podName, desc, timeout)
+	return fmt.Errorf("gave up waiting for pod '%s' to be '%s' after %v", podName, desc, timeout)
 }
 
 // createNS should be used by every test, note that we append a common prefix to the provided test name.

@@ -149,7 +227,7 @@ func waitForPodRunning(c *client.Client, podName string) error {
 func waitForPodNotPending(c *client.Client, ns, podName string) error {
 	return waitForPodCondition(c, ns, podName, "!pending", podPoll, podStartTimeout, func(pod *api.Pod) (bool, error) {
 		if pod.Status.Phase != api.PodPending {
-			Logf("Saw pod %s in namespace %s out of pending state (found %q)", podName, ns, pod.Status.Phase)
+			Logf("Saw pod '%s' in namespace '%s' out of pending state (found '%q')", podName, ns, pod.Status.Phase)
 			return true, nil
 		}
 		return false, nil

@@ -162,17 +240,17 @@ func waitForPodSuccessInNamespace(c *client.Client, podName string, contName str
 		// Cannot use pod.Status.Phase == api.PodSucceeded/api.PodFailed due to #2632
 		ci, ok := api.GetContainerStatus(pod.Status.ContainerStatuses, contName)
 		if !ok {
-			Logf("No Status.Info for container %s in pod %s yet", contName, podName)
+			Logf("No Status.Info for container '%s' in pod '%s' yet", contName, podName)
 		} else {
 			if ci.State.Termination != nil {
 				if ci.State.Termination.ExitCode == 0 {
 					By("Saw pod success")
 					return true, nil
 				} else {
-					return true, fmt.Errorf("pod %s terminated with failure: %+v", podName, ci.State.Termination)
+					return true, fmt.Errorf("pod '%s' terminated with failure: %+v", podName, ci.State.Termination)
 				}
 			} else {
-				Logf("Nil State.Termination for container %s in pod %s in namespace %s so far", contName, podName, namespace)
+				Logf("Nil State.Termination for container '%s' in pod '%s' in namespace '%s' so far", contName, podName, namespace)
 			}
 		}
 		return false, nil