mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 04:08:16 +00:00 
			
		
		
		
	Add timeouts to HealthChecks and retry checks
Fixes issue #3532. Added timeouts for HTTP and TCP checks and enabled kubelet/probe to kubelet#maxRetries times before declaring Failure. Added Probe.TimeoutSecs to API Probe variants now check container.LivenessProbe.TimeoutSeconds Also added a test for timeouts in http_test.go.
This commit is contained in:
		@@ -279,6 +279,8 @@ type Probe struct {
 | 
			
		||||
	Handler `json:",inline"`
 | 
			
		||||
	// Length of time before health checking is activated.  In seconds.
 | 
			
		||||
	InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty"`
 | 
			
		||||
	// Length of time before health checking times out.  In seconds.
 | 
			
		||||
	TimeoutSeconds int64 `json:"timeoutSeconds,omitempty"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// PullPolicy describes a policy for if/when to pull a container image
 | 
			
		||||
 
 | 
			
		||||
@@ -911,6 +911,7 @@ func init() {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
			out.InitialDelaySeconds = in.InitialDelaySeconds
 | 
			
		||||
			out.TimeoutSeconds = in.TimeoutSeconds
 | 
			
		||||
			return nil
 | 
			
		||||
		},
 | 
			
		||||
		func(in *LivenessProbe, out *newer.Probe, s conversion.Scope) error {
 | 
			
		||||
@@ -924,6 +925,7 @@ func init() {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
			out.InitialDelaySeconds = in.InitialDelaySeconds
 | 
			
		||||
			out.TimeoutSeconds = in.TimeoutSeconds
 | 
			
		||||
			return nil
 | 
			
		||||
		},
 | 
			
		||||
	)
 | 
			
		||||
 
 | 
			
		||||
@@ -227,6 +227,8 @@ type LivenessProbe struct {
 | 
			
		||||
	Exec *ExecAction `json:"exec,omitempty" description:"parameters for exec-based liveness probe"`
 | 
			
		||||
	// Length of time before health checking is activated.  In seconds.
 | 
			
		||||
	InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty" description:"number of seconds after the container has started before liveness probes are initiated"`
 | 
			
		||||
	// Length of time before health checking times out.  In seconds.
 | 
			
		||||
	TimeoutSeconds int64 `json:"timeoutSeconds,omitempty" description:"number of seconds after which liveness probes timeout; defaults to 1 second"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// PullPolicy describes a policy for if/when to pull a container image
 | 
			
		||||
 
 | 
			
		||||
@@ -824,6 +824,7 @@ func init() {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
			out.InitialDelaySeconds = in.InitialDelaySeconds
 | 
			
		||||
			out.TimeoutSeconds = in.TimeoutSeconds
 | 
			
		||||
			return nil
 | 
			
		||||
		},
 | 
			
		||||
		func(in *LivenessProbe, out *newer.Probe, s conversion.Scope) error {
 | 
			
		||||
@@ -837,6 +838,7 @@ func init() {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
			out.InitialDelaySeconds = in.InitialDelaySeconds
 | 
			
		||||
			out.TimeoutSeconds = in.TimeoutSeconds
 | 
			
		||||
			return nil
 | 
			
		||||
		},
 | 
			
		||||
	)
 | 
			
		||||
 
 | 
			
		||||
@@ -191,6 +191,8 @@ type LivenessProbe struct {
 | 
			
		||||
	Exec *ExecAction `json:"exec,omitempty" description:"parameters for exec-based liveness probe"`
 | 
			
		||||
	// Length of time before health checking is activated.  In seconds.
 | 
			
		||||
	InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty" description:"number of seconds after the container has started before liveness probes are initiated"`
 | 
			
		||||
	// Length of time before health checking times out.  In seconds.
 | 
			
		||||
	TimeoutSeconds int64 `json:"timeoutSeconds,omitempty" description:"number of seconds after which liveness probes timeout; defaults to 1 second"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// PullPolicy describes a policy for if/when to pull a container image
 | 
			
		||||
 
 | 
			
		||||
@@ -297,6 +297,8 @@ type Probe struct {
 | 
			
		||||
	Handler `json:",inline"`
 | 
			
		||||
	// Length of time before health checking is activated.  In seconds.
 | 
			
		||||
	InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty"`
 | 
			
		||||
	// Length of time before health checking times out.  In seconds.
 | 
			
		||||
	TimeoutSeconds int64 `json:"timeoutSeconds,omitempty"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// PullPolicy describes a policy for if/when to pull a container image
 | 
			
		||||
 
 | 
			
		||||
@@ -55,6 +55,7 @@ const defaultChanSize = 1024
 | 
			
		||||
const minShares = 2
 | 
			
		||||
const sharesPerCPU = 1024
 | 
			
		||||
const milliCPUToCPU = 1000
 | 
			
		||||
const maxRetries int = 3
 | 
			
		||||
 | 
			
		||||
// SyncHandler is an interface implemented by Kubelet, for testability
 | 
			
		||||
type SyncHandler interface {
 | 
			
		||||
@@ -1417,7 +1418,7 @@ func (kl *Kubelet) GetPodStatus(podFullName string, uid types.UID) (api.PodStatu
 | 
			
		||||
	return podStatus, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (probe.Status, error) {
 | 
			
		||||
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (healthStatus probe.Status, err error) {
 | 
			
		||||
	// Give the container 60 seconds to start up.
 | 
			
		||||
	if container.LivenessProbe == nil {
 | 
			
		||||
		return probe.Success, nil
 | 
			
		||||
@@ -1425,7 +1426,13 @@ func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status ap
 | 
			
		||||
	if time.Now().Unix()-dockerContainer.Created < container.LivenessProbe.InitialDelaySeconds {
 | 
			
		||||
		return probe.Success, nil
 | 
			
		||||
	}
 | 
			
		||||
	return kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
 | 
			
		||||
	for i := 0; i < maxRetries; i++ {
 | 
			
		||||
		healthStatus, err = kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
 | 
			
		||||
		if healthStatus == probe.Success {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return healthStatus, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Returns logs of current machine.
 | 
			
		||||
 
 | 
			
		||||
@@ -19,6 +19,7 @@ package kubelet
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 | 
			
		||||
	"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
 | 
			
		||||
@@ -39,6 +40,14 @@ var (
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types.UID, status api.PodStatus, container api.Container) (probe.Status, error) {
 | 
			
		||||
	var timeout time.Duration
 | 
			
		||||
	secs := container.LivenessProbe.TimeoutSeconds
 | 
			
		||||
	if secs > 0 {
 | 
			
		||||
		timeout = time.Duration(secs) * time.Second
 | 
			
		||||
	} else {
 | 
			
		||||
		timeout = 1 * time.Second
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if p.Exec != nil {
 | 
			
		||||
		return execprober.Probe(kl.newExecInContainer(podFullName, podUID, container))
 | 
			
		||||
	}
 | 
			
		||||
@@ -47,14 +56,15 @@ func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return probe.Unknown, err
 | 
			
		||||
		}
 | 
			
		||||
		return httprober.Probe(extractGetParams(p.HTTPGet, status, port))
 | 
			
		||||
		host, port, path := extractGetParams(p.HTTPGet, status, port)
 | 
			
		||||
		return httprober.Probe(host, port, path, timeout)
 | 
			
		||||
	}
 | 
			
		||||
	if p.TCPSocket != nil {
 | 
			
		||||
		port, err := extractPort(p.TCPSocket.Port, container)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return probe.Unknown, err
 | 
			
		||||
		}
 | 
			
		||||
		return tcprober.Probe(status.PodIP, port)
 | 
			
		||||
		return tcprober.Probe(status.PodIP, port, timeout)
 | 
			
		||||
	}
 | 
			
		||||
	glog.Warningf("Failed to find probe builder for %s %+v", container.Name, container.LivenessProbe)
 | 
			
		||||
	return probe.Unknown, nil
 | 
			
		||||
 
 | 
			
		||||
@@ -21,6 +21,7 @@ import (
 | 
			
		||||
	"net/http"
 | 
			
		||||
	"net/url"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
 | 
			
		||||
 | 
			
		||||
@@ -28,16 +29,17 @@ import (
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func New() HTTPProber {
 | 
			
		||||
	return HTTPProber{&http.Client{}}
 | 
			
		||||
	transport := &http.Transport{}
 | 
			
		||||
	return HTTPProber{transport}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type HTTPProber struct {
 | 
			
		||||
	client HTTPGetInterface
 | 
			
		||||
	transport *http.Transport
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Probe returns a ProbeRunner capable of running an http check.
 | 
			
		||||
func (pr *HTTPProber) Probe(host string, port int, path string) (probe.Status, error) {
 | 
			
		||||
	return DoHTTPProbe(formatURL(host, port, path), pr.client)
 | 
			
		||||
func (pr *HTTPProber) Probe(host string, port int, path string, timeout time.Duration) (probe.Status, error) {
 | 
			
		||||
	return DoHTTPProbe(formatURL(host, port, path), &http.Client{Timeout: timeout, Transport: pr.transport})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type HTTPGetInterface interface {
 | 
			
		||||
 
 | 
			
		||||
@@ -23,6 +23,7 @@ import (
 | 
			
		||||
	"net/url"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"testing"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
 | 
			
		||||
)
 | 
			
		||||
@@ -46,20 +47,25 @@ func TestFormatURL(t *testing.T) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestHTTPProbeChecker(t *testing.T) {
 | 
			
		||||
	handleReq := func(s int) func(w http.ResponseWriter) {
 | 
			
		||||
		return func(w http.ResponseWriter) { w.WriteHeader(s) }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	prober := New()
 | 
			
		||||
	testCases := []struct {
 | 
			
		||||
		status int
 | 
			
		||||
		handler func(w http.ResponseWriter)
 | 
			
		||||
		health  probe.Status
 | 
			
		||||
	}{
 | 
			
		||||
		// The probe will be filled in below.  This is primarily testing that an HTTP GET happens.
 | 
			
		||||
		{http.StatusOK, probe.Success},
 | 
			
		||||
		{-1, probe.Failure},
 | 
			
		||||
		{handleReq(http.StatusOK), probe.Success},
 | 
			
		||||
		{handleReq(-1), probe.Failure},
 | 
			
		||||
		{func(w http.ResponseWriter) { time.Sleep(3 * time.Second) }, probe.Failure},
 | 
			
		||||
	}
 | 
			
		||||
	for _, test := range testCases {
 | 
			
		||||
		ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 | 
			
		||||
			w.WriteHeader(test.status)
 | 
			
		||||
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 | 
			
		||||
			test.handler(w)
 | 
			
		||||
		}))
 | 
			
		||||
		u, err := url.Parse(ts.URL)
 | 
			
		||||
		u, err := url.Parse(server.URL)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			t.Errorf("Unexpected error: %v", err)
 | 
			
		||||
		}
 | 
			
		||||
@@ -71,7 +77,7 @@ func TestHTTPProbeChecker(t *testing.T) {
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			t.Errorf("Unexpected error: %v", err)
 | 
			
		||||
		}
 | 
			
		||||
		health, err := prober.Probe(host, p, "")
 | 
			
		||||
		health, err := prober.Probe(host, p, "", 1*time.Second)
 | 
			
		||||
		if test.health == probe.Unknown && err == nil {
 | 
			
		||||
			t.Errorf("Expected error")
 | 
			
		||||
		}
 | 
			
		||||
 
 | 
			
		||||
@@ -19,6 +19,7 @@ package tcp
 | 
			
		||||
import (
 | 
			
		||||
	"net"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
 | 
			
		||||
 | 
			
		||||
@@ -31,16 +32,16 @@ func New() TCPProber {
 | 
			
		||||
 | 
			
		||||
type TCPProber struct{}
 | 
			
		||||
 | 
			
		||||
func (pr TCPProber) Probe(host string, port int) (probe.Status, error) {
 | 
			
		||||
	return DoTCPProbe(net.JoinHostPort(host, strconv.Itoa(port)))
 | 
			
		||||
func (pr TCPProber) Probe(host string, port int, timeout time.Duration) (probe.Status, error) {
 | 
			
		||||
	return DoTCPProbe(net.JoinHostPort(host, strconv.Itoa(port)), timeout)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// DoTCPProbe checks that a TCP socket to the address can be opened.
 | 
			
		||||
// If the socket can be opened, it returns Success
 | 
			
		||||
// If the socket fails to open, it returns Failure.
 | 
			
		||||
// This is exported because some other packages may want to do direct TCP probes.
 | 
			
		||||
func DoTCPProbe(addr string) (probe.Status, error) {
 | 
			
		||||
	conn, err := net.Dial("tcp", addr)
 | 
			
		||||
func DoTCPProbe(addr string, timeout time.Duration) (probe.Status, error) {
 | 
			
		||||
	conn, err := net.DialTimeout("tcp", addr, timeout)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return probe.Failure, nil
 | 
			
		||||
	}
 | 
			
		||||
 
 | 
			
		||||
@@ -23,6 +23,7 @@ import (
 | 
			
		||||
	"net/url"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"testing"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
 | 
			
		||||
)
 | 
			
		||||
@@ -59,7 +60,7 @@ func TestTcpHealthChecker(t *testing.T) {
 | 
			
		||||
		if !test.usePort {
 | 
			
		||||
			p = -1
 | 
			
		||||
		}
 | 
			
		||||
		status, err := prober.Probe(host, p)
 | 
			
		||||
		status, err := prober.Probe(host, p, 1*time.Second)
 | 
			
		||||
		if status != test.expectedStatus {
 | 
			
		||||
			t.Errorf("expected: %v, got: %v", test.expectedStatus, status)
 | 
			
		||||
		}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user