Refactor liveness probing

This commit builds on previous work and creates an independent
worker for every liveness probe. Liveness probes behave largely the same
as readiness probes, so much of the code is shared by introducing a
probeType parameter to distinguish the type when it matters. The
circular dependency between the runtime and the prober is broken by
exposing a shared liveness ResultsManager, owned by the
kubelet. Finally, an Updates channel is introduced to the ResultsManager
so the kubelet can react to unhealthy containers immediately.
Tim St. Clair
2015-10-19 15:15:59 -07:00
parent 0d7b53a201
commit a263c77b65
16 changed files with 510 additions and 396 deletions
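To make the last point of the commit message concrete, below is a minimal sketch of the kind of results manager it describes: a per-container cache of probe results plus an Updates channel the kubelet can select on. The names, signatures, string container IDs, and channel buffer size are assumptions for illustration only, not the actual code in pkg/kubelet/prober/results.

package main

import (
	"fmt"
	"sync"
)

// Result is the cached outcome of a probe (true = healthy/ready).
type Result bool

const (
	Success Result = true
	Failure Result = false
)

// Update is published whenever a container's cached result changes.
type Update struct {
	ContainerID string
	Result      Result
}

// Manager caches probe results per container and exposes a change stream.
// Illustrative shape only; not the real results.Manager API.
type Manager struct {
	mu      sync.RWMutex
	cache   map[string]Result
	updates chan Update
}

func NewManager() *Manager {
	return &Manager{
		cache:   map[string]Result{},
		updates: make(chan Update, 20), // buffer size is an arbitrary choice here
	}
}

// Get returns the cached result and whether one has been set.
func (m *Manager) Get(id string) (Result, bool) {
	m.mu.RLock()
	defer m.mu.RUnlock()
	r, ok := m.cache[id]
	return r, ok
}

// Set stores a result and, if it changed, notifies listeners such as the
// kubelet sync loop reacting to a failed liveness probe.
func (m *Manager) Set(id string, r Result) {
	m.mu.Lock()
	prev, ok := m.cache[id]
	m.cache[id] = r
	m.mu.Unlock()
	if !ok || prev != r {
		m.updates <- Update{ContainerID: id, Result: r}
	}
}

// Remove drops the cached result for a container that no longer exists.
func (m *Manager) Remove(id string) {
	m.mu.Lock()
	defer m.mu.Unlock()
	delete(m.cache, id)
}

// Updates is the channel the kubelet can select on to react immediately.
func (m *Manager) Updates() <-chan Update {
	return m.updates
}

func main() {
	m := NewManager()
	go m.Set("container-1", Failure) // e.g. a liveness probe just failed
	u := <-m.Updates()
	fmt.Printf("container %s changed to %v\n", u.ContainerID, u.Result)
}

Because the kubelet owns the shared liveness manager and both the prober workers and the runtime only consume it, the circular dependency mentioned in the commit message goes away.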


@@ -17,14 +17,19 @@ limitations under the License.
package prober
import (
"fmt"
"testing"
"time"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/client/record"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/probe"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/exec"
"k8s.io/kubernetes/pkg/util/wait"
)
const (
@@ -52,12 +57,11 @@ func TestDoProbe(t *testing.T) {
failedStatus.Phase = api.PodFailed
tests := []struct {
probe api.Probe
podStatus *api.PodStatus
expectContinue bool
expectReadySet bool
expectedReadiness results.Result
probe api.Probe
podStatus *api.PodStatus
expectContinue bool
expectSet bool
expectedResult results.Result
}{
{ // No status.
expectContinue: true,
@@ -72,136 +76,158 @@ func TestDoProbe(t *testing.T) {
{ // Container waiting
podStatus: &pendingStatus,
expectContinue: true,
expectReadySet: true,
expectSet: true,
},
{ // Container terminated
podStatus: &terminatedStatus,
expectReadySet: true,
podStatus: &terminatedStatus,
expectSet: true,
},
{ // Probe successful.
podStatus: &runningStatus,
expectContinue: true,
expectReadySet: true,
expectedReadiness: results.Success,
podStatus: &runningStatus,
expectContinue: true,
expectSet: true,
expectedResult: results.Success,
},
{ // Initial delay passed
podStatus: &runningStatus,
probe: api.Probe{
InitialDelaySeconds: -100,
},
expectContinue: true,
expectReadySet: true,
expectedReadiness: results.Success,
expectContinue: true,
expectSet: true,
expectedResult: results.Success,
},
}
for i, test := range tests {
w := newTestWorker(test.probe)
if test.podStatus != nil {
m.statusManager.SetPodStatus(w.pod, *test.podStatus)
}
if c := doProbe(m, w); c != test.expectContinue {
t.Errorf("[%d] Expected continue to be %v but got %v", i, test.expectContinue, c)
}
ready, ok := m.readinessCache.Get(containerID)
if ok != test.expectReadySet {
t.Errorf("[%d] Expected to have readiness: %v but got %v", i, test.expectReadySet, ok)
}
if ready != test.expectedReadiness {
t.Errorf("[%d] Expected readiness: %v but got %v", i, test.expectedReadiness, ready)
}
for _, probeType := range [...]probeType{liveness, readiness} {
for i, test := range tests {
w := newTestWorker(m, probeType, test.probe)
if test.podStatus != nil {
m.statusManager.SetPodStatus(w.pod, *test.podStatus)
}
if c := w.doProbe(); c != test.expectContinue {
t.Errorf("[%s-%d] Expected continue to be %v but got %v", probeType, i, test.expectContinue, c)
}
result, ok := resultsManager(m, probeType).Get(containerID)
if ok != test.expectSet {
t.Errorf("[%s-%d] Expected to have result: %v but got %v", probeType, i, test.expectSet, ok)
}
if result != test.expectedResult {
t.Errorf("[%s-%d] Expected result: %v but got %v", probeType, i, test.expectedResult, result)
}
// Clean up.
m.statusManager.DeletePodStatus(podUID)
m.readinessCache.Remove(containerID)
// Clean up.
m.statusManager.DeletePodStatus(podUID)
resultsManager(m, probeType).Remove(containerID)
}
}
}
func TestInitialDelay(t *testing.T) {
m := newTestManager()
w := newTestWorker(api.Probe{
InitialDelaySeconds: 10,
})
m.statusManager.SetPodStatus(w.pod, getRunningStatus())
if !doProbe(m, w) {
t.Errorf("Expected to continue, but did not")
}
for _, probeType := range [...]probeType{liveness, readiness} {
w := newTestWorker(m, probeType, api.Probe{
InitialDelaySeconds: 10,
})
m.statusManager.SetPodStatus(w.pod, getRunningStatus())
ready, ok := m.readinessCache.Get(containerID)
if !ok {
t.Errorf("Expected readiness to be false, but was not set")
} else if ready {
t.Errorf("Expected readiness to be false, but was true")
}
if !w.doProbe() {
t.Errorf("[%s] Expected to continue, but did not", probeType)
}
// 100 seconds later...
laterStatus := getRunningStatus()
laterStatus.ContainerStatuses[0].State.Running.StartedAt.Time =
time.Now().Add(-100 * time.Second)
m.statusManager.SetPodStatus(w.pod, laterStatus)
expectedResult := results.Result(probeType == liveness)
result, ok := resultsManager(m, probeType).Get(containerID)
if !ok {
t.Errorf("[%s] Expected result to be set during initial delay, but was not set", probeType)
} else if result != expectedResult {
t.Errorf("[%s] Expected result to be %v during initial delay, but was %v",
probeType, expectedResult, result)
}
// Second call should succeed (already waited).
if !doProbe(m, w) {
t.Errorf("Expected to continue, but did not")
}
// 100 seconds later...
laterStatus := getRunningStatus()
laterStatus.ContainerStatuses[0].State.Running.StartedAt.Time =
time.Now().Add(-100 * time.Second)
m.statusManager.SetPodStatus(w.pod, laterStatus)
ready, ok = m.readinessCache.Get(containerID)
if !ok {
t.Errorf("Expected readiness to be true, but was not set")
} else if !ready {
t.Errorf("Expected readiness to be true, but was false")
// Second call should succeed (already waited).
if !w.doProbe() {
t.Errorf("[%s] Expected to continue, but did not", probeType)
}
result, ok = resultsManager(m, probeType).Get(containerID)
if !ok {
t.Errorf("[%s] Expected result to be true, but was not set", probeType)
} else if !result {
t.Errorf("[%s] Expected result to be true, but was false", probeType)
}
}
}
func TestCleanUp(t *testing.T) {
m := newTestManager()
pod := getTestPod(api.Probe{})
m.statusManager.SetPodStatus(&pod, getRunningStatus())
m.readinessCache.Set(containerID, results.Success)
w := m.newWorker(&pod, pod.Spec.Containers[0])
m.readinessProbes[containerPath{podUID, containerName}] = w
if ready, _ := m.readinessCache.Get(containerID); !ready {
t.Fatal("Expected readiness to be true.")
}
for _, probeType := range [...]probeType{liveness, readiness} {
key := probeKey{podUID, containerName, probeType}
w := newTestWorker(m, probeType, api.Probe{})
m.statusManager.SetPodStatus(w.pod, getRunningStatus())
go w.run()
m.workers[key] = w
close(w.stop)
if err := waitForWorkerExit(m, []containerPath{{podUID, containerName}}); err != nil {
t.Fatal(err)
}
// Wait for worker to run.
condition := func() (bool, error) {
ready, _ := resultsManager(m, probeType).Get(containerID)
return ready == results.Success, nil
}
if ready, _ := condition(); !ready {
if err := wait.Poll(100*time.Millisecond, util.ForeverTestTimeout, condition); err != nil {
t.Fatalf("[%s] Error waiting for worker ready: %v", probeType, err)
}
}
if _, ok := m.readinessCache.Get(containerID); ok {
t.Error("Expected readiness to be cleared.")
}
if _, ok := m.readinessProbes[containerPath{podUID, containerName}]; ok {
t.Error("Expected worker to be cleared.")
close(w.stop)
if err := waitForWorkerExit(m, []probeKey{key}); err != nil {
t.Fatalf("[%s] error waiting for worker exit: %v", probeType, err)
}
if _, ok := resultsManager(m, probeType).Get(containerID); ok {
t.Errorf("[%s] Expected result to be cleared.", probeType)
}
if _, ok := m.workers[key]; ok {
t.Errorf("[%s] Expected worker to be cleared.", probeType)
}
}
}
func TestHandleCrash(t *testing.T) {
m := newTestManager()
m.prober = CrashingProber{}
w := newTestWorker(api.Probe{})
m.prober = &prober{
refManager: kubecontainer.NewRefManager(),
recorder: &record.FakeRecorder{},
exec: crashingExecProber{},
}
w := newTestWorker(m, readiness, api.Probe{})
m.statusManager.SetPodStatus(w.pod, getRunningStatus())
// doProbe should recover from the crash, and keep going.
if !doProbe(m, w) {
if !w.doProbe() {
t.Error("Expected to keep going, but terminated.")
}
if _, ok := m.readinessCache.Get(containerID); ok {
if _, ok := m.readinessManager.Get(containerID); ok {
t.Error("Expected readiness to be unchanged from crash.")
}
}
func newTestWorker(probeSpec api.Probe) *worker {
pod := getTestPod(probeSpec)
return &worker{
stop: make(chan struct{}),
pod: &pod,
container: pod.Spec.Containers[0],
spec: &probeSpec,
func newTestWorker(m *manager, probeType probeType, probeSpec api.Probe) *worker {
// All tests rely on the fake exec prober.
probeSpec.Handler = api.Handler{
Exec: &api.ExecAction{},
}
pod := getTestPod(probeType, probeSpec)
return newWorker(m, probeType, &pod, pod.Spec.Containers[0])
}
func getRunningStatus() api.PodStatus {
@@ -217,10 +243,15 @@ func getRunningStatus() api.PodStatus {
return podStatus
}
func getTestPod(probeSpec api.Probe) api.Pod {
func getTestPod(probeType probeType, probeSpec api.Probe) api.Pod {
container := api.Container{
Name: containerName,
ReadinessProbe: &probeSpec,
Name: containerName,
}
switch probeType {
case readiness:
container.ReadinessProbe = &probeSpec
case liveness:
container.LivenessProbe = &probeSpec
}
pod := api.Pod{
Spec: api.PodSpec{
@@ -232,12 +263,18 @@ func getTestPod(probeSpec api.Probe) api.Pod {
return pod
}
type CrashingProber struct{}
func (f CrashingProber) ProbeLiveness(_ *api.Pod, _ api.PodStatus, c api.Container, _ kubecontainer.ContainerID, _ int64) (probe.Result, error) {
panic("Intentional ProbeLiveness crash.")
func resultsManager(m *manager, probeType probeType) results.Manager {
switch probeType {
case readiness:
return m.readinessManager
case liveness:
return m.livenessManager
}
panic(fmt.Errorf("Unhandled case: %v", probeType))
}
func (f CrashingProber) ProbeReadiness(_ *api.Pod, _ api.PodStatus, c api.Container, _ kubecontainer.ContainerID) (probe.Result, error) {
panic("Intentional ProbeReadiness crash.")
type crashingExecProber struct{}
func (p crashingExecProber) Probe(_ exec.Cmd) (probe.Result, string, error) {
panic("Intentional Probe crash.")
}