Mirror of https://github.com/outbackdingo/kubernetes.git (synced 2026-01-27 10:19:35 +00:00).
Commit: Implement restart rules in kubelet.
This commit is contained in:
@@ -29,10 +29,12 @@ import (
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/client-go/tools/record"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
sc "k8s.io/kubernetes/pkg/securitycontext"
|
||||
hashutil "k8s.io/kubernetes/pkg/util/hash"
|
||||
"k8s.io/kubernetes/third_party/forked/golang/expansion"
|
||||
@@ -98,6 +100,9 @@ func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus
|
||||
return true
|
||||
}
|
||||
// Check RestartPolicy for dead container
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
return podutil.ContainerShouldRestart(*container, pod.Spec, int32(status.ExitCode))
|
||||
}
|
||||
if pod.Spec.RestartPolicy == v1.RestartPolicyNever {
|
||||
klog.V(4).InfoS("Already ran container, do nothing", "pod", klog.KObj(pod), "containerName", container.Name)
|
||||
return false
|
||||
|
||||
@@ -2029,7 +2029,16 @@ func (kl *Kubelet) SyncPod(ctx context.Context, updateType kubetypes.SyncPodType
|
||||
// expected to run only once and if the kubelet is restarted then
|
||||
// they are not expected to run again.
|
||||
// We don't create and apply updates to cgroup if its a run once pod and was killed above
|
||||
if !(podKilled && pod.Spec.RestartPolicy == v1.RestartPolicyNever) {
|
||||
runOnce := pod.Spec.RestartPolicy == v1.RestartPolicyNever
|
||||
// With ContainerRestartRules, if any container is restartable, the pod should be restarted.
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
for _, c := range pod.Spec.Containers {
|
||||
if podutil.IsContainerRestartable(pod.Spec, c) {
|
||||
runOnce = false
|
||||
}
|
||||
}
|
||||
}
|
||||
if !podKilled || !runOnce {
|
||||
if !pcm.Exists(pod) {
|
||||
if err := kl.containerManager.UpdateQOSCgroups(); err != nil {
|
||||
klog.V(2).InfoS("Failed to update QoS cgroups while syncing pod", "pod", klog.KObj(pod), "err", err)
|
||||
|
||||
@@ -1644,6 +1644,7 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti
|
||||
pendingRestartableInitContainers := 0
|
||||
pendingRegularInitContainers := 0
|
||||
failedInitialization := 0
|
||||
failedInitializationNotRestartable := 0
|
||||
|
||||
// regular init containers
|
||||
for _, container := range spec.InitContainers {
|
||||
@@ -1664,13 +1665,25 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti
|
||||
case containerStatus.State.Running != nil:
|
||||
pendingRegularInitContainers++
|
||||
case containerStatus.State.Terminated != nil:
|
||||
if containerStatus.State.Terminated.ExitCode != 0 {
|
||||
exitCode := containerStatus.State.Terminated.ExitCode
|
||||
if exitCode != 0 {
|
||||
failedInitialization++
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
if !podutil.ContainerShouldRestart(container, pod.Spec, exitCode) {
|
||||
failedInitializationNotRestartable++
|
||||
}
|
||||
}
|
||||
}
|
||||
case containerStatus.State.Waiting != nil:
|
||||
if containerStatus.LastTerminationState.Terminated != nil {
|
||||
if containerStatus.LastTerminationState.Terminated.ExitCode != 0 {
|
||||
exitCode := containerStatus.LastTerminationState.Terminated.ExitCode
|
||||
if exitCode != 0 {
|
||||
failedInitialization++
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
if !podutil.ContainerShouldRestart(container, pod.Spec, exitCode) {
|
||||
failedInitializationNotRestartable++
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pendingRegularInitContainers++
|
||||
@@ -1685,9 +1698,10 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti
|
||||
running := 0
|
||||
waiting := 0
|
||||
stopped := 0
|
||||
stoppedNotRestartable := 0
|
||||
succeeded := 0
|
||||
|
||||
// restartable init containers
|
||||
// sidecar init containers
|
||||
for _, container := range spec.InitContainers {
|
||||
if !podutil.IsRestartableInitContainer(&container) {
|
||||
// Skip the regular init containers, as they have been handled above.
|
||||
@@ -1734,12 +1748,24 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti
|
||||
running++
|
||||
case containerStatus.State.Terminated != nil:
|
||||
stopped++
|
||||
if containerStatus.State.Terminated.ExitCode == 0 {
|
||||
exitCode := containerStatus.State.Terminated.ExitCode
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
if !podutil.ContainerShouldRestart(container, pod.Spec, exitCode) {
|
||||
stoppedNotRestartable++
|
||||
}
|
||||
}
|
||||
if exitCode == 0 {
|
||||
succeeded++
|
||||
}
|
||||
case containerStatus.State.Waiting != nil:
|
||||
if containerStatus.LastTerminationState.Terminated != nil {
|
||||
stopped++
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
exitCode := containerStatus.LastTerminationState.Terminated.ExitCode
|
||||
if !podutil.ContainerShouldRestart(container, pod.Spec, exitCode) {
|
||||
stoppedNotRestartable++
|
||||
}
|
||||
}
|
||||
} else {
|
||||
waiting++
|
||||
}
|
||||
@@ -1748,8 +1774,14 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti
|
||||
}
|
||||
}
|
||||
|
||||
if failedInitialization > 0 && spec.RestartPolicy == v1.RestartPolicyNever {
|
||||
return v1.PodFailed
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
if failedInitializationNotRestartable > 0 {
|
||||
return v1.PodFailed
|
||||
}
|
||||
} else {
|
||||
if failedInitialization > 0 && spec.RestartPolicy == v1.RestartPolicyNever {
|
||||
return v1.PodFailed
|
||||
}
|
||||
}
|
||||
|
||||
switch {
|
||||
@@ -1784,6 +1816,16 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti
|
||||
}
|
||||
}
|
||||
// All containers are terminated
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
if stopped != stoppedNotRestartable {
|
||||
// At least one containers are in the process of restarting
|
||||
return v1.PodRunning
|
||||
}
|
||||
if stopped == succeeded {
|
||||
return v1.PodSucceeded
|
||||
}
|
||||
return v1.PodFailed
|
||||
}
|
||||
if spec.RestartPolicy == v1.RestartPolicyAlways {
|
||||
// All containers are in the process of restarting
|
||||
return v1.PodRunning
|
||||
|
||||
@@ -3299,6 +3299,210 @@ func TestPodPhaseWithRestartOnFailure(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPodPhaseWithContainerRestartPolicy(t *testing.T) {
|
||||
var (
|
||||
containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways
|
||||
containerRestartPolicyOnFailure = v1.ContainerRestartPolicyOnFailure
|
||||
containerRestartPolicyNever = v1.ContainerRestartPolicyNever
|
||||
)
|
||||
tests := []struct {
|
||||
name string
|
||||
spec *v1.PodSpec
|
||||
statuses []v1.ContainerStatus
|
||||
podIsTerminal bool
|
||||
expectedPhase v1.PodPhase
|
||||
}{
|
||||
{
|
||||
name: "container with restart policy Never failed",
|
||||
spec: &v1.PodSpec{
|
||||
Containers: []v1.Container{{
|
||||
Name: "failed-container",
|
||||
RestartPolicy: &containerRestartPolicyNever,
|
||||
}},
|
||||
RestartPolicy: v1.RestartPolicyAlways,
|
||||
},
|
||||
statuses: []v1.ContainerStatus{failedState("failed-container")},
|
||||
expectedPhase: v1.PodFailed,
|
||||
},
|
||||
{
|
||||
name: "container with restart policy OnFailure failed",
|
||||
spec: &v1.PodSpec{
|
||||
Containers: []v1.Container{{
|
||||
Name: "failed-container",
|
||||
RestartPolicy: &containerRestartPolicyOnFailure,
|
||||
}},
|
||||
RestartPolicy: v1.RestartPolicyAlways,
|
||||
},
|
||||
statuses: []v1.ContainerStatus{
|
||||
failedState("failed-container"),
|
||||
},
|
||||
expectedPhase: v1.PodRunning,
|
||||
},
|
||||
{
|
||||
name: "container with restart policy Always failed",
|
||||
spec: &v1.PodSpec{
|
||||
Containers: []v1.Container{{
|
||||
Name: "failed-container",
|
||||
RestartPolicy: &containerRestartPolicyAlways,
|
||||
}},
|
||||
RestartPolicy: v1.RestartPolicyAlways,
|
||||
},
|
||||
statuses: []v1.ContainerStatus{
|
||||
failedState("failed-container"),
|
||||
},
|
||||
expectedPhase: v1.PodRunning,
|
||||
},
|
||||
{
|
||||
name: "At least one container with restartable container-level restart policy failed",
|
||||
// Spec to simulate containerB having RestartPolicy: Always
|
||||
spec: &v1.PodSpec{
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Name: "containerA",
|
||||
RestartPolicy: &containerRestartPolicyAlways,
|
||||
},
|
||||
{Name: "containerB"},
|
||||
},
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
},
|
||||
statuses: []v1.ContainerStatus{
|
||||
succeededState("containerA"),
|
||||
failedState("containerB"),
|
||||
},
|
||||
expectedPhase: v1.PodRunning,
|
||||
},
|
||||
{
|
||||
name: "All containers without restartable container-level restart policy failed",
|
||||
spec: &v1.PodSpec{
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Name: "containerA",
|
||||
RestartPolicy: &containerRestartPolicyNever,
|
||||
},
|
||||
{
|
||||
Name: "containerB",
|
||||
RestartPolicy: &containerRestartPolicyOnFailure,
|
||||
},
|
||||
},
|
||||
RestartPolicy: v1.RestartPolicyAlways,
|
||||
},
|
||||
statuses: []v1.ContainerStatus{
|
||||
failedState("containerA"),
|
||||
succeededState("containerB"),
|
||||
},
|
||||
expectedPhase: v1.PodFailed,
|
||||
},
|
||||
{
|
||||
name: "All containers succeeded",
|
||||
spec: &v1.PodSpec{
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Name: "containerA",
|
||||
RestartPolicy: &containerRestartPolicyNever,
|
||||
},
|
||||
{
|
||||
Name: "containerB",
|
||||
RestartPolicy: &containerRestartPolicyOnFailure,
|
||||
},
|
||||
},
|
||||
RestartPolicy: v1.RestartPolicyAlways,
|
||||
},
|
||||
statuses: []v1.ContainerStatus{
|
||||
succeededState("containerA"),
|
||||
succeededState("containerB"),
|
||||
},
|
||||
expectedPhase: v1.PodSucceeded,
|
||||
},
|
||||
}
|
||||
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true)
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
pod := &v1.Pod{
|
||||
Spec: *tc.spec,
|
||||
Status: v1.PodStatus{
|
||||
ContainerStatuses: tc.statuses,
|
||||
},
|
||||
}
|
||||
phase := getPhase(pod, tc.statuses, tc.podIsTerminal, true)
|
||||
assert.Equal(t, tc.expectedPhase, phase)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPodPhaseWithContainerRestartPolicyInitContainers(t *testing.T) {
|
||||
var (
|
||||
containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways
|
||||
containerRestartPolicyOnFailure = v1.ContainerRestartPolicyOnFailure
|
||||
containerRestartPolicyNever = v1.ContainerRestartPolicyNever
|
||||
)
|
||||
tests := []struct {
|
||||
name string
|
||||
spec *v1.PodSpec
|
||||
statuses []v1.ContainerStatus
|
||||
podIsTerminal bool
|
||||
expectedPhase v1.PodPhase
|
||||
}{
|
||||
{
|
||||
name: "init container with restart policy Never failed",
|
||||
spec: &v1.PodSpec{
|
||||
InitContainers: []v1.Container{{
|
||||
Name: "failed-container",
|
||||
RestartPolicy: &containerRestartPolicyNever,
|
||||
}},
|
||||
Containers: []v1.Container{{Name: "container"}},
|
||||
RestartPolicy: v1.RestartPolicyAlways,
|
||||
},
|
||||
statuses: []v1.ContainerStatus{failedState("failed-container")},
|
||||
expectedPhase: v1.PodFailed,
|
||||
},
|
||||
{
|
||||
name: "init container with restart policy OnFailure failed",
|
||||
spec: &v1.PodSpec{
|
||||
InitContainers: []v1.Container{{
|
||||
Name: "failed-container",
|
||||
RestartPolicy: &containerRestartPolicyOnFailure,
|
||||
}},
|
||||
Containers: []v1.Container{{Name: "container"}},
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
},
|
||||
statuses: []v1.ContainerStatus{
|
||||
failedState("failed-container"),
|
||||
},
|
||||
expectedPhase: v1.PodPending,
|
||||
},
|
||||
{
|
||||
name: "container with restart policy Always failed",
|
||||
spec: &v1.PodSpec{
|
||||
InitContainers: []v1.Container{{
|
||||
Name: "failed-container",
|
||||
RestartPolicy: &containerRestartPolicyAlways,
|
||||
}},
|
||||
Containers: []v1.Container{{Name: "container"}},
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
},
|
||||
statuses: []v1.ContainerStatus{
|
||||
failedState("failed-container"),
|
||||
},
|
||||
expectedPhase: v1.PodPending,
|
||||
},
|
||||
}
|
||||
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true)
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
pod := &v1.Pod{
|
||||
Spec: *tc.spec,
|
||||
Status: v1.PodStatus{
|
||||
ContainerStatuses: tc.statuses,
|
||||
},
|
||||
}
|
||||
phase := getPhase(pod, tc.statuses, tc.podIsTerminal, true)
|
||||
assert.Equal(t, tc.expectedPhase, phase)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// No special init-specific logic for this, see RestartAlways case
|
||||
// func TestPodPhaseWithRestartOnFailureInitContainers(t *testing.T) {
|
||||
// }
|
||||
|
||||
@@ -2371,6 +2371,210 @@ func TestGenerateAPIPodStatusWithDifferentRestartPolicies(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Test generateAPIPodStatus with different container-level restart policies.
|
||||
func TestGenerateAPIPodStatusWithContainerRestartPolicies(t *testing.T) {
|
||||
var (
|
||||
containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways
|
||||
containerRestartPolicyOnFailure = v1.ContainerRestartPolicyOnFailure
|
||||
containerRestartPolicyNever = v1.ContainerRestartPolicyNever
|
||||
)
|
||||
testErrorReason := fmt.Errorf("test-error")
|
||||
emptyContainerID := (&kubecontainer.ContainerID{}).String()
|
||||
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
|
||||
defer testKubelet.Cleanup()
|
||||
kubelet := testKubelet.kubelet
|
||||
pod := podWithUIDNameNs("12345678", "foo", "new")
|
||||
podStatus := &kubecontainer.PodStatus{
|
||||
ID: pod.UID,
|
||||
Name: pod.Name,
|
||||
Namespace: pod.Namespace,
|
||||
ContainerStatuses: []*kubecontainer.Status{
|
||||
{
|
||||
Name: "succeed",
|
||||
State: kubecontainer.ContainerStateExited,
|
||||
ExitCode: 0,
|
||||
},
|
||||
{
|
||||
Name: "failed",
|
||||
State: kubecontainer.ContainerStateExited,
|
||||
ExitCode: 1,
|
||||
},
|
||||
{
|
||||
Name: "succeed",
|
||||
State: kubecontainer.ContainerStateExited,
|
||||
ExitCode: 2,
|
||||
},
|
||||
{
|
||||
Name: "failed",
|
||||
State: kubecontainer.ContainerStateExited,
|
||||
ExitCode: 3,
|
||||
},
|
||||
},
|
||||
}
|
||||
kubelet.reasonCache.add(pod.UID, "succeed", testErrorReason, "")
|
||||
kubelet.reasonCache.add(pod.UID, "failed", testErrorReason, "")
|
||||
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true)
|
||||
for _, test := range []struct {
|
||||
desc string
|
||||
containers []v1.Container
|
||||
expectedState map[string]v1.ContainerState
|
||||
expectedLastTerminationState map[string]v1.ContainerState
|
||||
}{
|
||||
{
|
||||
desc: "container restart policy rules match",
|
||||
containers: []v1.Container{
|
||||
{
|
||||
Name: "failed",
|
||||
RestartPolicy: &containerRestartPolicyNever,
|
||||
RestartPolicyRules: []v1.ContainerRestartRule{{
|
||||
Action: v1.ContainerRestartRuleActionRestart,
|
||||
ExitCodes: &v1.ContainerRestartRuleOnExitCodes{
|
||||
Operator: v1.ContainerRestartRuleOnExitCodesOpIn,
|
||||
Values: []int32{1},
|
||||
},
|
||||
}},
|
||||
},
|
||||
},
|
||||
expectedState: map[string]v1.ContainerState{
|
||||
"failed": {Waiting: &v1.ContainerStateWaiting{Reason: testErrorReason.Error()}},
|
||||
},
|
||||
expectedLastTerminationState: map[string]v1.ContainerState{
|
||||
"failed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "container restart policy rules not match",
|
||||
containers: []v1.Container{
|
||||
{
|
||||
Name: "failed",
|
||||
RestartPolicy: &containerRestartPolicyNever,
|
||||
RestartPolicyRules: []v1.ContainerRestartRule{{
|
||||
Action: v1.ContainerRestartRuleActionRestart,
|
||||
ExitCodes: &v1.ContainerRestartRuleOnExitCodes{
|
||||
Operator: v1.ContainerRestartRuleOnExitCodesOpIn,
|
||||
Values: []int32{2},
|
||||
},
|
||||
}},
|
||||
},
|
||||
},
|
||||
expectedState: map[string]v1.ContainerState{
|
||||
"failed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
},
|
||||
expectedLastTerminationState: map[string]v1.ContainerState{
|
||||
"failed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 3,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "container restart policy never",
|
||||
containers: []v1.Container{
|
||||
{
|
||||
Name: "succeed",
|
||||
RestartPolicy: &containerRestartPolicyNever,
|
||||
},
|
||||
{
|
||||
Name: "failed",
|
||||
RestartPolicy: &containerRestartPolicyNever,
|
||||
},
|
||||
},
|
||||
expectedState: map[string]v1.ContainerState{
|
||||
"succeed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 0,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
"failed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
},
|
||||
expectedLastTerminationState: map[string]v1.ContainerState{
|
||||
"succeed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 2,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
"failed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 3,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "container restart policy OnFailure",
|
||||
containers: []v1.Container{
|
||||
{
|
||||
Name: "succeed",
|
||||
RestartPolicy: &containerRestartPolicyOnFailure,
|
||||
},
|
||||
{
|
||||
Name: "failed",
|
||||
RestartPolicy: &containerRestartPolicyOnFailure,
|
||||
},
|
||||
},
|
||||
expectedState: map[string]v1.ContainerState{
|
||||
"succeed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 0,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
"failed": {Waiting: &v1.ContainerStateWaiting{Reason: testErrorReason.Error()}},
|
||||
},
|
||||
expectedLastTerminationState: map[string]v1.ContainerState{
|
||||
"succeed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 2,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
"failed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "container restart policy Always",
|
||||
containers: []v1.Container{
|
||||
{
|
||||
Name: "succeed",
|
||||
RestartPolicy: &containerRestartPolicyAlways,
|
||||
},
|
||||
{
|
||||
Name: "failed",
|
||||
RestartPolicy: &containerRestartPolicyAlways,
|
||||
},
|
||||
},
|
||||
expectedState: map[string]v1.ContainerState{
|
||||
"succeed": {Waiting: &v1.ContainerStateWaiting{Reason: testErrorReason.Error()}},
|
||||
"failed": {Waiting: &v1.ContainerStateWaiting{Reason: testErrorReason.Error()}},
|
||||
},
|
||||
expectedLastTerminationState: map[string]v1.ContainerState{
|
||||
"succeed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 0,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
"failed": {Terminated: &v1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
ContainerID: emptyContainerID,
|
||||
}},
|
||||
},
|
||||
},
|
||||
} {
|
||||
pod.Spec.RestartPolicy = v1.RestartPolicyAlways
|
||||
// Test normal containers
|
||||
pod.Spec.Containers = test.containers
|
||||
apiStatus := kubelet.generateAPIPodStatus(pod, podStatus, false)
|
||||
expectedState, expectedLastTerminationState := test.expectedState, test.expectedLastTerminationState
|
||||
verifyContainerStatuses(t, apiStatus.ContainerStatuses, expectedState, expectedLastTerminationState, test.desc)
|
||||
pod.Spec.Containers = nil
|
||||
}
|
||||
}
|
||||
|
||||
// testPodAdmitHandler is a lifecycle.PodAdmitHandler for testing.
|
||||
type testPodAdmitHandler struct {
|
||||
// list of pods to reject.
|
||||
|
||||
@@ -1053,6 +1053,10 @@ func (m *kubeGenericRuntimeManager) computeInitContainerActions(ctx context.Cont
|
||||
// isPreviouslyInitialized indicates if the current init container is
|
||||
// previously initialized.
|
||||
isPreviouslyInitialized := podHasInitialized
|
||||
// Feature gate ContainerRestartRules makes it possible for individual init
|
||||
// containers to restart even if pod-level restart policy is Never, which is
|
||||
// checked at container-level. This `restartOnFailure` is overridden if feature
|
||||
// gate ContainerRestartRules is enabled.
|
||||
restartOnFailure := shouldRestartOnFailure(pod)
|
||||
|
||||
// Note that we iterate through the init containers in reverse order to find
|
||||
@@ -1178,6 +1182,10 @@ func (m *kubeGenericRuntimeManager) computeInitContainerActions(ctx context.Cont
|
||||
changes.InitContainersToStart = append(changes.InitContainersToStart, i)
|
||||
} else { // init container
|
||||
if isInitContainerFailed(status) {
|
||||
restartOnFailure := restartOnFailure
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
restartOnFailure = kubecontainer.ShouldContainerBeRestarted(container, pod, podStatus)
|
||||
}
|
||||
if !restartOnFailure {
|
||||
changes.KillPod = true
|
||||
changes.InitContainersToStart = nil
|
||||
@@ -1212,6 +1220,10 @@ func (m *kubeGenericRuntimeManager) computeInitContainerActions(ctx context.Cont
|
||||
logger.V(4).Info("This should not happen, init container is in unknown state but not failed", "pod", klog.KObj(pod), "containerStatus", status)
|
||||
}
|
||||
|
||||
restartOnFailure := restartOnFailure
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
restartOnFailure = kubecontainer.ShouldContainerBeRestarted(container, pod, podStatus)
|
||||
}
|
||||
if !restartOnFailure {
|
||||
changes.KillPod = true
|
||||
changes.InitContainersToStart = nil
|
||||
|
||||
@@ -589,6 +589,16 @@ func containerChanged(container *v1.Container, containerStatus *kubecontainer.St
|
||||
}
|
||||
|
||||
func shouldRestartOnFailure(pod *v1.Pod) bool {
|
||||
// With feature ContainerRestartRules enabled, the pod should be restarted
|
||||
// on failure if any of its containers have container-level restart policy
|
||||
// that is restartable.
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
for _, c := range pod.Spec.Containers {
|
||||
if podutil.IsContainerRestartable(pod.Spec, c) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return pod.Spec.RestartPolicy != v1.RestartPolicyNever
|
||||
}
|
||||
|
||||
@@ -1027,9 +1037,20 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *
|
||||
// Get the containers to start, excluding the ones that succeeded if RestartPolicy is OnFailure.
|
||||
var containersToStart []int
|
||||
for idx, c := range pod.Spec.Containers {
|
||||
if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure && containerSucceeded(&c, podStatus) {
|
||||
runOnce := pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
if c.RestartPolicy != nil {
|
||||
runOnce = *c.RestartPolicy == v1.ContainerRestartPolicyOnFailure
|
||||
}
|
||||
}
|
||||
if runOnce && containerSucceeded(&c, podStatus) {
|
||||
continue
|
||||
}
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
if c.RestartPolicy != nil && *c.RestartPolicy == v1.ContainerRestartPolicyOnFailure && containerSucceeded(&c, podStatus) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
containersToStart = append(containersToStart, idx)
|
||||
}
|
||||
|
||||
@@ -1075,6 +1096,8 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *
|
||||
}
|
||||
|
||||
// Check initialization progress.
|
||||
// TODO: Remove this code path as logically it is the subset of the next
|
||||
// code path.
|
||||
hasInitialized := m.computeInitContainerActions(ctx, pod, podStatus, &changes)
|
||||
if changes.KillPod || !hasInitialized {
|
||||
// Initialization failed or still in progress. Skip inspecting non-init
|
||||
@@ -1123,6 +1146,9 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *
|
||||
var message string
|
||||
var reason containerKillReason
|
||||
restart := shouldRestartOnFailure(pod)
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
restart = kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus)
|
||||
}
|
||||
if _, _, changed := containerChanged(&container, containerStatus); changed {
|
||||
message = fmt.Sprintf("Container %s definition changed", container.Name)
|
||||
// Restart regardless of the restart policy because the container
|
||||
|
||||
@@ -2109,6 +2109,145 @@ func TestComputePodActionsWithInitAndEphemeralContainers(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputePodActionsWithContainerRestartRules(t *testing.T) {
|
||||
var (
|
||||
containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways
|
||||
containerRestartPolicyOnFailure = v1.ContainerRestartPolicyOnFailure
|
||||
containerRestartPolicyNever = v1.ContainerRestartPolicyNever
|
||||
)
|
||||
ctx := context.Background()
|
||||
_, _, m, err := createTestRuntimeManager(ctx)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Creating a pair reference pod and status for the test cases to refer
|
||||
// the specific fields.
|
||||
basePod, baseStatus := makeBasePodAndStatus()
|
||||
noAction := podActions{
|
||||
SandboxID: baseStatus.SandboxStatuses[0].Id,
|
||||
ContainersToStart: []int{},
|
||||
ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{},
|
||||
}
|
||||
|
||||
for desc, test := range map[string]struct {
|
||||
mutatePodFn func(*v1.Pod)
|
||||
mutateStatusFn func(*kubecontainer.PodStatus)
|
||||
actions podActions
|
||||
resetStatusFn func(*kubecontainer.PodStatus)
|
||||
}{
|
||||
"restart exited containers if RestartPolicy == Always": {
|
||||
mutatePodFn: func(pod *v1.Pod) {
|
||||
pod.Spec.Containers[0].RestartPolicy = &containerRestartPolicyAlways
|
||||
pod.Spec.Containers[1].RestartPolicy = &containerRestartPolicyAlways
|
||||
pod.Spec.RestartPolicy = v1.RestartPolicyNever
|
||||
},
|
||||
mutateStatusFn: func(status *kubecontainer.PodStatus) {
|
||||
// The first container completed, restart it,
|
||||
status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited
|
||||
status.ContainerStatuses[0].ExitCode = 0
|
||||
|
||||
// The second container exited with failure, restart it,
|
||||
status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited
|
||||
status.ContainerStatuses[1].ExitCode = 111
|
||||
},
|
||||
actions: podActions{
|
||||
SandboxID: baseStatus.SandboxStatuses[0].Id,
|
||||
ContainersToStart: []int{0, 1},
|
||||
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
|
||||
},
|
||||
},
|
||||
"restart failed containers if RestartPolicy == OnFailure": {
|
||||
mutatePodFn: func(pod *v1.Pod) {
|
||||
pod.Spec.Containers[0].RestartPolicy = &containerRestartPolicyOnFailure
|
||||
pod.Spec.Containers[1].RestartPolicy = &containerRestartPolicyOnFailure
|
||||
pod.Spec.RestartPolicy = v1.RestartPolicyNever
|
||||
},
|
||||
mutateStatusFn: func(status *kubecontainer.PodStatus) {
|
||||
// The first container completed, don't restart it,
|
||||
status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited
|
||||
status.ContainerStatuses[0].ExitCode = 0
|
||||
|
||||
// The second container exited with failure, restart it,
|
||||
status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited
|
||||
status.ContainerStatuses[1].ExitCode = 111
|
||||
},
|
||||
actions: podActions{
|
||||
SandboxID: baseStatus.SandboxStatuses[0].Id,
|
||||
ContainersToStart: []int{1},
|
||||
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
|
||||
},
|
||||
},
|
||||
"restart created but not started containers if RestartPolicy == OnFailure": {
|
||||
mutatePodFn: func(pod *v1.Pod) {
|
||||
pod.Spec.Containers[0].RestartPolicy = &containerRestartPolicyOnFailure
|
||||
pod.Spec.Containers[1].RestartPolicy = &containerRestartPolicyOnFailure
|
||||
pod.Spec.RestartPolicy = v1.RestartPolicyNever
|
||||
},
|
||||
mutateStatusFn: func(status *kubecontainer.PodStatus) {
|
||||
// The first container completed, don't restart it.
|
||||
status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited
|
||||
status.ContainerStatuses[0].ExitCode = 0
|
||||
|
||||
// The second container was created, but never started.
|
||||
status.ContainerStatuses[1].State = kubecontainer.ContainerStateCreated
|
||||
},
|
||||
actions: podActions{
|
||||
SandboxID: baseStatus.SandboxStatuses[0].Id,
|
||||
ContainersToStart: []int{1},
|
||||
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
|
||||
},
|
||||
},
|
||||
"don't restart containers if RestartPolicy == Never": {
|
||||
mutatePodFn: func(pod *v1.Pod) {
|
||||
pod.Spec.Containers[0].RestartPolicy = &containerRestartPolicyNever
|
||||
pod.Spec.Containers[1].RestartPolicy = &containerRestartPolicyNever
|
||||
pod.Spec.RestartPolicy = v1.RestartPolicyAlways
|
||||
},
|
||||
mutateStatusFn: func(status *kubecontainer.PodStatus) {
|
||||
// Don't restart any containers.
|
||||
status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited
|
||||
status.ContainerStatuses[0].ExitCode = 0
|
||||
status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited
|
||||
status.ContainerStatuses[1].ExitCode = 111
|
||||
},
|
||||
actions: noAction,
|
||||
},
|
||||
"Kill pod and recreate all containers (except for the succeeded one) if the pod sandbox is dead": {
|
||||
mutatePodFn: func(pod *v1.Pod) {
|
||||
pod.Spec.Containers[1].RestartPolicy = &containerRestartPolicyOnFailure
|
||||
pod.Spec.RestartPolicy = v1.RestartPolicyAlways
|
||||
},
|
||||
mutateStatusFn: func(status *kubecontainer.PodStatus) {
|
||||
status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
|
||||
status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited
|
||||
status.ContainerStatuses[1].ExitCode = 0
|
||||
},
|
||||
actions: podActions{
|
||||
KillPod: true,
|
||||
CreateSandbox: true,
|
||||
SandboxID: baseStatus.SandboxStatuses[0].Id,
|
||||
Attempt: uint32(1),
|
||||
ContainersToStart: []int{0, 2},
|
||||
ContainersToKill: getKillMap(basePod, baseStatus, []int{}),
|
||||
},
|
||||
},
|
||||
} {
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true)
|
||||
pod, status := makeBasePodAndStatus()
|
||||
if test.mutatePodFn != nil {
|
||||
test.mutatePodFn(pod)
|
||||
}
|
||||
if test.mutateStatusFn != nil {
|
||||
test.mutateStatusFn(status)
|
||||
}
|
||||
ctx := context.Background()
|
||||
actions := m.computePodActions(ctx, pod, status)
|
||||
verifyActions(t, &test.actions, &actions, desc)
|
||||
if test.resetStatusFn != nil {
|
||||
test.resetStatusFn(status)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSyncPodWithSandboxAndDeletedPod(t *testing.T) {
|
||||
tCtx := ktesting.Init(t)
|
||||
fakeRuntime, _, m, err := createTestRuntimeManager(tCtx)
|
||||
|
||||
@@ -23,9 +23,11 @@ import (
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/runtime"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/klog/v2"
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/prober/results"
|
||||
)
|
||||
@@ -252,6 +254,9 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
|
||||
w.resultsManager.Set(w.containerID, results.Failure, w.pod)
|
||||
}
|
||||
// Abort if the container will not be restarted.
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
return c.State.Terminated != nil || podutil.IsContainerRestartable(w.pod.Spec, w.container)
|
||||
}
|
||||
return c.State.Terminated == nil ||
|
||||
w.pod.Spec.RestartPolicy != v1.RestartPolicyNever
|
||||
}
|
||||
|
||||
@@ -692,6 +692,18 @@ func checkContainerStateTransition(oldStatuses, newStatuses *v1.PodStatus, podSp
|
||||
if oldStatus.State.Terminated.ExitCode != 0 && podSpec.RestartPolicy == v1.RestartPolicyOnFailure {
|
||||
continue
|
||||
}
|
||||
// Skip any container that is allowed to restart by container restart policy
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
restartable := false
|
||||
for _, container := range podSpec.Containers {
|
||||
if container.Name == oldStatus.Name && podutil.ContainerShouldRestart(container, *podSpec, oldStatus.State.Terminated.ExitCode) {
|
||||
restartable = true
|
||||
}
|
||||
}
|
||||
if restartable {
|
||||
continue
|
||||
}
|
||||
}
|
||||
for _, newStatus := range newStatuses.ContainerStatuses {
|
||||
if oldStatus.Name == newStatus.Name && newStatus.State.Terminated == nil {
|
||||
return fmt.Errorf("terminated container %v attempted illegal transition to non-terminated state", newStatus.Name)
|
||||
@@ -716,6 +728,18 @@ func checkContainerStateTransition(oldStatuses, newStatuses *v1.PodStatus, podSp
|
||||
if oldStatus.State.Terminated.ExitCode != 0 && podSpec.RestartPolicy == v1.RestartPolicyOnFailure {
|
||||
continue
|
||||
}
|
||||
// Skip any container that is allowed to restart by container restart policy
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
restartable := false
|
||||
for _, container := range podSpec.InitContainers {
|
||||
if container.Name == oldStatus.Name && podutil.ContainerShouldRestart(container, *podSpec, oldStatus.State.Terminated.ExitCode) {
|
||||
restartable = true
|
||||
}
|
||||
}
|
||||
if restartable {
|
||||
continue
|
||||
}
|
||||
}
|
||||
for _, newStatus := range newStatuses.InitContainerStatuses {
|
||||
if oldStatus.Name == newStatus.Name && newStatus.State.Terminated == nil {
|
||||
return fmt.Errorf("terminated init container %v attempted illegal transition to non-terminated state", newStatus.Name)
|
||||
|
||||
Reference in New Issue
Block a user