diff --git a/pkg/kubelet/container/helpers.go b/pkg/kubelet/container/helpers.go index 308fc40fed6..8e791d465a3 100644 --- a/pkg/kubelet/container/helpers.go +++ b/pkg/kubelet/container/helpers.go @@ -29,10 +29,12 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" + utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/tools/record" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" podutil "k8s.io/kubernetes/pkg/api/v1/pod" + "k8s.io/kubernetes/pkg/features" sc "k8s.io/kubernetes/pkg/securitycontext" hashutil "k8s.io/kubernetes/pkg/util/hash" "k8s.io/kubernetes/third_party/forked/golang/expansion" @@ -98,6 +100,9 @@ func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus return true } // Check RestartPolicy for dead container + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + return podutil.ContainerShouldRestart(*container, pod.Spec, int32(status.ExitCode)) + } if pod.Spec.RestartPolicy == v1.RestartPolicyNever { klog.V(4).InfoS("Already ran container, do nothing", "pod", klog.KObj(pod), "containerName", container.Name) return false diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 6c6c230db40..2a22b3ba713 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -2029,7 +2029,16 @@ func (kl *Kubelet) SyncPod(ctx context.Context, updateType kubetypes.SyncPodType // expected to run only once and if the kubelet is restarted then // they are not expected to run again. // We don't create and apply updates to cgroup if its a run once pod and was killed above - if !(podKilled && pod.Spec.RestartPolicy == v1.RestartPolicyNever) { + runOnce := pod.Spec.RestartPolicy == v1.RestartPolicyNever + // With ContainerRestartRules, if any container is restartable, the pod should be restarted. 
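The comment above defers the run-once decision to podutil.IsContainerRestartable, and the gated loop that follows checks every container in the pod. The diff only shows the call sites, so the sketch below is an assumption about what "restartable" means here (container-level restart rules or policy first, pod-level policy as the fallback); it is illustrative, not that helper's source.

```go
// Sketch only: approximates the semantics assumed for
// podutil.IsContainerRestartable (k8s.io/kubernetes/pkg/api/v1/pod).
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// isContainerRestartable: a container counts as restartable if any of its
// configuration could ever trigger another run after it exits — a Restart
// rule, a container-level policy other than Never, or, when no
// container-level policy is set, a pod-level policy other than Never.
func isContainerRestartable(spec v1.PodSpec, c v1.Container) bool {
	for _, rule := range c.RestartPolicyRules {
		if rule.Action == v1.ContainerRestartRuleActionRestart {
			return true
		}
	}
	if c.RestartPolicy != nil {
		return *c.RestartPolicy != v1.ContainerRestartPolicyNever
	}
	return spec.RestartPolicy != v1.RestartPolicyNever
}

func main() {
	always := v1.ContainerRestartPolicyAlways
	spec := v1.PodSpec{
		RestartPolicy: v1.RestartPolicyNever,
		Containers: []v1.Container{
			{Name: "main"},                            // falls back to the pod policy: not restartable
			{Name: "sidecar", RestartPolicy: &always}, // restartable regardless of the pod policy
		},
	}
	for _, c := range spec.Containers {
		fmt.Println(c.Name, isContainerRestartable(spec, c))
	}
}
```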
+ if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + for _, c := range pod.Spec.Containers { + if podutil.IsContainerRestartable(pod.Spec, c) { + runOnce = false + } + } + } + if !podKilled || !runOnce { if !pcm.Exists(pod) { if err := kl.containerManager.UpdateQOSCgroups(); err != nil { klog.V(2).InfoS("Failed to update QoS cgroups while syncing pod", "pod", klog.KObj(pod), "err", err) diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 852d6ddb439..7bc35753173 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -1644,6 +1644,7 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti pendingRestartableInitContainers := 0 pendingRegularInitContainers := 0 failedInitialization := 0 + failedInitializationNotRestartable := 0 // regular init containers for _, container := range spec.InitContainers { @@ -1664,13 +1665,25 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti case containerStatus.State.Running != nil: pendingRegularInitContainers++ case containerStatus.State.Terminated != nil: - if containerStatus.State.Terminated.ExitCode != 0 { + exitCode := containerStatus.State.Terminated.ExitCode + if exitCode != 0 { failedInitialization++ + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + if !podutil.ContainerShouldRestart(container, pod.Spec, exitCode) { + failedInitializationNotRestartable++ + } + } } case containerStatus.State.Waiting != nil: if containerStatus.LastTerminationState.Terminated != nil { - if containerStatus.LastTerminationState.Terminated.ExitCode != 0 { + exitCode := containerStatus.LastTerminationState.Terminated.ExitCode + if exitCode != 0 { failedInitialization++ + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + if !podutil.ContainerShouldRestart(container, pod.Spec, exitCode) { + failedInitializationNotRestartable++ + } + } } } else { pendingRegularInitContainers++ @@ -1685,9 +1698,10 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti running := 0 waiting := 0 stopped := 0 + stoppedNotRestartable := 0 succeeded := 0 - // restartable init containers + // sidecar init containers for _, container := range spec.InitContainers { if !podutil.IsRestartableInitContainer(&container) { // Skip the regular init containers, as they have been handled above. 
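getPhase above (and ShouldContainerBeRestarted earlier) passes the terminated container, the pod spec, and the exit code to podutil.ContainerShouldRestart. A minimal sketch of the decision that call is assumed to make, with restart rules consulted before the container-level policy and the pod-level policy last; the real helper lives in pkg/api/v1/pod and may differ in details.

```go
// Sketch only: approximates the exit-code-aware decision delegated to
// podutil.ContainerShouldRestart(container, podSpec, exitCode).
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func shouldRestart(c v1.Container, spec v1.PodSpec, exitCode int32) bool {
	// Restart rules are matched against the exit code first. Only the In
	// operator is handled here; other operators are omitted for brevity.
	for _, rule := range c.RestartPolicyRules {
		if rule.Action != v1.ContainerRestartRuleActionRestart || rule.ExitCodes == nil {
			continue
		}
		if rule.ExitCodes.Operator == v1.ContainerRestartRuleOnExitCodesOpIn {
			for _, v := range rule.ExitCodes.Values {
				if v == exitCode {
					return true
				}
			}
		}
	}
	// A container-level policy, when set, overrides the pod-level policy.
	if c.RestartPolicy != nil {
		switch *c.RestartPolicy {
		case v1.ContainerRestartPolicyAlways:
			return true
		case v1.ContainerRestartPolicyOnFailure:
			return exitCode != 0
		default:
			return false
		}
	}
	// Otherwise fall back to the pod-level policy.
	switch spec.RestartPolicy {
	case v1.RestartPolicyAlways:
		return true
	case v1.RestartPolicyOnFailure:
		return exitCode != 0
	default:
		return false
	}
}

func main() {
	never := v1.ContainerRestartPolicyNever
	c := v1.Container{
		Name:          "job",
		RestartPolicy: &never,
		RestartPolicyRules: []v1.ContainerRestartRule{{
			Action: v1.ContainerRestartRuleActionRestart,
			ExitCodes: &v1.ContainerRestartRuleOnExitCodes{
				Operator: v1.ContainerRestartRuleOnExitCodesOpIn,
				Values:   []int32{42},
			},
		}},
	}
	spec := v1.PodSpec{RestartPolicy: v1.RestartPolicyNever}
	fmt.Println(shouldRestart(c, spec, 42)) // true: exit code 42 matches a rule
	fmt.Println(shouldRestart(c, spec, 1))  // false: no rule matches and the policy is Never
}
```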
@@ -1734,12 +1748,24 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti running++ case containerStatus.State.Terminated != nil: stopped++ - if containerStatus.State.Terminated.ExitCode == 0 { + exitCode := containerStatus.State.Terminated.ExitCode + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + if !podutil.ContainerShouldRestart(container, pod.Spec, exitCode) { + stoppedNotRestartable++ + } + } + if exitCode == 0 { succeeded++ } case containerStatus.State.Waiting != nil: if containerStatus.LastTerminationState.Terminated != nil { stopped++ + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + exitCode := containerStatus.LastTerminationState.Terminated.ExitCode + if !podutil.ContainerShouldRestart(container, pod.Spec, exitCode) { + stoppedNotRestartable++ + } + } } else { waiting++ } @@ -1748,8 +1774,14 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti } } - if failedInitialization > 0 && spec.RestartPolicy == v1.RestartPolicyNever { - return v1.PodFailed + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + if failedInitializationNotRestartable > 0 { + return v1.PodFailed + } + } else { + if failedInitialization > 0 && spec.RestartPolicy == v1.RestartPolicyNever { + return v1.PodFailed + } } switch { @@ -1784,6 +1816,16 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal, podHasIniti } } // All containers are terminated + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + if stopped != stoppedNotRestartable { + // At least one container is in the process of restarting + return v1.PodRunning + } + if stopped == succeeded { + return v1.PodSucceeded + } + return v1.PodFailed + } if spec.RestartPolicy == v1.RestartPolicyAlways { // All containers are in the process of restarting return v1.PodRunning diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index 2c20f7d3828..56e81ef8810 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -3299,6 +3299,210 @@ func TestPodPhaseWithRestartOnFailure(t *testing.T) { } } +func TestPodPhaseWithContainerRestartPolicy(t *testing.T) { + var ( + containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways + containerRestartPolicyOnFailure = v1.ContainerRestartPolicyOnFailure + containerRestartPolicyNever = v1.ContainerRestartPolicyNever + ) + tests := []struct { + name string + spec *v1.PodSpec + statuses []v1.ContainerStatus + podIsTerminal bool + expectedPhase v1.PodPhase + }{ + { + name: "container with restart policy Never failed", + spec: &v1.PodSpec{ + Containers: []v1.Container{{ + Name: "failed-container", + RestartPolicy: &containerRestartPolicyNever, + }}, + RestartPolicy: v1.RestartPolicyAlways, + }, + statuses: []v1.ContainerStatus{failedState("failed-container")}, + expectedPhase: v1.PodFailed, + }, + { + name: "container with restart policy OnFailure failed", + spec: &v1.PodSpec{ + Containers: []v1.Container{{ + Name: "failed-container", + RestartPolicy: &containerRestartPolicyOnFailure, + }}, + RestartPolicy: v1.RestartPolicyAlways, + }, + statuses: []v1.ContainerStatus{ + failedState("failed-container"), + }, + expectedPhase: v1.PodRunning, + }, + { + name: "container with restart policy Always failed", + spec: &v1.PodSpec{ + Containers: []v1.Container{{ + Name: "failed-container", + RestartPolicy: &containerRestartPolicyAlways, + }}, + RestartPolicy: v1.RestartPolicyAlways, + }, +
statuses: []v1.ContainerStatus{ + failedState("failed-container"), + }, + expectedPhase: v1.PodRunning, + }, + { + name: "At least one container with restartable container-level restart policy failed", + // containerA is restartable at the container level (Always); containerB falls back to the pod-level RestartPolicy (Never). + spec: &v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "containerA", + RestartPolicy: &containerRestartPolicyAlways, + }, + {Name: "containerB"}, + }, + RestartPolicy: v1.RestartPolicyNever, + }, + statuses: []v1.ContainerStatus{ + succeededState("containerA"), + failedState("containerB"), + }, + expectedPhase: v1.PodRunning, + }, + { + name: "All containers without restartable container-level restart policy failed", + spec: &v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "containerA", + RestartPolicy: &containerRestartPolicyNever, + }, + { + Name: "containerB", + RestartPolicy: &containerRestartPolicyOnFailure, + }, + }, + RestartPolicy: v1.RestartPolicyAlways, + }, + statuses: []v1.ContainerStatus{ + failedState("containerA"), + succeededState("containerB"), + }, + expectedPhase: v1.PodFailed, + }, + { + name: "All containers succeeded", + spec: &v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "containerA", + RestartPolicy: &containerRestartPolicyNever, + }, + { + Name: "containerB", + RestartPolicy: &containerRestartPolicyOnFailure, + }, + }, + RestartPolicy: v1.RestartPolicyAlways, + }, + statuses: []v1.ContainerStatus{ + succeededState("containerA"), + succeededState("containerB"), + }, + expectedPhase: v1.PodSucceeded, + }, + } + + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true) + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + pod := &v1.Pod{ + Spec: *tc.spec, + Status: v1.PodStatus{ + ContainerStatuses: tc.statuses, + }, + } + phase := getPhase(pod, tc.statuses, tc.podIsTerminal, true) + assert.Equal(t, tc.expectedPhase, phase) + }) + } +} + +func TestPodPhaseWithContainerRestartPolicyInitContainers(t *testing.T) { + var ( + containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways + containerRestartPolicyOnFailure = v1.ContainerRestartPolicyOnFailure + containerRestartPolicyNever = v1.ContainerRestartPolicyNever + ) + tests := []struct { + name string + spec *v1.PodSpec + statuses []v1.ContainerStatus + podIsTerminal bool + expectedPhase v1.PodPhase + }{ + { + name: "init container with restart policy Never failed", + spec: &v1.PodSpec{ + InitContainers: []v1.Container{{ + Name: "failed-container", + RestartPolicy: &containerRestartPolicyNever, + }}, + Containers: []v1.Container{{Name: "container"}}, + RestartPolicy: v1.RestartPolicyAlways, + }, + statuses: []v1.ContainerStatus{failedState("failed-container")}, + expectedPhase: v1.PodFailed, + }, + { + name: "init container with restart policy OnFailure failed", + spec: &v1.PodSpec{ + InitContainers: []v1.Container{{ + Name: "failed-container", + RestartPolicy: &containerRestartPolicyOnFailure, + }}, + Containers: []v1.Container{{Name: "container"}}, + RestartPolicy: v1.RestartPolicyNever, + }, + statuses: []v1.ContainerStatus{ + failedState("failed-container"), + }, + expectedPhase: v1.PodPending, + }, + { + name: "init container with restart policy Always failed", + spec: &v1.PodSpec{ + InitContainers: []v1.Container{{ + Name: "failed-container", + RestartPolicy: &containerRestartPolicyAlways, + }}, + Containers: []v1.Container{{Name: "container"}}, + RestartPolicy: v1.RestartPolicyNever, + }, + statuses: []v1.ContainerStatus{ + failedState("failed-container"), +
}, + expectedPhase: v1.PodPending, + }, + } + + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true) + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + pod := &v1.Pod{ + Spec: *tc.spec, + Status: v1.PodStatus{ + ContainerStatuses: tc.statuses, + }, + } + phase := getPhase(pod, tc.statuses, tc.podIsTerminal, true) + assert.Equal(t, tc.expectedPhase, phase) + }) + } +} + // No special init-specific logic for this, see RestartAlways case // func TestPodPhaseWithRestartOnFailureInitContainers(t *testing.T) { // } diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 2aec758299b..94c7785224a 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -2371,6 +2371,210 @@ func TestGenerateAPIPodStatusWithDifferentRestartPolicies(t *testing.T) { } } +// Test generateAPIPodStatus with different container-level restart policies. +func TestGenerateAPIPodStatusWithContainerRestartPolicies(t *testing.T) { + var ( + containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways + containerRestartPolicyOnFailure = v1.ContainerRestartPolicyOnFailure + containerRestartPolicyNever = v1.ContainerRestartPolicyNever + ) + testErrorReason := fmt.Errorf("test-error") + emptyContainerID := (&kubecontainer.ContainerID{}).String() + testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */) + defer testKubelet.Cleanup() + kubelet := testKubelet.kubelet + pod := podWithUIDNameNs("12345678", "foo", "new") + podStatus := &kubecontainer.PodStatus{ + ID: pod.UID, + Name: pod.Name, + Namespace: pod.Namespace, + ContainerStatuses: []*kubecontainer.Status{ + { + Name: "succeed", + State: kubecontainer.ContainerStateExited, + ExitCode: 0, + }, + { + Name: "failed", + State: kubecontainer.ContainerStateExited, + ExitCode: 1, + }, + { + Name: "succeed", + State: kubecontainer.ContainerStateExited, + ExitCode: 2, + }, + { + Name: "failed", + State: kubecontainer.ContainerStateExited, + ExitCode: 3, + }, + }, + } + kubelet.reasonCache.add(pod.UID, "succeed", testErrorReason, "") + kubelet.reasonCache.add(pod.UID, "failed", testErrorReason, "") + + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true) + for _, test := range []struct { + desc string + containers []v1.Container + expectedState map[string]v1.ContainerState + expectedLastTerminationState map[string]v1.ContainerState + }{ + { + desc: "container restart policy rules match", + containers: []v1.Container{ + { + Name: "failed", + RestartPolicy: &containerRestartPolicyNever, + RestartPolicyRules: []v1.ContainerRestartRule{{ + Action: v1.ContainerRestartRuleActionRestart, + ExitCodes: &v1.ContainerRestartRuleOnExitCodes{ + Operator: v1.ContainerRestartRuleOnExitCodesOpIn, + Values: []int32{1}, + }, + }}, + }, + }, + expectedState: map[string]v1.ContainerState{ + "failed": {Waiting: &v1.ContainerStateWaiting{Reason: testErrorReason.Error()}}, + }, + expectedLastTerminationState: map[string]v1.ContainerState{ + "failed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 1, + ContainerID: emptyContainerID, + }}, + }, + }, + { + desc: "container restart policy rules not match", + containers: []v1.Container{ + { + Name: "failed", + RestartPolicy: &containerRestartPolicyNever, + RestartPolicyRules: []v1.ContainerRestartRule{{ + Action: v1.ContainerRestartRuleActionRestart, + ExitCodes: &v1.ContainerRestartRuleOnExitCodes{ + Operator: v1.ContainerRestartRuleOnExitCodesOpIn, 
+ Values: []int32{2}, + }, + }}, + }, + }, + expectedState: map[string]v1.ContainerState{ + "failed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 1, + ContainerID: emptyContainerID, + }}, + }, + expectedLastTerminationState: map[string]v1.ContainerState{ + "failed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 3, + ContainerID: emptyContainerID, + }}, + }, + }, + { + desc: "container restart policy never", + containers: []v1.Container{ + { + Name: "succeed", + RestartPolicy: &containerRestartPolicyNever, + }, + { + Name: "failed", + RestartPolicy: &containerRestartPolicyNever, + }, + }, + expectedState: map[string]v1.ContainerState{ + "succeed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + ContainerID: emptyContainerID, + }}, + "failed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 1, + ContainerID: emptyContainerID, + }}, + }, + expectedLastTerminationState: map[string]v1.ContainerState{ + "succeed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 2, + ContainerID: emptyContainerID, + }}, + "failed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 3, + ContainerID: emptyContainerID, + }}, + }, + }, + { + desc: "container restart policy OnFailure", + containers: []v1.Container{ + { + Name: "succeed", + RestartPolicy: &containerRestartPolicyOnFailure, + }, + { + Name: "failed", + RestartPolicy: &containerRestartPolicyOnFailure, + }, + }, + expectedState: map[string]v1.ContainerState{ + "succeed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + ContainerID: emptyContainerID, + }}, + "failed": {Waiting: &v1.ContainerStateWaiting{Reason: testErrorReason.Error()}}, + }, + expectedLastTerminationState: map[string]v1.ContainerState{ + "succeed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 2, + ContainerID: emptyContainerID, + }}, + "failed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 1, + ContainerID: emptyContainerID, + }}, + }, + }, + { + desc: "container restart policy Always", + containers: []v1.Container{ + { + Name: "succeed", + RestartPolicy: &containerRestartPolicyAlways, + }, + { + Name: "failed", + RestartPolicy: &containerRestartPolicyAlways, + }, + }, + expectedState: map[string]v1.ContainerState{ + "succeed": {Waiting: &v1.ContainerStateWaiting{Reason: testErrorReason.Error()}}, + "failed": {Waiting: &v1.ContainerStateWaiting{Reason: testErrorReason.Error()}}, + }, + expectedLastTerminationState: map[string]v1.ContainerState{ + "succeed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + ContainerID: emptyContainerID, + }}, + "failed": {Terminated: &v1.ContainerStateTerminated{ + ExitCode: 1, + ContainerID: emptyContainerID, + }}, + }, + }, + } { + pod.Spec.RestartPolicy = v1.RestartPolicyAlways + // Test normal containers + pod.Spec.Containers = test.containers + apiStatus := kubelet.generateAPIPodStatus(pod, podStatus, false) + expectedState, expectedLastTerminationState := test.expectedState, test.expectedLastTerminationState + verifyContainerStatuses(t, apiStatus.ContainerStatuses, expectedState, expectedLastTerminationState, test.desc) + pod.Spec.Containers = nil + } +} + // testPodAdmitHandler is a lifecycle.PodAdmitHandler for testing. type testPodAdmitHandler struct { // list of pods to reject. 
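The phase expectations in the tests above follow from the terminal branch added to getPhase earlier in this diff: once every container has terminated, the pod keeps Running while any stopped container may still restart, and otherwise is Succeeded only if every container exited cleanly. Distilled into a standalone function for reference (illustrative, not the kubelet's code):

```go
// Illustrative distillation of the ContainerRestartRules branch that getPhase
// takes once every container has terminated (see the kubelet_pods.go hunk above).
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// stopped counts terminated containers, stoppedNotRestartable those that will
// not run again, and succeeded those that exited 0.
func phaseForTerminatedContainers(stopped, stoppedNotRestartable, succeeded int) v1.PodPhase {
	if stopped != stoppedNotRestartable {
		// At least one terminated container is allowed to restart.
		return v1.PodRunning
	}
	if stopped == succeeded {
		return v1.PodSucceeded
	}
	return v1.PodFailed
}

func main() {
	fmt.Println(phaseForTerminatedContainers(2, 1, 1)) // Running: one container will restart
	fmt.Println(phaseForTerminatedContainers(2, 2, 2)) // Succeeded: nothing restarts, all exited 0
	fmt.Println(phaseForTerminatedContainers(2, 2, 1)) // Failed: nothing restarts, one failure
}
```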
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 0df5e966290..3075566105f 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -1053,6 +1053,10 @@ func (m *kubeGenericRuntimeManager) computeInitContainerActions(ctx context.Cont // isPreviouslyInitialized indicates if the current init container is // previously initialized. isPreviouslyInitialized := podHasInitialized + // Feature gate ContainerRestartRules makes it possible for individual init + // containers to restart even if pod-level restart policy is Never, which is + // checked at container-level. This `restartOnFailure` is overridden if feature + // gate ContainerRestartRules is enabled. restartOnFailure := shouldRestartOnFailure(pod) // Note that we iterate through the init containers in reverse order to find @@ -1178,6 +1182,10 @@ func (m *kubeGenericRuntimeManager) computeInitContainerActions(ctx context.Cont changes.InitContainersToStart = append(changes.InitContainersToStart, i) } else { // init container if isInitContainerFailed(status) { + restartOnFailure := restartOnFailure + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + restartOnFailure = kubecontainer.ShouldContainerBeRestarted(container, pod, podStatus) + } if !restartOnFailure { changes.KillPod = true changes.InitContainersToStart = nil @@ -1212,6 +1220,10 @@ func (m *kubeGenericRuntimeManager) computeInitContainerActions(ctx context.Cont logger.V(4).Info("This should not happen, init container is in unknown state but not failed", "pod", klog.KObj(pod), "containerStatus", status) } + restartOnFailure := restartOnFailure + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + restartOnFailure = kubecontainer.ShouldContainerBeRestarted(container, pod, podStatus) + } if !restartOnFailure { changes.KillPod = true changes.InitContainersToStart = nil diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 273463c7362..b66c2bedadb 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -589,6 +589,16 @@ func containerChanged(container *v1.Container, containerStatus *kubecontainer.St } func shouldRestartOnFailure(pod *v1.Pod) bool { + // With feature ContainerRestartRules enabled, the pod should be restarted + // on failure if any of its containers have container-level restart policy + // that is restartable. + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + for _, c := range pod.Spec.Containers { + if podutil.IsContainerRestartable(pod.Spec, c) { + return true + } + } + } return pod.Spec.RestartPolicy != v1.RestartPolicyNever } @@ -1027,9 +1037,20 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod * // Get the containers to start, excluding the ones that succeeded if RestartPolicy is OnFailure. 
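The loop that continues below decides which containers to (re)start when the sandbox is recreated: a container that already succeeded is skipped when its effective restart policy is OnFailure, which with the feature gate comes from the container-level field and otherwise from the pod-level one. A condensed sketch of that selection; the helper names here are illustrative and not the kubelet's:

```go
// Sketch of the sandbox-recreation selection shown in computePodActions below.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// effectiveOnFailure reports whether the container's effective policy is
// OnFailure: the container-level policy when set (gate enabled), otherwise
// the pod-level policy.
func effectiveOnFailure(spec v1.PodSpec, c v1.Container, gateEnabled bool) bool {
	if gateEnabled && c.RestartPolicy != nil {
		return *c.RestartPolicy == v1.ContainerRestartPolicyOnFailure
	}
	return spec.RestartPolicy == v1.RestartPolicyOnFailure
}

func containersToStart(spec v1.PodSpec, succeeded map[string]bool, gateEnabled bool) []int {
	var idxs []int
	for i, c := range spec.Containers {
		if effectiveOnFailure(spec, c, gateEnabled) && succeeded[c.Name] {
			continue // already ran to completion; OnFailure does not restart it
		}
		idxs = append(idxs, i)
	}
	return idxs
}

func main() {
	onFailure := v1.ContainerRestartPolicyOnFailure
	spec := v1.PodSpec{
		RestartPolicy: v1.RestartPolicyAlways,
		Containers: []v1.Container{
			{Name: "done", RestartPolicy: &onFailure},
			{Name: "worker"},
		},
	}
	fmt.Println(containersToStart(spec, map[string]bool{"done": true}, true)) // [1]
}
```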
var containersToStart []int for idx, c := range pod.Spec.Containers { - if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure && containerSucceeded(&c, podStatus) { + runOnce := pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + if c.RestartPolicy != nil { + runOnce = *c.RestartPolicy == v1.ContainerRestartPolicyOnFailure + } + } + if runOnce && containerSucceeded(&c, podStatus) { continue } + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + if c.RestartPolicy != nil && *c.RestartPolicy == v1.ContainerRestartPolicyOnFailure && containerSucceeded(&c, podStatus) { + continue + } + } containersToStart = append(containersToStart, idx) } @@ -1075,6 +1096,8 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod * } // Check initialization progress. + // TODO: Remove this code path as logically it is a subset of the next + // code path. hasInitialized := m.computeInitContainerActions(ctx, pod, podStatus, &changes) if changes.KillPod || !hasInitialized { // Initialization failed or still in progress. Skip inspecting non-init @@ -1123,6 +1146,9 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod * var message string var reason containerKillReason restart := shouldRestartOnFailure(pod) + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + restart = kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) + } if _, _, changed := containerChanged(&container, containerStatus); changed { message = fmt.Sprintf("Container %s definition changed", container.Name) // Restart regardless of the restart policy because the container diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index b11ca2300fe..118590b7f9d 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -2109,6 +2109,145 @@ func TestComputePodActionsWithInitAndEphemeralContainers(t *testing.T) { } } +func TestComputePodActionsWithContainerRestartRules(t *testing.T) { + var ( + containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways + containerRestartPolicyOnFailure = v1.ContainerRestartPolicyOnFailure + containerRestartPolicyNever = v1.ContainerRestartPolicyNever + ) + ctx := context.Background() + _, _, m, err := createTestRuntimeManager(ctx) + require.NoError(t, err) + + // Create a reference pod and status pair for the test cases below to refer + // to for specific fields.
+ basePod, baseStatus := makeBasePodAndStatus() + noAction := podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{}, + ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{}, + } + + for desc, test := range map[string]struct { + mutatePodFn func(*v1.Pod) + mutateStatusFn func(*kubecontainer.PodStatus) + actions podActions + resetStatusFn func(*kubecontainer.PodStatus) + }{ + "restart exited containers if RestartPolicy == Always": { + mutatePodFn: func(pod *v1.Pod) { + pod.Spec.Containers[0].RestartPolicy = &containerRestartPolicyAlways + pod.Spec.Containers[1].RestartPolicy = &containerRestartPolicyAlways + pod.Spec.RestartPolicy = v1.RestartPolicyNever + }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + // The first container completed, restart it, + status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[0].ExitCode = 0 + + // The second container exited with failure, restart it, + status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[1].ExitCode = 111 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{0, 1}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "restart failed containers if RestartPolicy == OnFailure": { + mutatePodFn: func(pod *v1.Pod) { + pod.Spec.Containers[0].RestartPolicy = &containerRestartPolicyOnFailure + pod.Spec.Containers[1].RestartPolicy = &containerRestartPolicyOnFailure + pod.Spec.RestartPolicy = v1.RestartPolicyNever + }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + // The first container completed, don't restart it, + status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[0].ExitCode = 0 + + // The second container exited with failure, restart it, + status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[1].ExitCode = 111 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{1}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "restart created but not started containers if RestartPolicy == OnFailure": { + mutatePodFn: func(pod *v1.Pod) { + pod.Spec.Containers[0].RestartPolicy = &containerRestartPolicyOnFailure + pod.Spec.Containers[1].RestartPolicy = &containerRestartPolicyOnFailure + pod.Spec.RestartPolicy = v1.RestartPolicyNever + }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + // The first container completed, don't restart it. + status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[0].ExitCode = 0 + + // The second container was created, but never started. + status.ContainerStatuses[1].State = kubecontainer.ContainerStateCreated + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{1}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + "don't restart containers if RestartPolicy == Never": { + mutatePodFn: func(pod *v1.Pod) { + pod.Spec.Containers[0].RestartPolicy = &containerRestartPolicyNever + pod.Spec.Containers[1].RestartPolicy = &containerRestartPolicyNever + pod.Spec.RestartPolicy = v1.RestartPolicyAlways + }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + // Don't restart any containers. 
+ status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[0].ExitCode = 0 + status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[1].ExitCode = 111 + }, + actions: noAction, + }, + "Kill pod and recreate all containers (except for the succeeded one) if the pod sandbox is dead": { + mutatePodFn: func(pod *v1.Pod) { + pod.Spec.Containers[1].RestartPolicy = &containerRestartPolicyOnFailure + pod.Spec.RestartPolicy = v1.RestartPolicyAlways + }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY + status.ContainerStatuses[1].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[1].ExitCode = 0 + }, + actions: podActions{ + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + ContainersToStart: []int{0, 2}, + ContainersToKill: getKillMap(basePod, baseStatus, []int{}), + }, + }, + } { + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true) + pod, status := makeBasePodAndStatus() + if test.mutatePodFn != nil { + test.mutatePodFn(pod) + } + if test.mutateStatusFn != nil { + test.mutateStatusFn(status) + } + ctx := context.Background() + actions := m.computePodActions(ctx, pod, status) + verifyActions(t, &test.actions, &actions, desc) + if test.resetStatusFn != nil { + test.resetStatusFn(status) + } + } +} + func TestSyncPodWithSandboxAndDeletedPod(t *testing.T) { tCtx := ktesting.Init(t) fakeRuntime, _, m, err := createTestRuntimeManager(tCtx) diff --git a/pkg/kubelet/prober/worker.go b/pkg/kubelet/prober/worker.go index aea276de6c7..8578c415d44 100644 --- a/pkg/kubelet/prober/worker.go +++ b/pkg/kubelet/prober/worker.go @@ -23,9 +23,11 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/runtime" + utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/component-base/metrics" "k8s.io/klog/v2" podutil "k8s.io/kubernetes/pkg/api/v1/pod" + "k8s.io/kubernetes/pkg/features" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/prober/results" ) @@ -252,6 +254,9 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) { w.resultsManager.Set(w.containerID, results.Failure, w.pod) } // Abort if the container will not be restarted. 
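The return statements that follow encode the prober worker's keep-going condition: probing continues while the container has not terminated, or while a terminated container is expected to run again. With the gate on, that second disjunct comes from podutil.IsContainerRestartable rather than the pod-level policy alone. Reduced to its shape (illustrative only, not the worker's code):

```go
// Shape of the prober worker's continue/abort decision; the real logic is in
// doProbe and consults podutil.IsContainerRestartable under the feature gate.
package main

import "fmt"

// keepProbing: continue while the container is still running, or while a
// terminated container will be started again.
func keepProbing(terminated, willRestart bool) bool {
	return !terminated || willRestart
}

func main() {
	fmt.Println(keepProbing(false, false)) // true: container still running
	fmt.Println(keepProbing(true, true))   // true: terminated, but a restart is coming
	fmt.Println(keepProbing(true, false))  // false: abort, the container will never run again
}
```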
+ if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + return c.State.Terminated == nil || podutil.IsContainerRestartable(w.pod.Spec, w.container) + } return c.State.Terminated == nil || w.pod.Spec.RestartPolicy != v1.RestartPolicyNever } diff --git a/pkg/kubelet/status/status_manager.go b/pkg/kubelet/status/status_manager.go index 20f636084a5..c9af9c6868e 100644 --- a/pkg/kubelet/status/status_manager.go +++ b/pkg/kubelet/status/status_manager.go @@ -692,6 +692,18 @@ func checkContainerStateTransition(oldStatuses, newStatuses *v1.PodStatus, podSp if oldStatus.State.Terminated.ExitCode != 0 && podSpec.RestartPolicy == v1.RestartPolicyOnFailure { continue } + // Skip any container that is allowed to restart by container restart policy + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + restartable := false + for _, container := range podSpec.Containers { + if container.Name == oldStatus.Name && podutil.ContainerShouldRestart(container, *podSpec, oldStatus.State.Terminated.ExitCode) { + restartable = true + } + } + if restartable { + continue + } + } for _, newStatus := range newStatuses.ContainerStatuses { if oldStatus.Name == newStatus.Name && newStatus.State.Terminated == nil { return fmt.Errorf("terminated container %v attempted illegal transition to non-terminated state", newStatus.Name) @@ -716,6 +728,18 @@ func checkContainerStateTransition(oldStatuses, newStatuses *v1.PodStatus, podSp if oldStatus.State.Terminated.ExitCode != 0 && podSpec.RestartPolicy == v1.RestartPolicyOnFailure { continue } + // Skip any container that is allowed to restart by container restart policy + if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { + restartable := false + for _, container := range podSpec.InitContainers { + if container.Name == oldStatus.Name && podutil.ContainerShouldRestart(container, *podSpec, oldStatus.State.Terminated.ExitCode) { + restartable = true + } + } + if restartable { + continue + } + } for _, newStatus := range newStatuses.InitContainerStatuses { if oldStatus.Name == newStatus.Name && newStatus.State.Terminated == nil { return fmt.Errorf("terminated init container %v attempted illegal transition to non-terminated state", newStatus.Name)
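The checkContainerStateTransition hunks above relax the terminated-to-running check for containers whose configuration allows another run. A self-contained sketch of that exemption, using a simplified stand-in for podutil.ContainerShouldRestart (see the earlier sketch for the assumed full decision); this is not the status manager's source:

```go
// Sketch of the new exemption: a terminated container may legally show up as
// non-terminated in a later status update if it is allowed to restart.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// canRestart is a simplified stand-in for podutil.ContainerShouldRestart.
func canRestart(spec v1.PodSpec, c v1.Container, exitCode int32) bool {
	if c.RestartPolicy != nil && *c.RestartPolicy == v1.ContainerRestartPolicyOnFailure {
		return exitCode != 0
	}
	return spec.RestartPolicy != v1.RestartPolicyNever // simplified fallback
}

// transitionAllowed reports whether the terminated-to-non-terminated
// transition should be tolerated for the named container.
func transitionAllowed(spec v1.PodSpec, name string, exitCode int32) bool {
	for _, c := range spec.Containers {
		if c.Name == name && canRestart(spec, c, exitCode) {
			return true
		}
	}
	return false
}

func main() {
	onFailure := v1.ContainerRestartPolicyOnFailure
	spec := v1.PodSpec{
		RestartPolicy: v1.RestartPolicyNever,
		Containers:    []v1.Container{{Name: "app", RestartPolicy: &onFailure}},
	}
	fmt.Println(transitionAllowed(spec, "app", 1)) // true: OnFailure restarts a failed container
	fmt.Println(transitionAllowed(spec, "app", 0)) // false: exit 0 is terminal for OnFailure
}
```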