Merge pull request #95561 from deads2k/container-status

kubelet container status calculation doesn't handle suddenly missing data properly
--- a/pkg/kubelet/kubelet_pods.go
+++ b/pkg/kubelet/kubelet_pods.go
@@ -1616,7 +1616,7 @@ func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontaine
 // convertToAPIContainerStatuses converts the given internal container
 // statuses into API container statuses.
 func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecontainer.PodStatus, previousStatus []v1.ContainerStatus, containers []v1.Container, hasInitContainers, isInitContainer bool) []v1.ContainerStatus {
-	convertContainerStatus := func(cs *kubecontainer.Status) *v1.ContainerStatus {
+	convertContainerStatus := func(cs *kubecontainer.Status, oldStatus *v1.ContainerStatus) *v1.ContainerStatus {
 		cid := cs.ID.String()
 		status := &v1.ContainerStatus{
 			Name:         cs.Name,
@@ -1625,17 +1625,17 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
 			ImageID:      cs.ImageID,
 			ContainerID:  cid,
 		}
-		switch cs.State {
-		case kubecontainer.ContainerStateRunning:
+		switch {
+		case cs.State == kubecontainer.ContainerStateRunning:
 			status.State.Running = &v1.ContainerStateRunning{StartedAt: metav1.NewTime(cs.StartedAt)}
-		case kubecontainer.ContainerStateCreated:
+		case cs.State == kubecontainer.ContainerStateCreated:
 			// Treat containers in the "created" state as if they are exited.
 			// The pod workers are supposed start all containers it creates in
 			// one sync (syncPod) iteration. There should not be any normal
 			// "created" containers when the pod worker generates the status at
 			// the beginning of a sync iteration.
 			fallthrough
-		case kubecontainer.ContainerStateExited:
+		case cs.State == kubecontainer.ContainerStateExited:
 			status.State.Terminated = &v1.ContainerStateTerminated{
 				ExitCode:    int32(cs.ExitCode),
 				Reason:      cs.Reason,
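Note on the change above: the value switch (`switch cs.State`) becomes a tagless switch because the case added in the next hunk needs a compound condition, the runtime state combined with the previously observed API status, which a value switch cannot express; `fallthrough` keeps working in both forms. A minimal sketch of the difference (hypothetical State type, not kubelet code):

package main

import "fmt"

type State int

const (
	StateRunning State = iota
	StateExited
	StateUnknown
)

func describe(s State, wasRunning bool) string {
	switch {
	case s == StateRunning:
		return "running"
	case s == StateUnknown && wasRunning:
		// expressible only in a tagless switch: two conditions in one case
		return "ran before, unknown now"
	default:
		return "waiting"
	}
}

func main() {
	fmt.Println(describe(StateUnknown, true)) // ran before, unknown now
}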
@@ -1644,6 +1644,24 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
 				FinishedAt:  metav1.NewTime(cs.FinishedAt),
 				ContainerID: cid,
 			}
+
+		case cs.State == kubecontainer.ContainerStateUnknown &&
+			oldStatus != nil && // we have an old status
+			oldStatus.State.Running != nil: // our previous status was running
+			// if this happens, then we know that this container was previously running and isn't anymore (assuming the CRI isn't failing to return running containers).
+			// you can imagine this happening in cases where a container failed and the kubelet didn't ask about it in time to see the result.
+			// in this case, the container should not to into waiting state immediately because that can make cases like runonce pods actually run
+			// twice. "container never ran" is different than "container ran and failed".  This is handled differently in the kubelet
+			// and it is handled differently in higher order logic like crashloop detection and handling
+			status.State.Terminated = &v1.ContainerStateTerminated{
+				Reason:   "ContainerStatusUnknown",
+				Message:  "The container could not be located when the pod was terminated",
+				ExitCode: 137, // this code indicates an error
+			}
+			// the restart count normally comes from the CRI (see near the top of this method), but since this is being added explicitly
+			// for the case where the CRI did not return a status, we need to manually increment the restart count to be accurate.
+			status.RestartCount = oldStatus.RestartCount + 1
+
 		default:
 			// this collapses any unknown state to container waiting.  If any container is waiting, then the pod status moves to pending even if it is running.
 			// if I'm reading this correctly, then any failure to read status on any container results in the entire pod going pending even if the containers
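Restating the new case in isolation: a container whose runtime state is Unknown but whose last observed API status was Running is presumed dead, so it is reported as Terminated with exit code 137 (128 + SIGKILL(9), the conventional "killed" code) instead of falling into Waiting, and the restart count is bumped by hand since the CRI supplied no status to derive it from. A self-contained sketch, with hypothetical types standing in for the kubelet's:

package main

import "fmt"

type Terminated struct {
	Reason   string
	Message  string
	ExitCode int32
}

// presumeTerminated is a hypothetical helper mirroring the added case:
// runtime state unknown + previous status running => presumed terminated.
func presumeTerminated(unknown, wasRunning bool, oldRestarts int32) (*Terminated, int32) {
	if !unknown || !wasRunning {
		return nil, oldRestarts // let the other cases handle it
	}
	t := &Terminated{
		Reason:   "ContainerStatusUnknown",
		Message:  "The container could not be located when the pod was terminated",
		ExitCode: 137, // 128 + SIGKILL(9): conventional "killed" exit code
	}
	// The CRI normally supplies the restart count; it returned nothing here,
	// so increment manually to keep the count accurate.
	return t, oldRestarts + 1
}

func main() {
	t, restarts := presumeTerminated(true, true, 0)
	fmt.Println(t.Reason, restarts) // ContainerStatusUnknown 1
}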
@@ -1767,7 +1785,11 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
 		if containerSeen[cName] >= 2 {
 			continue
 		}
-		status := convertContainerStatus(cStatus)
+		var oldStatusPtr *v1.ContainerStatus
+		if oldStatus, ok := oldStatuses[cName]; ok {
+			oldStatusPtr = &oldStatus
+		}
+		status := convertContainerStatus(cStatus, oldStatusPtr)
 		if containerSeen[cName] == 0 {
 			statuses[cName] = status
 		} else {
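The oldStatusPtr indirection exists because Go map values are not addressable: `&oldStatuses[cName]` would not compile, so the value is first copied into a local variable and its address taken only when the key was present. A quick illustration with a hypothetical map:

package main

import "fmt"

func main() {
	m := map[string]int{"a": 1}

	// p := &m["a"] // compile error: cannot take the address of m["a"]

	var p *int
	if v, ok := m["a"]; ok {
		p = &v // address of the local copy; p stays nil when the key is absent
	}
	fmt.Println(p != nil && *p == 1) // true
}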
--- a/pkg/kubelet/kubelet_pods_test.go
+++ b/pkg/kubelet/kubelet_pods_test.go
@@ -1607,6 +1607,38 @@ func TestGenerateAPIPodStatusWithReasonCache(t *testing.T) {
 				}},
 			},
 		},
+		// For Unknown Container Status:
+		// * In certain situations a container can be running and fail to retrieve the status which results in
+		// * a transition to the Unknown state. Prior to this fix, a container would make an invalid transition
+		// * from Running->Waiting. This test validates the correct behavior of transitioning from Running->Terminated.
+		{
+			containers: []v1.Container{{Name: "unknown"}},
+			statuses: []*kubecontainer.Status{
+				{
+					Name:  "unknown",
+					State: kubecontainer.ContainerStateUnknown,
+				},
+				{
+					Name:  "unknown",
+					State: kubecontainer.ContainerStateRunning,
+				},
+			},
+			reasons: map[string]error{},
+			oldStatuses: []v1.ContainerStatus{{
+				Name:  "unknown",
+				State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
+			}},
+			expectedState: map[string]v1.ContainerState{
+				"unknown": {Terminated: &v1.ContainerStateTerminated{
+					ExitCode: 137,
+					Message:  "The container could not be located when the pod was terminated",
+					Reason:   "ContainerStatusUnknown",
+				}},
+			},
+			expectedLastTerminationState: map[string]v1.ContainerState{
+				"unknown": {Running: &v1.ContainerStateRunning{}},
+			},
+		},
 	}
 
 	for i, test := range tests {
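The new table entry feeds two runtime statuses for the same container name (Unknown and Running) alongside an old API status of Running, and asserts that the current state becomes Terminated with exit code 137 while the prior Running status is kept as the last termination state. The same core expectation, checked against the hypothetical presumeTerminated helper sketched above:

package main

import "testing"

// TestUnknownAfterRunning mirrors the new table entry's expectations using
// the hypothetical presumeTerminated helper, not the kubelet's own harness.
func TestUnknownAfterRunning(t *testing.T) {
	term, restarts := presumeTerminated(true, true, 0)
	if term == nil || term.ExitCode != 137 || term.Reason != "ContainerStatusUnknown" {
		t.Fatalf("expected presumed termination with exit code 137, got %+v", term)
	}
	if restarts != 1 {
		t.Fatalf("expected restart count to be incremented to 1, got %d", restarts)
	}
}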