mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-04 04:08:16 +00:00 
			
		
		
		
	Merge pull request #37865 from yujuhong/decouple_lifecycle
Automatic merge from submit-queue. kubelet: remove the pleg health check from healthz. This prevents kubelet from being killed when docker hangs. Also, kubelet will report node not ready if PLEG hangs (`docker ps` + `docker inspect`).
This commit is contained in:
		@@ -2026,11 +2026,6 @@ func (kl *Kubelet) LatestLoopEntryTime() time.Time {
 | 
				
			|||||||
	return val.(time.Time)
 | 
						return val.(time.Time)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// PLEGHealthCheck returns whether the PLEG is healthy.
 | 
					 | 
				
			||||||
func (kl *Kubelet) PLEGHealthCheck() (bool, error) {
 | 
					 | 
				
			||||||
	return kl.pleg.Healthy()
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// updateRuntimeUp calls the container runtime status callback, initializing
 | 
					// updateRuntimeUp calls the container runtime status callback, initializing
 | 
				
			||||||
// the runtime dependent modules when the container runtime first comes up,
 | 
					// the runtime dependent modules when the container runtime first comes up,
 | 
				
			||||||
// and returns an error if the status check fails.  If the status check is OK,
 | 
					// and returns an error if the status check fails.  If the status check is OK,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -75,6 +75,11 @@ const (
 | 
				
			|||||||
	plegContainerExited      plegContainerState = "exited"
 | 
						plegContainerExited      plegContainerState = "exited"
 | 
				
			||||||
	plegContainerUnknown     plegContainerState = "unknown"
 | 
						plegContainerUnknown     plegContainerState = "unknown"
 | 
				
			||||||
	plegContainerNonExistent plegContainerState = "non-existent"
 | 
						plegContainerNonExistent plegContainerState = "non-existent"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// The threshold needs to be greater than the relisting period + the
 | 
				
			||||||
 | 
						// relisting time, which can vary significantly. Set a conservative
 | 
				
			||||||
 | 
						// threshold to avoid flipping between healthy and unhealthy.
 | 
				
			||||||
 | 
						relistThreshold = 3 * time.Minute
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func convertState(state kubecontainer.ContainerState) plegContainerState {
 | 
					func convertState(state kubecontainer.ContainerState) plegContainerState {
 | 
				
			||||||
@@ -126,13 +131,9 @@ func (g *GenericPLEG) Start() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
func (g *GenericPLEG) Healthy() (bool, error) {
 | 
					func (g *GenericPLEG) Healthy() (bool, error) {
 | 
				
			||||||
	relistTime := g.getRelistTime()
 | 
						relistTime := g.getRelistTime()
 | 
				
			||||||
	// TODO: Evaluate if we can reduce this threshold.
 | 
						elapsed := g.clock.Since(relistTime)
 | 
				
			||||||
	// The threshold needs to be greater than the relisting period + the
 | 
						if elapsed > relistThreshold {
 | 
				
			||||||
	// relisting time, which can vary significantly. Set a conservative
 | 
							return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, relistThreshold)
 | 
				
			||||||
	// threshold so that we don't cause kubelet to be restarted unnecessarily.
 | 
					 | 
				
			||||||
	threshold := 2 * time.Minute
 | 
					 | 
				
			||||||
	if g.clock.Since(relistTime) > threshold {
 | 
					 | 
				
			||||||
		return false, fmt.Errorf("pleg was last seen active at %v", relistTime)
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return true, nil
 | 
						return true, nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,6 +30,22 @@ type runtimeState struct {
 | 
				
			|||||||
	internalError            error
 | 
						internalError            error
 | 
				
			||||||
	cidr                     string
 | 
						cidr                     string
 | 
				
			||||||
	initError                error
 | 
						initError                error
 | 
				
			||||||
 | 
						healthChecks             []*healthCheck
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// A health check function should be efficient and not rely on external
 | 
				
			||||||
 | 
					// components (e.g., container runtime).
 | 
				
			||||||
 | 
					type healthCheckFnType func() (bool, error)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					type healthCheck struct {
 | 
				
			||||||
 | 
						name string
 | 
				
			||||||
 | 
						fn   healthCheckFnType
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func (s *runtimeState) addHealthCheck(name string, f healthCheckFnType) {
 | 
				
			||||||
 | 
						s.Lock()
 | 
				
			||||||
 | 
						defer s.Unlock()
 | 
				
			||||||
 | 
						s.healthChecks = append(s.healthChecks, &healthCheck{name: name, fn: f})
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func (s *runtimeState) setRuntimeSync(t time.Time) {
 | 
					func (s *runtimeState) setRuntimeSync(t time.Time) {
 | 
				
			||||||
@@ -81,6 +97,12 @@ func (s *runtimeState) runtimeErrors() []string {
 | 
				
			|||||||
	if s.internalError != nil {
 | 
						if s.internalError != nil {
 | 
				
			||||||
		ret = append(ret, s.internalError.Error())
 | 
							ret = append(ret, s.internalError.Error())
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						for _, hc := range s.healthChecks {
 | 
				
			||||||
 | 
							if ok, err := hc.fn(); !ok {
 | 
				
			||||||
 | 
								ret = append(ret, fmt.Sprintf("%s is not healthy: %v", hc.name, err))
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return ret
 | 
						return ret
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -182,7 +182,6 @@ type HostInterface interface {
 | 
				
			|||||||
	ImagesFsInfo() (cadvisorapiv2.FsInfo, error)
 | 
						ImagesFsInfo() (cadvisorapiv2.FsInfo, error)
 | 
				
			||||||
	RootFsInfo() (cadvisorapiv2.FsInfo, error)
 | 
						RootFsInfo() (cadvisorapiv2.FsInfo, error)
 | 
				
			||||||
	ListVolumesForPod(podUID types.UID) (map[string]volume.Volume, bool)
 | 
						ListVolumesForPod(podUID types.UID) (map[string]volume.Volume, bool)
 | 
				
			||||||
	PLEGHealthCheck() (bool, error)
 | 
					 | 
				
			||||||
	GetExec(podFullName string, podUID types.UID, containerName string, cmd []string, streamOpts remotecommand.Options) (*url.URL, error)
 | 
						GetExec(podFullName string, podUID types.UID, containerName string, cmd []string, streamOpts remotecommand.Options) (*url.URL, error)
 | 
				
			||||||
	GetAttach(podFullName string, podUID types.UID, containerName string, streamOpts remotecommand.Options) (*url.URL, error)
 | 
						GetAttach(podFullName string, podUID types.UID, containerName string, streamOpts remotecommand.Options) (*url.URL, error)
 | 
				
			||||||
	GetPortForward(podName, podNamespace string, podUID types.UID) (*url.URL, error)
 | 
						GetPortForward(podName, podNamespace string, podUID types.UID) (*url.URL, error)
 | 
				
			||||||
@@ -257,7 +256,6 @@ func (s *Server) InstallDefaultHandlers() {
 | 
				
			|||||||
	healthz.InstallHandler(s.restfulCont,
 | 
						healthz.InstallHandler(s.restfulCont,
 | 
				
			||||||
		healthz.PingHealthz,
 | 
							healthz.PingHealthz,
 | 
				
			||||||
		healthz.NamedCheck("syncloop", s.syncLoopHealthCheck),
 | 
							healthz.NamedCheck("syncloop", s.syncLoopHealthCheck),
 | 
				
			||||||
		healthz.NamedCheck("pleg", s.plegHealthCheck),
 | 
					 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
	var ws *restful.WebService
 | 
						var ws *restful.WebService
 | 
				
			||||||
	ws = new(restful.WebService)
 | 
						ws = new(restful.WebService)
 | 
				
			||||||
@@ -417,14 +415,6 @@ func (s *Server) syncLoopHealthCheck(req *http.Request) error {
 | 
				
			|||||||
	return nil
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Checks if pleg, which lists pods periodically, is healthy.
 | 
					 | 
				
			||||||
func (s *Server) plegHealthCheck(req *http.Request) error {
 | 
					 | 
				
			||||||
	if ok, err := s.host.PLEGHealthCheck(); !ok {
 | 
					 | 
				
			||||||
		return fmt.Errorf("PLEG took longer than expected: %v", err)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// getContainerLogs handles containerLogs request against the Kubelet
 | 
					// getContainerLogs handles containerLogs request against the Kubelet
 | 
				
			||||||
func (s *Server) getContainerLogs(request *restful.Request, response *restful.Response) {
 | 
					func (s *Server) getContainerLogs(request *restful.Request, response *restful.Response) {
 | 
				
			||||||
	podNamespace := request.PathParameter("podNamespace")
 | 
						podNamespace := request.PathParameter("podNamespace")
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -159,8 +159,6 @@ func (fk *fakeKubelet) StreamingConnectionIdleTimeout() time.Duration {
 | 
				
			|||||||
	return fk.streamingConnectionIdleTimeoutFunc()
 | 
						return fk.streamingConnectionIdleTimeoutFunc()
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func (fk *fakeKubelet) PLEGHealthCheck() (bool, error) { return fk.plegHealth, nil }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Unused functions
 | 
					// Unused functions
 | 
				
			||||||
func (_ *fakeKubelet) GetContainerInfoV2(_ string, _ cadvisorapiv2.RequestOptions) (map[string]cadvisorapiv2.ContainerInfo, error) {
 | 
					func (_ *fakeKubelet) GetContainerInfoV2(_ string, _ cadvisorapiv2.RequestOptions) (map[string]cadvisorapiv2.ContainerInfo, error) {
 | 
				
			||||||
	return nil, nil
 | 
						return nil, nil
 | 
				
			||||||
@@ -869,18 +867,6 @@ func TestSyncLoopCheck(t *testing.T) {
 | 
				
			|||||||
	assertHealthFails(t, fw.testHTTPServer.URL+"/healthz", http.StatusInternalServerError)
 | 
						assertHealthFails(t, fw.testHTTPServer.URL+"/healthz", http.StatusInternalServerError)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func TestPLEGHealthCheck(t *testing.T) {
 | 
					 | 
				
			||||||
	fw := newServerTest()
 | 
					 | 
				
			||||||
	defer fw.testHTTPServer.Close()
 | 
					 | 
				
			||||||
	fw.fakeKubelet.hostnameFunc = func() string {
 | 
					 | 
				
			||||||
		return "127.0.0.1"
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// Test with failed pleg health check.
 | 
					 | 
				
			||||||
	fw.fakeKubelet.plegHealth = false
 | 
					 | 
				
			||||||
	assertHealthFails(t, fw.testHTTPServer.URL+"/healthz", http.StatusInternalServerError)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// returns http response status code from the HTTP GET
 | 
					// returns http response status code from the HTTP GET
 | 
				
			||||||
func assertHealthIsOk(t *testing.T, httpURL string) {
 | 
					func assertHealthIsOk(t *testing.T, httpURL string) {
 | 
				
			||||||
	resp, err := http.Get(httpURL)
 | 
						resp, err := http.Get(httpURL)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user