mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-10-31 02:08:13 +00:00 
			
		
		
		
	Check the health of PLEG when updating the node status
This commit is contained in:
		| @@ -75,6 +75,11 @@ const ( | |||||||
| 	plegContainerExited      plegContainerState = "exited" | 	plegContainerExited      plegContainerState = "exited" | ||||||
| 	plegContainerUnknown     plegContainerState = "unknown" | 	plegContainerUnknown     plegContainerState = "unknown" | ||||||
| 	plegContainerNonExistent plegContainerState = "non-existent" | 	plegContainerNonExistent plegContainerState = "non-existent" | ||||||
|  |  | ||||||
|  | 	// The threshold needs to be greater than the relisting period + the | ||||||
|  | 	// relisting time, which can vary significantly. Set a conservative | ||||||
|  | 	// threshold to avoid flipping between healthy and unhealthy. | ||||||
|  | 	relistThreshold = 3 * time.Minute | ||||||
| ) | ) | ||||||
|  |  | ||||||
| func convertState(state kubecontainer.ContainerState) plegContainerState { | func convertState(state kubecontainer.ContainerState) plegContainerState { | ||||||
| @@ -126,13 +131,9 @@ func (g *GenericPLEG) Start() { | |||||||
|  |  | ||||||
| func (g *GenericPLEG) Healthy() (bool, error) { | func (g *GenericPLEG) Healthy() (bool, error) { | ||||||
| 	relistTime := g.getRelistTime() | 	relistTime := g.getRelistTime() | ||||||
| 	// TODO: Evaluate if we can reduce this threshold. | 	elapsed := g.clock.Since(relistTime) | ||||||
| 	// The threshold needs to be greater than the relisting period + the | 	if elapsed > relistThreshold { | ||||||
| 	// relisting time, which can vary significantly. Set a conservative | 		return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, relistThreshold) | ||||||
| 	// threshold so that we don't cause kubelet to be restarted unnecessarily. |  | ||||||
| 	threshold := 2 * time.Minute |  | ||||||
| 	if g.clock.Since(relistTime) > threshold { |  | ||||||
| 		return false, fmt.Errorf("pleg was last seen active at %v", relistTime) |  | ||||||
| 	} | 	} | ||||||
| 	return true, nil | 	return true, nil | ||||||
| } | } | ||||||
|   | |||||||
| @@ -30,6 +30,22 @@ type runtimeState struct { | |||||||
| 	internalError            error | 	internalError            error | ||||||
| 	cidr                     string | 	cidr                     string | ||||||
| 	initError                error | 	initError                error | ||||||
|  | 	healthChecks             []*healthCheck | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // A health check function should be efficient and not rely on external | ||||||
|  | // components (e.g., container runtime). | ||||||
|  | type healthCheckFnType func() (bool, error) | ||||||
|  |  | ||||||
|  | type healthCheck struct { | ||||||
|  | 	name string | ||||||
|  | 	fn   healthCheckFnType | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *runtimeState) addHealthCheck(name string, f healthCheckFnType) { | ||||||
|  | 	s.Lock() | ||||||
|  | 	defer s.Unlock() | ||||||
|  | 	s.healthChecks = append(s.healthChecks, &healthCheck{name: name, fn: f}) | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *runtimeState) setRuntimeSync(t time.Time) { | func (s *runtimeState) setRuntimeSync(t time.Time) { | ||||||
| @@ -81,6 +97,12 @@ func (s *runtimeState) runtimeErrors() []string { | |||||||
| 	if s.internalError != nil { | 	if s.internalError != nil { | ||||||
| 		ret = append(ret, s.internalError.Error()) | 		ret = append(ret, s.internalError.Error()) | ||||||
| 	} | 	} | ||||||
|  | 	for _, hc := range s.healthChecks { | ||||||
|  | 		if ok, err := hc.fn(); !ok { | ||||||
|  | 			ret = append(ret, fmt.Sprintf("%s is not healthy: %v", hc.name, err)) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	return ret | 	return ret | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Yu-Ju Hong
					Yu-Ju Hong