mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-10-30 17:58:14 +00:00 
			
		
		
		
	Check the health of PLEG when updating the node status
This commit is contained in:
		| @@ -75,6 +75,11 @@ const ( | ||||
| 	plegContainerExited      plegContainerState = "exited" | ||||
| 	plegContainerUnknown     plegContainerState = "unknown" | ||||
| 	plegContainerNonExistent plegContainerState = "non-existent" | ||||
|  | ||||
| 	// The threshold needs to be greater than the relisting period + the | ||||
| 	// relisting time, which can vary significantly. Set a conservative | ||||
| 	// threshold to avoid flipping between healthy and unhealthy. | ||||
| 	relistThreshold = 3 * time.Minute | ||||
| ) | ||||
|  | ||||
| func convertState(state kubecontainer.ContainerState) plegContainerState { | ||||
| @@ -126,13 +131,9 @@ func (g *GenericPLEG) Start() { | ||||
|  | ||||
| func (g *GenericPLEG) Healthy() (bool, error) { | ||||
| 	relistTime := g.getRelistTime() | ||||
| 	// TODO: Evaluate if we can reduce this threshold. | ||||
| 	// The threshold needs to be greater than the relisting period + the | ||||
| 	// relisting time, which can vary significantly. Set a conservative | ||||
| 	// threshold so that we don't cause kubelet to be restarted unnecessarily. | ||||
| 	threshold := 2 * time.Minute | ||||
| 	if g.clock.Since(relistTime) > threshold { | ||||
| 		return false, fmt.Errorf("pleg was last seen active at %v", relistTime) | ||||
| 	elapsed := g.clock.Since(relistTime) | ||||
| 	if elapsed > relistThreshold { | ||||
| 		return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, relistThreshold) | ||||
| 	} | ||||
| 	return true, nil | ||||
| } | ||||
|   | ||||
| @@ -30,6 +30,22 @@ type runtimeState struct { | ||||
| 	internalError            error | ||||
| 	cidr                     string | ||||
| 	initError                error | ||||
| 	healthChecks             []*healthCheck | ||||
| } | ||||
|  | ||||
// healthCheckFnType is the signature of a health check function. The bool
// result reports whether the component is healthy; when it is false, the
// error is included in the message produced by runtimeErrors.
//
// A health check function should be efficient and not rely on external
// components (e.g., container runtime).
type healthCheckFnType func() (bool, error)

// healthCheck pairs a health check function with the name used to identify
// it when it is reported as unhealthy.
type healthCheck struct {
	// name identifies the checked component in error messages.
	name string
	// fn is the function invoked to evaluate the check.
	fn healthCheckFnType
}
|  | ||||
| func (s *runtimeState) addHealthCheck(name string, f healthCheckFnType) { | ||||
| 	s.Lock() | ||||
| 	defer s.Unlock() | ||||
| 	s.healthChecks = append(s.healthChecks, &healthCheck{name: name, fn: f}) | ||||
| } | ||||
|  | ||||
| func (s *runtimeState) setRuntimeSync(t time.Time) { | ||||
| @@ -81,6 +97,12 @@ func (s *runtimeState) runtimeErrors() []string { | ||||
| 	if s.internalError != nil { | ||||
| 		ret = append(ret, s.internalError.Error()) | ||||
| 	} | ||||
| 	for _, hc := range s.healthChecks { | ||||
| 		if ok, err := hc.fn(); !ok { | ||||
| 			ret = append(ret, fmt.Sprintf("%s is not healthy: %v", hc.name, err)) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return ret | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Yu-Ju Hong
					Yu-Ju Hong