mirror of https://github.com/optim-enterprises-bv/kubernetes.git
synced 2025-10-31 10:18:13 +00:00

commit f2bd94a0de

1. Core Kubelet changes to implement In-place Pod Vertical Scaling.
2. E2E tests for In-place Pod Vertical Scaling.
3. Refactor kubelet code and add missing tests (Derek's kubelet review).
4. Add a new hash over container fields without the Resources field, to allow feature-gate toggling without restarting containers that do not use the feature.
5. Fix a corner case where a resize A->B->A gets ignored.
6. Add cgroup v2 support to the pod resize E2E test.

KEP: /enhancements/keps/sig-node/1287-in-place-update-pod-resources
Co-authored-by: Chen Wang <Chen.Wang1@ibm.com>

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package pleg

import (
	"context"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/features"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/utils/clock"
)

// GenericPLEG is an extremely simple generic PLEG that relies solely on
// periodic listing to discover container changes. It should be used
// as a temporary replacement for container runtimes that do not support
// a proper event generator yet.
//
// Note that GenericPLEG assumes that a container would not be created,
// terminated, and garbage collected within one relist period. If such an
// incident happens, GenericPLEG would miss all events regarding this
// container. In the case of relisting failure, the window may become longer.
// Note that this assumption is not unique -- many kubelet internal components
// rely on terminated containers as tombstones for bookkeeping purposes. The
// garbage collector is implemented to work with such situations. However, to
// guarantee that kubelet can handle missing container events, it is
// recommended to set the relist period short and to have an auxiliary, longer
// periodic sync in kubelet as the safety net.
type GenericPLEG struct {
	// The container runtime.
	runtime kubecontainer.Runtime
	// The channel from which the subscriber listens for events.
	eventChannel chan *PodLifecycleEvent
	// The internal cache for pod/container information.
	podRecords podRecords
	// Time of the last relisting.
	relistTime atomic.Value
	// Cache for storing the runtime states required for syncing pods.
	cache kubecontainer.Cache
	// For testability.
	clock clock.Clock
	// Pods that failed to have their status retrieved during a relist. These pods will be
	// retried during the next relisting.
	podsToReinspect map[types.UID]*kubecontainer.Pod
	// Stop the Generic PLEG by closing the channel.
	stopCh chan struct{}
	// Locks the relisting of the Generic PLEG.
	relistLock sync.Mutex
	// Indicates whether the Generic PLEG is running.
	isRunning bool
	// Locks the start/stop operations of the Generic PLEG.
	runningMu sync.Mutex
	// Relisting-related parameters (period and threshold).
	relistDuration *RelistDuration
	// Mutex to serialize updateCache calls made by Relist and the UpdateCache interface.
	podCacheMutex sync.Mutex
}

// plegContainerState has a one-to-one mapping to the
// kubecontainer.State except for the non-existent state. This state
// is introduced here to complete the state transition scenarios.
type plegContainerState string

const (
	plegContainerRunning     plegContainerState = "running"
	plegContainerExited      plegContainerState = "exited"
	plegContainerUnknown     plegContainerState = "unknown"
	plegContainerNonExistent plegContainerState = "non-existent"
)

func convertState(state kubecontainer.State) plegContainerState {
	switch state {
	case kubecontainer.ContainerStateCreated:
		// kubelet doesn't use the "created" state yet, hence convert it to "unknown".
		return plegContainerUnknown
	case kubecontainer.ContainerStateRunning:
		return plegContainerRunning
	case kubecontainer.ContainerStateExited:
		return plegContainerExited
	case kubecontainer.ContainerStateUnknown:
		return plegContainerUnknown
	default:
		panic(fmt.Sprintf("unrecognized container state: %v", state))
	}
}

type podRecord struct {
	old     *kubecontainer.Pod
	current *kubecontainer.Pod
}

type podRecords map[types.UID]*podRecord

// NewGenericPLEG instantiates a new GenericPLEG object and returns it.
func NewGenericPLEG(runtime kubecontainer.Runtime, eventChannel chan *PodLifecycleEvent,
	relistDuration *RelistDuration, cache kubecontainer.Cache,
	clock clock.Clock) PodLifecycleEventGenerator {
	return &GenericPLEG{
		relistDuration: relistDuration,
		runtime:        runtime,
		eventChannel:   eventChannel,
		podRecords:     make(podRecords),
		cache:          cache,
		clock:          clock,
	}
}
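
// A minimal usage sketch, not the kubelet's real wiring: the channel size,
// durations, and the runtime/cache values below are illustrative assumptions.
//
//	events := make(chan *PodLifecycleEvent, 1000)
//	pleg := NewGenericPLEG(runtime, events, &RelistDuration{
//		RelistPeriod:    1 * time.Second,
//		RelistThreshold: 3 * time.Minute,
//	}, cache, clock.RealClock{})
//	pleg.Start()
//	defer pleg.Stop()
//	for ev := range pleg.Watch() {
//		// Handle ContainerStarted/ContainerDied/ContainerRemoved for ev.ID.
//		_ = ev
//	}
//
// The subscriber must keep draining Watch(); Relist drops events (and bumps a
// metric) when the channel is full.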

// Watch returns a channel from which the subscriber can receive PodLifecycleEvent
// events.
// TODO: support multiple subscribers.
func (g *GenericPLEG) Watch() chan *PodLifecycleEvent {
	return g.eventChannel
}

// Start spawns a goroutine to relist periodically.
func (g *GenericPLEG) Start() {
	g.runningMu.Lock()
	defer g.runningMu.Unlock()
	if !g.isRunning {
		g.isRunning = true
		g.stopCh = make(chan struct{})
		go wait.Until(g.Relist, g.relistDuration.RelistPeriod, g.stopCh)
	}
}

func (g *GenericPLEG) Stop() {
	g.runningMu.Lock()
	defer g.runningMu.Unlock()
	if g.isRunning {
		close(g.stopCh)
		g.isRunning = false
	}
}

func (g *GenericPLEG) Update(relistDuration *RelistDuration) {
	g.relistDuration = relistDuration
}

// Healthy checks if the PLEG is working properly.
// relistThreshold is the maximum interval between two relists.
func (g *GenericPLEG) Healthy() (bool, error) {
	relistTime := g.getRelistTime()
	if relistTime.IsZero() {
		return false, fmt.Errorf("pleg has yet to be successful")
	}
	// Expose as metric so you can alert on `time()-pleg_last_seen_seconds > nn`
	metrics.PLEGLastSeen.Set(float64(relistTime.Unix()))
	elapsed := g.clock.Since(relistTime)
	if elapsed > g.relistDuration.RelistThreshold {
		return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, g.relistDuration.RelistThreshold)
	}
	return true, nil
}
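
// A sketch of how a caller might poll Healthy as a liveness signal; the
// interval and logging below are assumptions, not the kubelet's actual
// healthz wiring:
//
//	go wait.Forever(func() {
//		if healthy, err := pleg.Healthy(); !healthy {
//			klog.ErrorS(err, "PLEG is not healthy")
//		}
//	}, 10*time.Second)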

func generateEvents(podID types.UID, cid string, oldState, newState plegContainerState) []*PodLifecycleEvent {
	if newState == oldState {
		return nil
	}

	klog.V(4).InfoS("GenericPLEG", "podUID", podID, "containerID", cid, "oldState", oldState, "newState", newState)
	switch newState {
	case plegContainerRunning:
		return []*PodLifecycleEvent{{ID: podID, Type: ContainerStarted, Data: cid}}
	case plegContainerExited:
		return []*PodLifecycleEvent{{ID: podID, Type: ContainerDied, Data: cid}}
	case plegContainerUnknown:
		return []*PodLifecycleEvent{{ID: podID, Type: ContainerChanged, Data: cid}}
	case plegContainerNonExistent:
		switch oldState {
		case plegContainerExited:
			// We already reported that the container died before.
			return []*PodLifecycleEvent{{ID: podID, Type: ContainerRemoved, Data: cid}}
		default:
			return []*PodLifecycleEvent{{ID: podID, Type: ContainerDied, Data: cid}, {ID: podID, Type: ContainerRemoved, Data: cid}}
		}
	default:
		panic(fmt.Sprintf("unrecognized container state: %v", newState))
	}
}
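
// Examples of the transitions handled above (old state -> new state => events):
//
//	non-existent -> running      => ContainerStarted
//	running      -> exited       => ContainerDied
//	exited       -> non-existent => ContainerRemoved
//	running      -> non-existent => ContainerDied, ContainerRemoved
//	unchanged state              => no events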

func (g *GenericPLEG) getRelistTime() time.Time {
	val := g.relistTime.Load()
	if val == nil {
		return time.Time{}
	}
	return val.(time.Time)
}

func (g *GenericPLEG) updateRelistTime(timestamp time.Time) {
	g.relistTime.Store(timestamp)
}

// Relist queries the container runtime for a list of pods/containers, compares
// them with the internal pods/containers, and generates events accordingly.
func (g *GenericPLEG) Relist() {
	g.relistLock.Lock()
	defer g.relistLock.Unlock()

	ctx := context.Background()
	klog.V(5).InfoS("GenericPLEG: Relisting")

	if lastRelistTime := g.getRelistTime(); !lastRelistTime.IsZero() {
		metrics.PLEGRelistInterval.Observe(metrics.SinceInSeconds(lastRelistTime))
	}

	timestamp := g.clock.Now()
	defer func() {
		metrics.PLEGRelistDuration.Observe(metrics.SinceInSeconds(timestamp))
	}()

	// Get all the pods.
	podList, err := g.runtime.GetPods(ctx, true)
	if err != nil {
		klog.ErrorS(err, "GenericPLEG: Unable to retrieve pods")
		return
	}

	g.updateRelistTime(timestamp)

	pods := kubecontainer.Pods(podList)
	// update running pod and container count
	updateRunningPodAndContainerMetrics(pods)
	g.podRecords.setCurrent(pods)

	// Compare the old and the current pods, and generate events.
	eventsByPodID := map[types.UID][]*PodLifecycleEvent{}
	for pid := range g.podRecords {
		oldPod := g.podRecords.getOld(pid)
		pod := g.podRecords.getCurrent(pid)
		// Get all containers in the old and the new pod.
		allContainers := getContainersFromPods(oldPod, pod)
		for _, container := range allContainers {
			events := computeEvents(oldPod, pod, &container.ID)
			for _, e := range events {
				updateEvents(eventsByPodID, e)
			}
		}
	}

	var needsReinspection map[types.UID]*kubecontainer.Pod
	if g.cacheEnabled() {
		needsReinspection = make(map[types.UID]*kubecontainer.Pod)
	}

	// If there are events associated with a pod, we should update the
	// podCache.
	for pid, events := range eventsByPodID {
		pod := g.podRecords.getCurrent(pid)
		if g.cacheEnabled() {
			// updateCache() will inspect the pod and update the cache. If an
			// error occurs during the inspection, we want PLEG to retry again
			// in the next relist. To achieve this, we do not update the
			// associated podRecord of the pod, so that the change will be
			// detected again in the next relist.
			// TODO: If many pods changed during the same relist period,
			// inspecting the pod and getting the PodStatus to update the cache
			// serially may take a while. We should be aware of this and
			// parallelize if needed.
			if err, updated := g.updateCache(ctx, pod, pid); err != nil {
				// Rely on updateCache calling GetPodStatus to log the actual error.
				klog.V(4).ErrorS(err, "PLEG: Ignoring events for pod", "pod", klog.KRef(pod.Namespace, pod.Name))

				// make sure we try to reinspect the pod during the next relisting
				needsReinspection[pid] = pod

				continue
			} else {
				// this pod was in the list to reinspect and we did so because it had events, so remove it
				// from the list (we don't want the reinspection code below to inspect it a second time in
				// this relist execution)
				delete(g.podsToReinspect, pid)
				if utilfeature.DefaultFeatureGate.Enabled(features.EventedPLEG) {
					if !updated {
						continue
					}
				}
			}
		}
		// Update the internal storage and send out the events.
		g.podRecords.update(pid)

		// Map from containerId to exit code; used as a temporary cache for lookup.
		containerExitCode := make(map[string]int)

		for i := range events {
			// Filter out events that are not reliable and that no other components use yet.
			if events[i].Type == ContainerChanged {
				continue
			}
			select {
			case g.eventChannel <- events[i]:
			default:
				metrics.PLEGDiscardEvents.Inc()
				klog.ErrorS(nil, "Event channel is full, discard this relist() cycle event")
			}
			// Log the exit code of containers when they finish.
			if events[i].Type == ContainerDied {
				// Populate the containerExitCode map on the first ContainerDied event.
				if len(containerExitCode) == 0 && pod != nil && g.cache != nil {
					// Get the updated podStatus.
					status, err := g.cache.Get(pod.ID)
					if err == nil {
						for _, containerStatus := range status.ContainerStatuses {
							containerExitCode[containerStatus.ID.ID] = containerStatus.ExitCode
						}
					}
				}
				if containerID, ok := events[i].Data.(string); ok {
					if exitCode, ok := containerExitCode[containerID]; ok && pod != nil {
						klog.V(2).InfoS("Generic (PLEG): container finished", "podID", pod.ID, "containerID", containerID, "exitCode", exitCode)
					}
				}
			}
		}
	}

	if g.cacheEnabled() {
		// reinspect any pods that failed inspection during the previous relist
		if len(g.podsToReinspect) > 0 {
			klog.V(5).InfoS("GenericPLEG: Reinspecting pods that previously failed inspection")
			for pid, pod := range g.podsToReinspect {
				if err, _ := g.updateCache(ctx, pod, pid); err != nil {
					// Rely on updateCache calling GetPodStatus to log the actual error.
					klog.V(5).ErrorS(err, "PLEG: pod failed reinspection", "pod", klog.KRef(pod.Namespace, pod.Name))
					needsReinspection[pid] = pod
				}
			}
		}

		// Update the cache timestamp.  This needs to happen *after*
		// all pods have been properly updated in the cache.
		g.cache.UpdateTime(timestamp)
	}

	// make sure we retain the list of pods that need reinspecting the next time relist is called
	g.podsToReinspect = needsReinspection
}
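
// A worked example of one relist cycle: suppose the previous relist recorded
// container c1 of pod p1 as running, and GetPods now reports c1 as exited.
// computeEvents then yields a ContainerDied event for p1; updateCache
// refreshes p1's PodStatus (which also lets the exit-code block above report
// c1's exit code); podRecords.update promotes p1's current record to old; and
// the ContainerDied event is sent on eventChannel, or counted as discarded if
// the channel is full.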

func getContainersFromPods(pods ...*kubecontainer.Pod) []*kubecontainer.Container {
	cidSet := sets.NewString()
	var containers []*kubecontainer.Container
	fillCidSet := func(cs []*kubecontainer.Container) {
		for _, c := range cs {
			cid := c.ID.ID
			if cidSet.Has(cid) {
				continue
			}
			cidSet.Insert(cid)
			containers = append(containers, c)
		}
	}

	for _, p := range pods {
		if p == nil {
			continue
		}
		fillCidSet(p.Containers)
		// Update sandboxes as containers
		// TODO: keep track of sandboxes explicitly.
		fillCidSet(p.Sandboxes)
	}
	return containers
}

func computeEvents(oldPod, newPod *kubecontainer.Pod, cid *kubecontainer.ContainerID) []*PodLifecycleEvent {
	var pid types.UID
	if oldPod != nil {
		pid = oldPod.ID
	} else if newPod != nil {
		pid = newPod.ID
	}
	oldState := getContainerState(oldPod, cid)
	newState := getContainerState(newPod, cid)
	return generateEvents(pid, cid.ID, oldState, newState)
}

func (g *GenericPLEG) cacheEnabled() bool {
	return g.cache != nil
}

// getPodIPs preserves an older cached status' pod IPs if the new status has no pod IPs
// and its sandboxes have exited.
func (g *GenericPLEG) getPodIPs(pid types.UID, status *kubecontainer.PodStatus) []string {
	if len(status.IPs) != 0 {
		return status.IPs
	}

	oldStatus, err := g.cache.Get(pid)
	if err != nil || len(oldStatus.IPs) == 0 {
		return nil
	}

	for _, sandboxStatus := range status.SandboxStatuses {
		// If at least one sandbox is ready, then use this status update's pod IPs.
		if sandboxStatus.State == runtimeapi.PodSandboxState_SANDBOX_READY {
			return status.IPs
		}
	}

	// For pods with no ready containers or sandboxes (like exited pods),
	// use the old status' pod IPs.
	return oldStatus.IPs
}
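
// For example: a pod is relisted after its sandbox exited, so the new status
// carries no IPs and no sandbox is SANDBOX_READY. If the cache still holds a
// status with IPs such as ["10.0.0.5"] (an illustrative address), those old
// IPs are returned so the terminal pod status keeps reporting an IP.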

// updateCache tries to update the pod status in the kubelet cache and returns true if the
// pod status was actually updated in the cache. It will return false if the pod status
// was ignored by the cache.
func (g *GenericPLEG) updateCache(ctx context.Context, pod *kubecontainer.Pod, pid types.UID) (error, bool) {
	if pod == nil {
		// The pod is missing in the current relist. This means that
		// the pod has no visible (active or inactive) containers.
		klog.V(4).InfoS("PLEG: Delete status for pod", "podUID", string(pid))
		g.cache.Delete(pid)
		return nil, true
	}

	g.podCacheMutex.Lock()
	defer g.podCacheMutex.Unlock()
	timestamp := g.clock.Now()

	status, err := g.runtime.GetPodStatus(ctx, pod.ID, pod.Name, pod.Namespace)
	if err != nil {
		// nolint:logcheck // Not using the result of klog.V inside the
		// if branch is okay, we just use it to determine whether the
		// additional "podStatus" key and its value should be added.
		if klog.V(6).Enabled() {
			klog.ErrorS(err, "PLEG: Write status", "pod", klog.KRef(pod.Namespace, pod.Name), "podStatus", status)
		} else {
			klog.ErrorS(err, "PLEG: Write status", "pod", klog.KRef(pod.Namespace, pod.Name))
		}
	} else {
		if klogV := klog.V(6); klogV.Enabled() {
			klogV.InfoS("PLEG: Write status", "pod", klog.KRef(pod.Namespace, pod.Name), "podStatus", status)
		} else {
			klog.V(4).InfoS("PLEG: Write status", "pod", klog.KRef(pod.Namespace, pod.Name))
		}
		// Preserve the pod IP across cache updates if the new IP is empty.
		// When a pod is torn down, kubelet may race with PLEG and retrieve
		// a pod status after network teardown, but the kubernetes API expects
		// the completed pod's IP to be available after the pod is dead.
		status.IPs = g.getPodIPs(pid, status)
	}

	// When we use the Generic PLEG only, the PodStatus is saved in the cache without
	// any validation of the existing status against the current timestamp.
	// This works well when only the Generic PLEG sets the PodStatus in the cache. However,
	// if we have multiple entities, such as the Evented PLEG, trying to set the PodStatus in
	// the cache, we may run into racy timestamps, since each of them calculates its timestamp
	// in its own execution flow. While the Generic PLEG calculates this timestamp and then gets
	// the PodStatus, we can only calculate the corresponding timestamp in the
	// Evented PLEG after the event has been received by the Kubelet.
	// For more details refer to:
	// https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/3386-kubelet-evented-pleg#timestamp-of-the-pod-status
	if utilfeature.DefaultFeatureGate.Enabled(features.EventedPLEG) && isEventedPLEGInUse() {
		timestamp = status.TimeStamp
	}

	return err, g.cache.Set(pod.ID, status, err, timestamp)
}
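
// Illustration of the timestamp choice above, with hypothetical times: with
// only the Generic PLEG, the cache entry is stamped with the time taken just
// before GetPodStatus (say t=100). With EventedPLEG enabled and in use, the
// runtime-provided status.TimeStamp (say t=98) is used instead, so the cache
// can recognize this relisted status as older than one already written by the
// Evented PLEG; the bool returned by cache.Set (and propagated here) reports
// whether the entry was actually updated.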

func (g *GenericPLEG) UpdateCache(pod *kubecontainer.Pod, pid types.UID) error {
	if !g.cacheEnabled() {
		return fmt.Errorf("pod cache disabled")
	}
	if pod == nil {
		return fmt.Errorf("pod cannot be nil")
	}
	err, _ := g.updateCache(context.Background(), pod, pid)
	return err
}

func updateEvents(eventsByPodID map[types.UID][]*PodLifecycleEvent, e *PodLifecycleEvent) {
	if e == nil {
		return
	}
	eventsByPodID[e.ID] = append(eventsByPodID[e.ID], e)
}

func getContainerState(pod *kubecontainer.Pod, cid *kubecontainer.ContainerID) plegContainerState {
	// Default to the non-existent state.
	state := plegContainerNonExistent
	if pod == nil {
		return state
	}
	c := pod.FindContainerByID(*cid)
	if c != nil {
		return convertState(c.State)
	}
	// Search through sandboxes too.
	c = pod.FindSandboxByID(*cid)
	if c != nil {
		return convertState(c.State)
	}

	return state
}

func updateRunningPodAndContainerMetrics(pods []*kubecontainer.Pod) {
	runningSandboxNum := 0
	// intermediate map to store the count of each "container_state"
	containerStateCount := make(map[string]int)

	for _, pod := range pods {
		containers := pod.Containers
		for _, container := range containers {
			// update the corresponding "container_state" in the map to set the value for the gaugeVec metric
			containerStateCount[string(container.State)]++
		}

		sandboxes := pod.Sandboxes

		for _, sandbox := range sandboxes {
			if sandbox.State == kubecontainer.ContainerStateRunning {
				runningSandboxNum++
				// every pod should only have one running sandbox
				break
			}
		}
	}
	for key, value := range containerStateCount {
		metrics.RunningContainerCount.WithLabelValues(key).Set(float64(value))
	}

	// Set the number of running pods.
	metrics.RunningPodCount.Set(float64(runningSandboxNum))
}

func (pr podRecords) getOld(id types.UID) *kubecontainer.Pod {
	r, ok := pr[id]
	if !ok {
		return nil
	}
	return r.old
}

func (pr podRecords) getCurrent(id types.UID) *kubecontainer.Pod {
	r, ok := pr[id]
	if !ok {
		return nil
	}
	return r.current
}

func (pr podRecords) setCurrent(pods []*kubecontainer.Pod) {
	for i := range pr {
		pr[i].current = nil
	}
	for _, pod := range pods {
		if r, ok := pr[pod.ID]; ok {
			r.current = pod
		} else {
			pr[pod.ID] = &podRecord{current: pod}
		}
	}
}

func (pr podRecords) update(id types.UID) {
	r, ok := pr[id]
	if !ok {
		return
	}
	pr.updateInternal(id, r)
}

func (pr podRecords) updateInternal(id types.UID, r *podRecord) {
	if r.current == nil {
		// Pod no longer exists; delete the entry.
		delete(pr, id)
		return
	}
	r.old = r.current
	r.current = nil
}
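
// A small worked example of the podRecords bookkeeping across relists, with
// an illustrative pod p1: setCurrent stores the freshly listed p1 in current
// while old still holds the previous relist's view; computeEvents diffs old
// against current; after the events are handled, update(p1.ID) promotes
// current to old (or deletes the record entirely if p1 disappeared), so the
// next relist diffs against this relist's snapshot.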