If a pod is already terminated and the housekeeping loop sees an out-of-date cache entry for a running container, the pod worker should ignore that running-pod termination request. Once the worker completes, a subsequent housekeeping invocation will then invoke the terminating flow because the worker is no longer processing any pod with that UID. This does leave the possibility of syncTerminatedPod being blocked if a container in the pod is started after killPod successfully completes but before syncTerminatedPod can exit successfully, perhaps because the terminated flow (detach volumes) is blocked on that running container. A future change will address that issue.
/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kubelet

import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/tools/record"
	"k8s.io/klog/v2"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/events"
	"k8s.io/kubernetes/pkg/kubelet/eviction"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/kubelet/util/queue"
)

// OnCompleteFunc is a function that is invoked when an operation completes.
// If err is non-nil, the operation did not complete successfully.
type OnCompleteFunc func(err error)

// PodStatusFunc is a function that is invoked to override the pod status when a pod is killed.
type PodStatusFunc func(podStatus *v1.PodStatus)

// KillPodOptions are options when performing a pod update whose update type is kill.
type KillPodOptions struct {
	// CompletedCh is closed when the kill request completes (syncTerminatingPod has completed
	// without error) or if the pod does not exist, or if the pod has already terminated. This
	// could take an arbitrary amount of time to be closed, but is never left open once
	// CouldHaveRunningContainers() returns false.
	CompletedCh chan<- struct{}
	// Evict is true if this is a pod triggered eviction - once a pod is evicted some resources are
	// more aggressively reaped than during normal pod operation (stopped containers).
	Evict bool
	// PodStatusFunc is invoked (if set) and overrides the status of the pod at the time the pod is killed.
	// The provided status is populated from the latest state.
	PodStatusFunc PodStatusFunc
	// PodTerminationGracePeriodSecondsOverride is optional override to use if a pod is being killed as part of kill operation.
	PodTerminationGracePeriodSecondsOverride *int64
}

// UpdatePodOptions is an options struct to pass to a UpdatePod operation.
type UpdatePodOptions struct {
	// The type of update (create, update, sync, kill).
	UpdateType kubetypes.SyncPodType
	// StartTime is an optional timestamp for when this update was created. If set,
	// when this update is fully realized by the pod worker it will be recorded in
	// the PodWorkerDuration metric.
	StartTime time.Time
	// Pod to update. Required.
	Pod *v1.Pod
	// MirrorPod is the mirror pod if Pod is a static pod. Optional when UpdateType
	// is kill or terminated.
	MirrorPod *v1.Pod
	// RunningPod is a runtime pod that is no longer present in config. Required
	// if Pod is nil, ignored if Pod is set.
	RunningPod *kubecontainer.Pod
	// KillPodOptions is used to override the default termination behavior of the
	// pod or to update the pod status after an operation is completed. Since a
	// pod can be killed for multiple reasons, PodStatusFunc is invoked in order
	// and later kills have an opportunity to override the status (i.e. a preemption
	// may be later turned into an eviction).
	KillPodOptions *KillPodOptions
}

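// Illustrative sketch (not part of this file's API surface) of how a caller might
// construct a kill update using the two option structs above; it assumes the caller
// already holds a PodWorkers instance named workers and a *v1.Pod named pod:
//
//	done := make(chan struct{})
//	workers.UpdatePod(UpdatePodOptions{
//		UpdateType: kubetypes.SyncPodKill,
//		Pod:        pod,
//		KillPodOptions: &KillPodOptions{
//			CompletedCh: done,
//			Evict:       false,
//		},
//	})
//	<-done // closed once syncTerminatingPod succeeds, or immediately if the pod is already terminated
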
// PodWorkType classifies the three phases of pod lifecycle - setup (sync),
// teardown of containers (terminating), cleanup (terminated).
type PodWorkType int

const (
	// SyncPodWork is when the pod is expected to be started and running.
	SyncPodWork PodWorkType = iota
	// TerminatingPodWork is when the pod is no longer being set up, but some
	// containers may be running and are being torn down.
	TerminatingPodWork
	// TerminatedPodWork indicates the pod is stopped, can have no more running
	// containers, and any foreground cleanup can be executed.
	TerminatedPodWork
)

// podWork is the internal work item processed by a pod worker for a single pod.
type podWork struct {
	// WorkType is the type of sync to perform - sync (create), terminating (stop
	// containers), terminated (clean up and write status).
	WorkType PodWorkType

	// Options contains the data to sync.
	Options UpdatePodOptions
}

// PodWorkers is an abstract interface for testability.
type PodWorkers interface {
	// UpdatePod notifies the pod worker of a change to a pod, which will then
	// be processed in FIFO order by a goroutine per pod UID. The state of the
	// pod will be passed to the syncPod method until either the pod is marked
	// as deleted, it reaches a terminal phase (Succeeded/Failed), or the pod
	// is evicted by the kubelet. Once that occurs the syncTerminatingPod method
	// will be called until it exits successfully, and after that all further
	// UpdatePod() calls will be ignored for that pod until it has been forgotten
	// due to significant time passing. A pod that is terminated will never be
	// restarted.
	UpdatePod(options UpdatePodOptions)
	// SyncKnownPods removes workers for pods that are not in the desiredPods set
	// and have been terminated for a significant period of time. Once this method
	// has been called once, the workers are assumed to be fully initialized and
	// subsequent calls to ShouldPodContentBeRemoved on unknown pods will return
	// true.
	SyncKnownPods(desiredPods []*v1.Pod) map[types.UID]PodWorkType

	// CouldHaveRunningContainers returns true before the pod workers have synced,
	// once the pod workers see the pod (syncPod could be called), and returns false
	// after the pod has been terminated (running containers guaranteed stopped).
	//
	// Intended for use by the kubelet config loops, but not subsystems, which should
	// use ShouldPod*().
	CouldHaveRunningContainers(uid types.UID) bool
	// IsPodTerminationRequested returns true when pod termination has been requested
	// until the termination completes and the pod is removed from config. This should
	// not be used in cleanup loops because it will return false if the pod has already
	// been cleaned up - use ShouldPodContainersBeTerminating instead. Also, this method
	// may return true while containers are still being initialized by the pod worker.
	//
	// Intended for use by the kubelet sync* methods, but not subsystems, which should
	// use ShouldPod*().
	IsPodTerminationRequested(uid types.UID) bool

	// ShouldPodContainersBeTerminating returns false before pod workers have synced,
	// and true once a pod has started terminating. This check is similar to
	// ShouldPodRuntimeBeRemoved but is also true after pod termination is requested.
	//
	// Intended for use by subsystem sync loops to avoid performing background setup
	// after termination has been requested for a pod. Callers must ensure that the
	// syncPod method is non-blocking when their data is absent.
	ShouldPodContainersBeTerminating(uid types.UID) bool
	// ShouldPodRuntimeBeRemoved returns true if runtime managers within the Kubelet
	// should aggressively cleanup pod resources that are not containers or on disk
	// content, like attached volumes. This is true when a pod is not yet observed
	// by a worker after the first sync (meaning it can't be running yet) or after
	// all running containers are stopped.
	// TODO: Once pod logs are separated from running containers, this method should
	// be used to gate whether containers are kept.
	//
	// Intended for use by subsystem sync loops to know when to start tearing down
	// resources that are used by running containers. Callers should ensure that
	// runtime content they own is not required for post-termination - for instance
	// containers are required in docker to preserve pod logs until after the pod
	// is deleted.
	ShouldPodRuntimeBeRemoved(uid types.UID) bool
	// ShouldPodContentBeRemoved returns true if resource managers within the Kubelet
	// should aggressively cleanup all content related to the pod. This is true
	// during pod eviction (when we wish to remove that content to free resources)
	// as well as after the request to delete a pod has resulted in containers being
	// stopped (which is a more graceful action). Note that a deleting pod can still
	// be evicted.
	//
	// Intended for use by subsystem sync loops to know when to start tearing down
	// resources that are used by non-deleted pods. Content is generally preserved
	// until deletion+removal_from_etcd or eviction, although garbage collection
	// can free content when this method returns false.
	ShouldPodContentBeRemoved(uid types.UID) bool
	// IsPodForMirrorPodTerminatingByFullName returns true if a static pod with the
	// provided pod name is currently terminating and has yet to complete. It is
	// intended to be used only during orphan mirror pod cleanup to prevent us from
	// deleting a terminating static pod from the apiserver before the pod is shut
	// down.
	IsPodForMirrorPodTerminatingByFullName(podFullname string) bool
}

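// A minimal sketch, assuming a hypothetical subsystem type with its own pods() and
// teardown(uid) helpers, of how a cleanup loop might consult the informational
// methods above (this wiring is illustrative, not the kubelet's actual call sites):
//
//	func (m *subsystem) cleanup(workers PodWorkers) {
//		for uid := range m.pods() {
//			if workers.ShouldPodContentBeRemoved(uid) {
//				// evicted, or deleted and fully terminated - reclaim everything
//				m.teardown(uid)
//			}
//		}
//	}
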
// the function to invoke to perform a sync (reconcile the kubelet state to the desired shape of the pod)
type syncPodFnType func(ctx context.Context, updateType kubetypes.SyncPodType, pod *v1.Pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error

// the function to invoke to terminate a pod (ensure no running processes are present)
type syncTerminatingPodFnType func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, runningPod *kubecontainer.Pod, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error

// the function to invoke to cleanup a pod that is terminated
type syncTerminatedPodFnType func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error

const (
	// jitter factor for resyncInterval
	workerResyncIntervalJitterFactor = 0.5

	// jitter factor for backOffPeriod and backOffOnTransientErrorPeriod
	workerBackOffPeriodJitterFactor = 0.5

	// backoff period when transient error occurred.
	backOffOnTransientErrorPeriod = time.Second
)

// podSyncStatus tracks per-pod transitions through the three phases of pod
// worker sync (setup, terminating, terminated).
type podSyncStatus struct {
	// ctx is the context that is associated with the current pod sync.
	ctx context.Context
	// cancelFn if set is expected to cancel the current sync*Pod operation.
	cancelFn context.CancelFunc
	// working is true if a pod worker is currently in a sync method.
	working bool

	// syncedAt is the time at which the pod worker first observed this pod.
	syncedAt time.Time
	// terminatingAt is set once the pod is requested to be killed - note that
	// this can be set before the pod worker starts terminating the pod, see
	// terminating.
	terminatingAt time.Time
	// startedTerminating is true once the pod worker has observed the request to
	// stop a pod (exited syncPod and observed a podWork with WorkType
	// TerminatingPodWork). Once this is set, it is safe for other components
	// of the kubelet to assume that no other containers may be started.
	startedTerminating bool
	// deleted is true if the pod has been marked for deletion on the apiserver
	// or has no configuration represented (was deleted before).
	deleted bool
	// gracePeriod is the requested gracePeriod once terminatingAt is nonzero.
	gracePeriod int64
	// evicted is true if the kill indicated this was an eviction (an evicted
	// pod can be more aggressively cleaned up).
	evicted bool
	// terminatedAt is set once the pod worker has completed a successful
	// syncTerminatingPod call and means all running containers are stopped.
	terminatedAt time.Time
	// finished is true once the pod worker completes for a pod
	// (syncTerminatedPod exited with no errors) until SyncKnownPods is invoked
	// to remove the pod. A terminal pod (Succeeded/Failed) will have
	// termination status until the pod is deleted.
	finished bool
	// notifyPostTerminating will be closed once the pod transitions to
	// terminated. After the pod is in terminated state, nothing should be
	// added to this list.
	notifyPostTerminating []chan<- struct{}
	// statusPostTerminating is a list of the status changes associated
	// with kill pod requests. After the pod is in terminated state, nothing
	// should be added to this list. The worker will execute the last function
	// in this list on each termination attempt.
	statusPostTerminating []PodStatusFunc
}

func (s *podSyncStatus) IsWorking() bool              { return s.working }
func (s *podSyncStatus) IsTerminationRequested() bool { return !s.terminatingAt.IsZero() }
func (s *podSyncStatus) IsTerminationStarted() bool   { return s.startedTerminating }
func (s *podSyncStatus) IsTerminated() bool           { return !s.terminatedAt.IsZero() }
func (s *podSyncStatus) IsFinished() bool             { return s.finished }
func (s *podSyncStatus) IsEvicted() bool              { return s.evicted }
func (s *podSyncStatus) IsDeleted() bool              { return s.deleted }

// podWorkers keeps track of operations on pods and ensures each pod is
// reconciled with the container runtime and other subsystems. The worker
// also tracks which pods are in flight for starting, which pods are
// shutting down but still have running containers, and which pods have
// terminated recently and are guaranteed to have no running containers.
//
// A pod passed to a pod worker is either being synced (expected to be
// running), terminating (has running containers but no new containers are
// expected to start), terminated (has no running containers but may still
// have resources being consumed), or cleaned up (no resources remaining).
// Once a pod is set to be "torn down" it cannot be started again for that
// UID (corresponding to a delete or eviction) until:
//
// 1. The pod worker is finalized (syncTerminatingPod and
//    syncTerminatedPod exit without error sequentially)
// 2. The SyncKnownPods method is invoked by kubelet housekeeping and the pod
//    is not part of the known config.
//
// Pod workers provide a consistent source of information to other kubelet
// loops about the status of the pod and whether containers can be
// running. The ShouldPodContentBeRemoved() method tracks whether a pod's
// contents should still exist, which includes non-existent pods after
// SyncKnownPods() has been called once (as per the contract, all existing
// pods should be provided via UpdatePod before SyncKnownPods is invoked).
// Generally other sync loops are expected to separate "setup" and
// "teardown" responsibilities and the information methods here assist in
// each by centralizing that state. A simple visualization of the time
// intervals involved might look like:
//
// ---|                                         = kubelet config has synced at least once
// -------|                                  |- = pod exists in apiserver config
// --------|                  |---------------- = CouldHaveRunningContainers() is true
//         ^- pod is observed by pod worker  .
//         .                                 .
// ----------|       |------------------------- = syncPod is running
//         . ^- pod worker loop sees change and invokes syncPod
//         . .                               .
// --------------|                     |------- = ShouldPodContainersBeTerminating() returns true
// --------------|                     |------- = IsPodTerminationRequested() returns true (pod is known)
//         . .   ^- Kubelet evicts pod       .
//         . .                               .
// -------------------|       |---------------- = syncTerminatingPod runs then exits without error
//         . .        ^ pod worker loop exits syncPod, sees pod is terminating,
//         . .          invokes syncTerminatingPod
//         . .                               .
// ---|    |------------------|              .  = ShouldPodRuntimeBeRemoved() returns true (post-sync)
//           .                ^ syncTerminatingPod has exited successfully
//           .                               .
// ----------------------------|       |------- = syncTerminatedPod runs then exits without error
//           .                         ^ other loops can tear down
//           .                               .
// ------------------------------------|  |---- = status manager is waiting for PodResourcesAreReclaimed()
//           .                         ^     .
// ----------|                               |- = status manager can be writing pod status
//                                           ^ status manager deletes pod because no longer exists in config
//
// Other components in the Kubelet can request a termination of the pod
// via the UpdatePod method or the killPodNow wrapper - this will ensure
// the components of the pod are stopped until the kubelet is restarted
// or permanently (if the phase of the pod is set to a terminal phase
// in the pod status change).
//
type podWorkers struct {
	// Protects all per worker fields.
	podLock sync.Mutex
	// podsSynced is true once the pod worker has been synced at least once,
	// which means that all working pods have been started via UpdatePod().
	podsSynced bool
	// Tracks all running per-pod goroutines - per-pod goroutine will be
	// processing updates received through its corresponding channel.
	podUpdates map[types.UID]chan podWork
	// Tracks the last undelivered work item for this pod - a work item is
	// undelivered if it comes in while the worker is working.
	lastUndeliveredWorkUpdate map[types.UID]podWork
	// Tracks by UID the termination status of a pod - syncing, terminating,
	// terminated, and evicted.
	podSyncStatuses map[types.UID]*podSyncStatus
	// Tracks when a static pod is being killed and is removed when the
	// static pod transitions to the killed state.
	terminatingStaticPodFullnames map[string]struct{}

	workQueue queue.WorkQueue

	// This function is run to sync the desired state of pod.
	// NOTE: This function has to be thread-safe - it can be called for
	// different pods at the same time.

	syncPodFn            syncPodFnType
	syncTerminatingPodFn syncTerminatingPodFnType
	syncTerminatedPodFn  syncTerminatedPodFnType

	// The EventRecorder to use
	recorder record.EventRecorder

	// backOffPeriod is the duration to back off when there is a sync error.
	backOffPeriod time.Duration

	// resyncInterval is the duration to wait until the next sync.
	resyncInterval time.Duration

	// podCache stores kubecontainer.PodStatus for all pods.
	podCache kubecontainer.Cache
}

func newPodWorkers(
	syncPodFn syncPodFnType,
	syncTerminatingPodFn syncTerminatingPodFnType,
	syncTerminatedPodFn syncTerminatedPodFnType,
	recorder record.EventRecorder,
	workQueue queue.WorkQueue,
	resyncInterval, backOffPeriod time.Duration,
	podCache kubecontainer.Cache,
) PodWorkers {
	return &podWorkers{
		podSyncStatuses:               map[types.UID]*podSyncStatus{},
		podUpdates:                    map[types.UID]chan podWork{},
		lastUndeliveredWorkUpdate:     map[types.UID]podWork{},
		terminatingStaticPodFullnames: map[string]struct{}{},
		syncPodFn:                     syncPodFn,
		syncTerminatingPodFn:          syncTerminatingPodFn,
		syncTerminatedPodFn:           syncTerminatedPodFn,
		recorder:                      recorder,
		workQueue:                     workQueue,
		resyncInterval:                resyncInterval,
		backOffPeriod:                 backOffPeriod,
		podCache:                      podCache,
	}
}

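// A construction sketch, assuming the caller (normally the Kubelet during setup)
// already has the three sync functions, a recorder, a work queue, and a pod cache;
// the kl receiver and field names below are assumptions for illustration only:
//
//	workers := newPodWorkers(
//		kl.syncPod, kl.syncTerminatingPod, kl.syncTerminatedPod,
//		kl.recorder, kl.workQueue,
//		kl.resyncInterval, kl.backOffPeriod,
//		kl.podCache,
//	)
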
func (p *podWorkers) CouldHaveRunningContainers(uid types.UID) bool {
	p.podLock.Lock()
	defer p.podLock.Unlock()
	if status, ok := p.podSyncStatuses[uid]; ok {
		return !status.IsTerminated()
	}
	// once all pods are synced, any pod without sync status is known to not be running.
	return !p.podsSynced
}

func (p *podWorkers) IsPodTerminationRequested(uid types.UID) bool {
	p.podLock.Lock()
	defer p.podLock.Unlock()
	if status, ok := p.podSyncStatuses[uid]; ok {
		// the pod may still be setting up at this point.
		return status.IsTerminationRequested()
	}
	// an unknown pod is considered not to be terminating (use ShouldPodContainersBeTerminating in
	// cleanup loops to avoid failing to cleanup pods that have already been removed from config)
	return false
}

func (p *podWorkers) ShouldPodContainersBeTerminating(uid types.UID) bool {
	p.podLock.Lock()
	defer p.podLock.Unlock()
	if status, ok := p.podSyncStatuses[uid]; ok {
		// we wait until the pod worker goroutine observes the termination, which means syncPod will not
		// be executed again, which means no new containers can be started
		return status.IsTerminationStarted()
	}
	// once we've synced, if the pod isn't known to the workers we should be tearing them
	// down
	return p.podsSynced
}

func (p *podWorkers) ShouldPodRuntimeBeRemoved(uid types.UID) bool {
	p.podLock.Lock()
	defer p.podLock.Unlock()
	if status, ok := p.podSyncStatuses[uid]; ok {
		return status.IsTerminated()
	}
	// a pod that hasn't been sent to the pod worker yet should have no runtime components once we have
	// synced all content.
	return p.podsSynced
}

func (p *podWorkers) ShouldPodContentBeRemoved(uid types.UID) bool {
	p.podLock.Lock()
	defer p.podLock.Unlock()
	if status, ok := p.podSyncStatuses[uid]; ok {
		return status.IsEvicted() || (status.IsDeleted() && status.IsTerminated())
	}
	// a pod that hasn't been sent to the pod worker yet should have no content on disk once we have
	// synced all content.
	return p.podsSynced
}

func (p *podWorkers) IsPodForMirrorPodTerminatingByFullName(podFullName string) bool {
	p.podLock.Lock()
	defer p.podLock.Unlock()
	_, ok := p.terminatingStaticPodFullnames[podFullName]
	return ok
}

// UpdatePod carries a configuration change or termination state to a pod. A pod is either runnable,
// terminating, or terminated, and will transition to terminating if deleted on the apiserver, it is
// discovered to have a terminal phase (Succeeded or Failed), or if it is evicted by the kubelet.
func (p *podWorkers) UpdatePod(options UpdatePodOptions) {
	// handle when the pod is an orphan (no config) and we only have runtime status by running only
	// the terminating part of the lifecycle
	pod := options.Pod
	var isRuntimePod bool
	if options.RunningPod != nil {
		if options.Pod == nil {
			pod = options.RunningPod.ToAPIPod()
			if options.UpdateType != kubetypes.SyncPodKill {
				klog.InfoS("Pod update is ignored, runtime pods can only be killed", "pod", klog.KObj(pod), "podUID", pod.UID)
				return
			}
			options.Pod = pod
			isRuntimePod = true
		} else {
			options.RunningPod = nil
			klog.InfoS("Pod update included RunningPod which is only valid when Pod is not specified", "pod", klog.KObj(options.Pod), "podUID", options.Pod.UID)
		}
	}
	uid := pod.UID

	p.podLock.Lock()
	defer p.podLock.Unlock()

	// decide what to do with this pod - we are either setting it up, tearing it down, or ignoring it
	now := time.Now()
	status, ok := p.podSyncStatuses[uid]
	if !ok {
		klog.V(4).InfoS("Pod is being synced for the first time", "pod", klog.KObj(pod), "podUID", pod.UID)
		status = &podSyncStatus{
			syncedAt: now,
		}
		p.podSyncStatuses[uid] = status
	}

	// once a pod is terminated by UID, it cannot reenter the pod worker (until the UID is purged by housekeeping)
	if status.IsFinished() {
		klog.V(4).InfoS("Pod is finished processing, no further updates", "pod", klog.KObj(pod), "podUID", pod.UID)
		return
	}

	// check for a transition to terminating
	var becameTerminating bool
	if !status.IsTerminationRequested() {
		switch {
		case isRuntimePod:
			klog.V(4).InfoS("Pod is orphaned and must be torn down", "pod", klog.KObj(pod), "podUID", pod.UID)
			status.deleted = true
			status.terminatingAt = now
			becameTerminating = true
		case pod.DeletionTimestamp != nil:
			klog.V(4).InfoS("Pod is marked for graceful deletion, begin teardown", "pod", klog.KObj(pod), "podUID", pod.UID)
			status.deleted = true
			status.terminatingAt = now
			becameTerminating = true
		case pod.Status.Phase == v1.PodFailed, pod.Status.Phase == v1.PodSucceeded:
			klog.V(4).InfoS("Pod is in a terminal phase (success/failed), begin teardown", "pod", klog.KObj(pod), "podUID", pod.UID)
			status.terminatingAt = now
			becameTerminating = true
		case options.UpdateType == kubetypes.SyncPodKill:
			if options.KillPodOptions != nil && options.KillPodOptions.Evict {
				klog.V(4).InfoS("Pod is being evicted by the kubelet, begin teardown", "pod", klog.KObj(pod), "podUID", pod.UID)
				status.evicted = true
			} else {
				klog.V(4).InfoS("Pod is being removed by the kubelet, begin teardown", "pod", klog.KObj(pod), "podUID", pod.UID)
			}
			status.terminatingAt = now
			becameTerminating = true
		}
	}

	// once a pod is terminating, all updates are kills and the grace period can only decrease
	var workType PodWorkType
	var wasGracePeriodShortened bool
	switch {
	case status.IsTerminated():
		// A terminated pod may still be waiting for cleanup - if we receive a runtime pod kill request
		// due to housekeeping seeing an older cached version of the runtime pod simply ignore it until
		// after the pod worker completes.
		if isRuntimePod {
			klog.V(3).InfoS("Pod is waiting for termination, ignoring runtime-only kill until after pod worker is fully terminated", "pod", klog.KObj(pod), "podUID", pod.UID)
			return
		}

		workType = TerminatedPodWork

		if options.KillPodOptions != nil {
			if ch := options.KillPodOptions.CompletedCh; ch != nil {
				close(ch)
			}
		}
		options.KillPodOptions = nil

	case status.IsTerminationRequested():
		workType = TerminatingPodWork
		if options.KillPodOptions == nil {
			options.KillPodOptions = &KillPodOptions{}
		}

		if ch := options.KillPodOptions.CompletedCh; ch != nil {
			status.notifyPostTerminating = append(status.notifyPostTerminating, ch)
		}
		if fn := options.KillPodOptions.PodStatusFunc; fn != nil {
			status.statusPostTerminating = append(status.statusPostTerminating, fn)
		}

		gracePeriod, gracePeriodShortened := calculateEffectiveGracePeriod(status, pod, options.KillPodOptions)

		wasGracePeriodShortened = gracePeriodShortened
		status.gracePeriod = gracePeriod
		// always set the grace period for syncTerminatingPod so we don't have to recalculate,
		// will never be zero.
		options.KillPodOptions.PodTerminationGracePeriodSecondsOverride = &gracePeriod

		// if a static pod comes through, start tracking it explicitly (cleared by the pod worker loop)
		if kubelettypes.IsStaticPod(pod) {
			p.terminatingStaticPodFullnames[kubecontainer.GetPodFullName(pod)] = struct{}{}
		}

	default:
		workType = SyncPodWork

		// KillPodOptions is not valid for sync actions outside of the terminating phase
		if options.KillPodOptions != nil {
			if ch := options.KillPodOptions.CompletedCh; ch != nil {
				close(ch)
			}
			options.KillPodOptions = nil
		}
	}

	// the desired work we want to be performing
	work := podWork{
		WorkType: workType,
		Options:  options,
	}

	// start the pod worker goroutine if it doesn't exist
	var podUpdates chan podWork
	var exists bool
	if podUpdates, exists = p.podUpdates[uid]; !exists {
		// We need to have a buffer here, because checkForUpdates() method that
		// puts an update into channel is called from the same goroutine where
		// the channel is consumed. However, it is guaranteed that in such case
		// the channel is empty, so buffer of size 1 is enough.
		podUpdates = make(chan podWork, 1)
		p.podUpdates[uid] = podUpdates

		// Creating a new pod worker either means this is a new pod, or that the
		// kubelet just restarted. In either case the kubelet is willing to believe
		// the status of the pod for the first pod worker sync. See corresponding
		// comment in syncPod.
		go func() {
			defer runtime.HandleCrash()
			p.managePodLoop(podUpdates)
		}()
	}

	// dispatch a request to the pod worker if none are running
	if !status.IsWorking() {
		status.working = true
		podUpdates <- work
		return
	}

	// capture the maximum latency between a requested update and when the pod
	// worker observes it
	if undelivered, ok := p.lastUndeliveredWorkUpdate[pod.UID]; ok {
		// track the max latency between when a config change is requested and when it is realized
		// NOTE: this undercounts the latency when multiple requests are queued, but captures max latency
		if !undelivered.Options.StartTime.IsZero() && undelivered.Options.StartTime.Before(work.Options.StartTime) {
			work.Options.StartTime = undelivered.Options.StartTime
		}
	}

	// always sync the most recent data
	p.lastUndeliveredWorkUpdate[pod.UID] = work

	if (becameTerminating || wasGracePeriodShortened) && status.cancelFn != nil {
		klog.V(3).InfoS("Cancelling current pod sync", "pod", klog.KObj(pod), "podUID", pod.UID, "updateType", work.WorkType)
		status.cancelFn()
		return
	}
}

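// To summarize the dispatch above: the first update for a UID starts a worker and
// yields SyncPodWork; an update that marks the pod deleted, terminal, killed, or
// orphaned sets terminatingAt and yields TerminatingPodWork, cancelling any in-flight
// sync when the pod became terminating or its grace period shrank; after
// syncTerminatingPod succeeds only TerminatedPodWork is dispatched, and once the
// worker is finished all further updates for that UID are ignored until housekeeping
// forgets it via SyncKnownPods.
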
// calculateEffectiveGracePeriod sets the initial grace period for a newly terminating pod or allows a
// shorter grace period to be provided, returning the desired value.
func calculateEffectiveGracePeriod(status *podSyncStatus, pod *v1.Pod, options *KillPodOptions) (int64, bool) {
	// enforce the restriction that a grace period can only decrease and track whatever our value is,
	// then ensure a calculated value is passed down to lower levels
	gracePeriod := status.gracePeriod
	// this value is bedrock truth - the apiserver owns telling us this value calculated by apiserver
	if override := pod.DeletionGracePeriodSeconds; override != nil {
		if gracePeriod == 0 || *override < gracePeriod {
			gracePeriod = *override
		}
	}
	// we allow other parts of the kubelet (namely eviction) to request this pod be terminated faster
	if options != nil {
		if override := options.PodTerminationGracePeriodSecondsOverride; override != nil {
			if gracePeriod == 0 || *override < gracePeriod {
				gracePeriod = *override
			}
		}
	}
	// make a best effort to default this value to the pod's desired intent, in the event
	// the kubelet provided no requested value (graceful termination?)
	if gracePeriod == 0 && pod.Spec.TerminationGracePeriodSeconds != nil {
		gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
	}
	// no matter what, we always supply a grace period of at least 1
	if gracePeriod < 1 {
		gracePeriod = 1
	}
	return gracePeriod, status.gracePeriod != 0 && status.gracePeriod != gracePeriod
}

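// Worked example of the precedence above (values are illustrative): with a spec
// TerminationGracePeriodSeconds of 30, an apiserver DeletionGracePeriodSeconds of 30,
// and an eviction override of 5, the effective grace period is 5; with no overrides
// the spec's 30 is used; with everything unset or zero the floor of 1 applies. The
// boolean result is true only when a previously recorded grace period was changed,
// which UpdatePod uses to decide whether to cancel an in-flight sync.
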
func (p *podWorkers) managePodLoop(podUpdates <-chan podWork) {
	var lastSyncTime time.Time
	for update := range podUpdates {
		pod := update.Options.Pod

		klog.V(4).InfoS("Processing pod event", "pod", klog.KObj(pod), "podUID", pod.UID, "updateType", update.WorkType)
		err := func() error {
			// The worker is responsible for ensuring the sync method sees the appropriate
			// status updates on resyncs (the result of the last sync), transitions to
			// terminating (no wait), or on terminated (whatever the most recent state is).
			// Only syncing and terminating can generate pod status changes, while terminated
			// pods ensure the most recent status makes it to the api server.
			var status *kubecontainer.PodStatus
			var err error
			switch {
			case update.Options.RunningPod != nil:
				// when we receive a running pod, we don't need status at all
			default:
				// wait until we see the next refresh from the PLEG via the cache (max 2s)
				// TODO: this adds ~1s of latency on all transitions from sync to terminating
				//  to terminated, and on all termination retries (including evictions). We should
				//  improve latency by making the pleg continuous and by allowing pod status
				//  changes to be refreshed when key events happen (killPod, sync->terminating).
				//  Improving this latency also reduces the possibility that a terminated
				//  container's status is garbage collected before we have a chance to update the
				//  API server (thus losing the exit code).
				status, err = p.podCache.GetNewerThan(pod.UID, lastSyncTime)
			}
			if err != nil {
				// This is the legacy event thrown by manage pod loop all other events are now dispatched
				// from syncPodFn
				p.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedSync, "error determining status: %v", err)
				return err
			}

			ctx := p.contextForWorker(pod.UID)

			// Take the appropriate action (illegal phases are prevented by UpdatePod)
			switch {
			case update.WorkType == TerminatedPodWork:
				err = p.syncTerminatedPodFn(ctx, pod, status)

			case update.WorkType == TerminatingPodWork:
				var gracePeriod *int64
				if opt := update.Options.KillPodOptions; opt != nil {
					gracePeriod = opt.PodTerminationGracePeriodSecondsOverride
				}
				podStatusFn := p.acknowledgeTerminating(pod)

				err = p.syncTerminatingPodFn(ctx, pod, status, update.Options.RunningPod, gracePeriod, podStatusFn)

			default:
				err = p.syncPodFn(ctx, update.Options.UpdateType, pod, update.Options.MirrorPod, status)
			}

			lastSyncTime = time.Now()
			return err
		}()

		switch {
		case err == context.Canceled:
			// when the context is cancelled we expect an update to already be queued
			klog.V(2).InfoS("Sync exited with context cancellation error", "pod", klog.KObj(pod), "podUID", pod.UID, "updateType", update.WorkType)

		case err != nil:
			// we will queue a retry
			klog.ErrorS(err, "Error syncing pod, skipping", "pod", klog.KObj(pod), "podUID", pod.UID)

		case update.WorkType == TerminatedPodWork:
			// we can shut down the worker
			p.completeTerminated(pod)
			if start := update.Options.StartTime; !start.IsZero() {
				metrics.PodWorkerDuration.WithLabelValues("terminated").Observe(metrics.SinceInSeconds(start))
			}
			klog.V(4).InfoS("Processing pod event done", "pod", klog.KObj(pod), "podUID", pod.UID, "updateType", update.WorkType)
			return

		case update.WorkType == TerminatingPodWork:
			// pods that don't exist in config don't need to be terminated, garbage collection will cover them
			if update.Options.RunningPod != nil {
				p.completeTerminatingRuntimePod(pod)
				if start := update.Options.StartTime; !start.IsZero() {
					metrics.PodWorkerDuration.WithLabelValues(update.Options.UpdateType.String()).Observe(metrics.SinceInSeconds(start))
				}
				klog.V(4).InfoS("Processing pod event done", "pod", klog.KObj(pod), "podUID", pod.UID, "updateType", update.WorkType)
				return
			}
			// otherwise we move to the terminating phase
			p.completeTerminating(pod)
		}

		// queue a retry for errors if necessary, then put the next event in the channel if any
		p.completeWork(pod, err)
		if start := update.Options.StartTime; !start.IsZero() {
			metrics.PodWorkerDuration.WithLabelValues(update.Options.UpdateType.String()).Observe(metrics.SinceInSeconds(start))
		}
		klog.V(4).InfoS("Processing pod event done", "pod", klog.KObj(pod), "podUID", pod.UID, "updateType", update.WorkType)
	}
}

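// In short, each worker goroutine drains its podUpdates channel serially: it waits
// for a pod status newer than the last sync (unless it was handed a runtime-only
// pod), invokes exactly one of syncPodFn, syncTerminatingPodFn, or
// syncTerminatedPodFn, and then either shuts down (terminated pods and orphaned
// runtime pods) or hands control to completeWork to requeue the pod for resync or
// error backoff.
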
// acknowledgeTerminating sets the terminating flag on the pod status once the pod worker sees
// the termination state so that other components know no new containers will be started in this
// pod. It then returns the status function, if any, that applies to this pod.
func (p *podWorkers) acknowledgeTerminating(pod *v1.Pod) PodStatusFunc {
	p.podLock.Lock()
	defer p.podLock.Unlock()

	status, ok := p.podSyncStatuses[pod.UID]
	if !ok {
		return nil
	}

	if !status.terminatingAt.IsZero() && !status.startedTerminating {
		klog.V(4).InfoS("Pod worker has observed request to terminate", "pod", klog.KObj(pod), "podUID", pod.UID)
		status.startedTerminating = true
	}

	if l := len(status.statusPostTerminating); l > 0 {
		return status.statusPostTerminating[l-1]
	}
	return nil
}

// completeTerminating is invoked when syncTerminatingPod completes successfully, which means
// no container is running, no container will be started in the future, and we are ready for
// cleanup.  This updates the termination state which prevents future syncs and will ensure
// other kubelet loops know this pod is not running any containers.
func (p *podWorkers) completeTerminating(pod *v1.Pod) {
	p.podLock.Lock()
	defer p.podLock.Unlock()

	klog.V(4).InfoS("Pod terminated all containers successfully", "pod", klog.KObj(pod), "podUID", pod.UID)

	// if a static pod is being tracked, forget it
	delete(p.terminatingStaticPodFullnames, kubecontainer.GetPodFullName(pod))

	if status, ok := p.podSyncStatuses[pod.UID]; ok {
		if status.terminatingAt.IsZero() {
			klog.V(4).InfoS("Pod worker was terminated but did not have terminatingAt set, likely programmer error", "pod", klog.KObj(pod), "podUID", pod.UID)
		}
		status.terminatedAt = time.Now()
		for _, ch := range status.notifyPostTerminating {
			close(ch)
		}
		status.notifyPostTerminating = nil
		status.statusPostTerminating = nil
	}

	p.lastUndeliveredWorkUpdate[pod.UID] = podWork{
		WorkType: TerminatedPodWork,
		Options: UpdatePodOptions{
			Pod: pod,
		},
	}
}

// completeTerminatingRuntimePod is invoked when syncTerminatingPod completes successfully,
// which means an orphaned pod (no config) is terminated and we can exit. Since orphaned
// pods have no API representation, we want to exit the loop at this point rather than
// perform further cleanup. This updates the termination state which prevents future syncs
// and will ensure other kubelet loops know this pod is not running any containers.
func (p *podWorkers) completeTerminatingRuntimePod(pod *v1.Pod) {
	p.podLock.Lock()
	defer p.podLock.Unlock()

	klog.V(4).InfoS("Pod terminated all orphaned containers successfully and worker can now stop", "pod", klog.KObj(pod), "podUID", pod.UID)

	// if a static pod is being tracked, forget it
	delete(p.terminatingStaticPodFullnames, kubecontainer.GetPodFullName(pod))

	if status, ok := p.podSyncStatuses[pod.UID]; ok {
		if status.terminatingAt.IsZero() {
			klog.V(4).InfoS("Pod worker was terminated but did not have terminatingAt set, likely programmer error", "pod", klog.KObj(pod), "podUID", pod.UID)
		}
		status.terminatedAt = time.Now()
		status.finished = true
		status.working = false
	}

	ch, ok := p.podUpdates[pod.UID]
	if ok {
		close(ch)
	}
	delete(p.podUpdates, pod.UID)
	delete(p.lastUndeliveredWorkUpdate, pod.UID)
	delete(p.terminatingStaticPodFullnames, kubecontainer.GetPodFullName(pod))
}

// completeTerminated is invoked after syncTerminatedPod completes successfully and means we
// can stop the pod worker. The pod is finalized at this point.
func (p *podWorkers) completeTerminated(pod *v1.Pod) {
	p.podLock.Lock()
	defer p.podLock.Unlock()

	klog.V(4).InfoS("Pod is complete and the worker can now stop", "pod", klog.KObj(pod), "podUID", pod.UID)

	ch, ok := p.podUpdates[pod.UID]
	if ok {
		close(ch)
	}
	delete(p.podUpdates, pod.UID)
	delete(p.lastUndeliveredWorkUpdate, pod.UID)
	delete(p.terminatingStaticPodFullnames, kubecontainer.GetPodFullName(pod))

	if status, ok := p.podSyncStatuses[pod.UID]; ok {
		if status.terminatingAt.IsZero() {
			klog.V(4).InfoS("Pod worker is complete but did not have terminatingAt set, likely programmer error", "pod", klog.KObj(pod), "podUID", pod.UID)
		}
		if status.terminatedAt.IsZero() {
			klog.V(4).InfoS("Pod worker is complete but did not have terminatedAt set, likely programmer error", "pod", klog.KObj(pod), "podUID", pod.UID)
		}
		status.finished = true
		status.working = false
	}
}

// completeWork requeues on error or the next sync interval and then immediately executes any pending
// work.
func (p *podWorkers) completeWork(pod *v1.Pod, syncErr error) {
	// Requeue the last update if the last sync returned error.
	switch {
	case syncErr == nil:
		// No error; requeue at the regular resync interval.
		p.workQueue.Enqueue(pod.UID, wait.Jitter(p.resyncInterval, workerResyncIntervalJitterFactor))
	case strings.Contains(syncErr.Error(), NetworkNotReadyErrorMsg):
		// Network is not ready; back off for short period of time and retry as network might be ready soon.
		p.workQueue.Enqueue(pod.UID, wait.Jitter(backOffOnTransientErrorPeriod, workerBackOffPeriodJitterFactor))
	default:
		// Error occurred during the sync; back off and then retry.
		p.workQueue.Enqueue(pod.UID, wait.Jitter(p.backOffPeriod, workerBackOffPeriodJitterFactor))
	}
	p.completeWorkQueueNext(pod.UID)
}

// completeWorkQueueNext holds the lock and either queues the next work item for the worker or
// clears the working status.
func (p *podWorkers) completeWorkQueueNext(uid types.UID) {
	p.podLock.Lock()
	defer p.podLock.Unlock()
	if workUpdate, exists := p.lastUndeliveredWorkUpdate[uid]; exists {
		p.podUpdates[uid] <- workUpdate
		delete(p.lastUndeliveredWorkUpdate, uid)
	} else {
		p.podSyncStatuses[uid].working = false
	}
}

// contextForWorker returns or initializes the appropriate context for a known
// worker. If the current context is expired, it is reset. If no worker is
// present, no context is returned.
func (p *podWorkers) contextForWorker(uid types.UID) context.Context {
	p.podLock.Lock()
	defer p.podLock.Unlock()

	status, ok := p.podSyncStatuses[uid]
	if !ok {
		return nil
	}
	if status.ctx == nil || status.ctx.Err() == context.Canceled {
		status.ctx, status.cancelFn = context.WithCancel(context.Background())
	}
	return status.ctx
}

// SyncKnownPods will purge any fully terminated pods that are not in the desiredPods
// list, which means SyncKnownPods must be called in a threadsafe manner from calls
// to UpdatePod for new pods. It returns a map of known workers that are not finished
// with a value of TerminatedPodWork, TerminatingPodWork, or SyncPodWork depending on
// whether the pod is terminated, terminating, or syncing.
func (p *podWorkers) SyncKnownPods(desiredPods []*v1.Pod) map[types.UID]PodWorkType {
	workers := make(map[types.UID]PodWorkType)
	known := make(map[types.UID]struct{})
	for _, pod := range desiredPods {
		known[pod.UID] = struct{}{}
	}

	p.podLock.Lock()
	defer p.podLock.Unlock()

	p.podsSynced = true
	for uid, status := range p.podSyncStatuses {
		if _, exists := known[uid]; !exists {
			p.removeTerminatedWorker(uid)
		}
		switch {
		case !status.terminatedAt.IsZero():
			workers[uid] = TerminatedPodWork
		case !status.terminatingAt.IsZero():
			workers[uid] = TerminatingPodWork
		default:
			workers[uid] = SyncPodWork
		}
	}
	return workers
}

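// An illustrative (hypothetical) housekeeping call site for SyncKnownPods; kl,
// podWorkers, and desiredPods are assumptions for the sketch:
//
//	for uid, work := range kl.podWorkers.SyncKnownPods(desiredPods) {
//		if work == TerminatingPodWork {
//			klog.V(4).InfoS("Pod is still being torn down", "podUID", uid)
//			continue
//		}
//		// ... perform other per-pod housekeeping for uid ...
//	}
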
// removeTerminatedWorker cleans up and removes the worker status for a worker that
// has reached a terminal state of "finished" - has successfully exited
// syncTerminatedPod. This "forgets" a pod by UID and allows another pod to be recreated
// with the same UID.
func (p *podWorkers) removeTerminatedWorker(uid types.UID) {
	status, ok := p.podSyncStatuses[uid]
	if !ok {
		// already forgotten, or forgotten too early
		klog.V(4).InfoS("Pod worker has been requested for removal but is not a known pod", "podUID", uid)
		return
	}

	if !status.finished {
		klog.V(4).InfoS("Pod worker has been requested for removal but is still not fully terminated", "podUID", uid)
		return
	}

	klog.V(4).InfoS("Pod has been terminated and is no longer known to the kubelet, remove all history", "podUID", uid)
	delete(p.podSyncStatuses, uid)
	delete(p.podUpdates, uid)
	delete(p.lastUndeliveredWorkUpdate, uid)
}

// killPodNow returns a KillPodFunc that can be used to kill a pod.
// It is intended to be injected into other modules that need to kill a pod.
func killPodNow(podWorkers PodWorkers, recorder record.EventRecorder) eviction.KillPodFunc {
	return func(pod *v1.Pod, isEvicted bool, gracePeriodOverride *int64, statusFn func(*v1.PodStatus)) error {
		// determine the grace period to use when killing the pod
		gracePeriod := int64(0)
		if gracePeriodOverride != nil {
			gracePeriod = *gracePeriodOverride
		} else if pod.Spec.TerminationGracePeriodSeconds != nil {
			gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
		}

		// we timeout and return an error if we don't get a callback within a reasonable time.
		// the default timeout is relative to the grace period (we settle on 10s to wait for kubelet->runtime traffic to complete in sigkill)
		timeout := int64(gracePeriod + (gracePeriod / 2))
		minTimeout := int64(10)
		if timeout < minTimeout {
			timeout = minTimeout
		}
		timeoutDuration := time.Duration(timeout) * time.Second

		// open a channel we block against until we get a result
		ch := make(chan struct{}, 1)
		podWorkers.UpdatePod(UpdatePodOptions{
			Pod:        pod,
			UpdateType: kubetypes.SyncPodKill,
			KillPodOptions: &KillPodOptions{
				CompletedCh:                              ch,
				Evict:                                    isEvicted,
				PodStatusFunc:                            statusFn,
				PodTerminationGracePeriodSecondsOverride: gracePeriodOverride,
			},
		})

		// wait for either a response, or a timeout
		select {
		case <-ch:
			return nil
		case <-time.After(timeoutDuration):
			recorder.Eventf(pod, v1.EventTypeWarning, events.ExceededGracePeriod, "Container runtime did not kill the pod within specified grace period.")
			return fmt.Errorf("timeout waiting to kill pod")
		}
	}
}
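
// Illustrative use of the returned KillPodFunc (the exact wiring into the eviction
// manager is not shown here; kl and the status mutation are assumptions):
//
//	killFunc := killPodNow(kl.podWorkers, kl.recorder)
//	gracePeriodOverride := int64(0)
//	if err := killFunc(pod, true /* isEvicted */, &gracePeriodOverride, func(status *v1.PodStatus) {
//		status.Phase = v1.PodFailed
//		status.Reason = "Evicted"
//	}); err != nil {
//		klog.ErrorS(err, "Failed to kill pod", "pod", klog.KObj(pod))
//	}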