/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"context"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"hash/fnv"
	"math"
	"sync"
	"sync/atomic"
	"time"

	apps "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/clock"
	"k8s.io/apimachinery/pkg/util/rand"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	clientretry "k8s.io/client-go/util/retry"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/apis/core/helper"
	_ "k8s.io/kubernetes/pkg/apis/core/install"
	"k8s.io/kubernetes/pkg/apis/core/validation"
	"k8s.io/kubernetes/pkg/features"
	hashutil "k8s.io/kubernetes/pkg/util/hash"
	taintutils "k8s.io/kubernetes/pkg/util/taints"
	"k8s.io/utils/integer"

	"k8s.io/klog/v2"
)
const (
	// If a watch drops a delete event for a pod, it'll take this long
	// before a dormant controller waiting for those packets is woken up anyway. It is
	// specifically targeted at the case where some problem prevents an update
	// of expectations, without it the controller could stay asleep forever. This should
	// be set based on the expected latency of watch events.
	//
	// Currently a controller can service (create *and* observe the watch events for said
	// creation) about 10 pods a second, so it takes about 1 min to service
	// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
	// latency/pod at the scale of 3000 pods over 100 nodes.
	ExpectationsTimeout = 5 * time.Minute
	// When batching pod creates, SlowStartInitialBatchSize is the size of the
	// initial batch.  The size of each successive batch is twice the size of
	// the previous batch.  For example, for a value of 1, batch sizes would be
	// 1, 2, 4, 8, ...  and for a value of 10, batch sizes would be
	// 10, 20, 40, 80, ...  Setting the value higher means that quota denials
	// will result in more doomed API calls and associated event spam.  Setting
	// the value lower will result in more API call round trip periods for
	// large batches.
	//
	// Given a number of pods to start "N":
	// The number of doomed calls per sync once quota is exceeded is given by:
	//      min(N,SlowStartInitialBatchSize)
	// The number of batches is given by:
	//      1+floor(log_2(ceil(N/SlowStartInitialBatchSize)))
	SlowStartInitialBatchSize = 1
)
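
// slowStartBatchSizes is an illustrative sketch, not part of the original
// file and not called anywhere in this package: it shows how the doubling
// described above plays out. Starting from SlowStartInitialBatchSize, each
// batch is twice the previous one, capped at the number of pods remaining.
// For example, slowStartBatchSizes(10) returns [1 2 4 3] — four batches,
// matching 1+floor(log_2(ceil(10/1))).
func slowStartBatchSizes(n int) []int {
	var sizes []int
	for batch := integer.IntMin(n, SlowStartInitialBatchSize); n > 0; batch = integer.IntMin(2*batch, n) {
		sizes = append(sizes, batch)
		n -= batch
	}
	return sizes
}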

// UpdateTaintBackoff is the retry backoff used when updating a node's taints.
var UpdateTaintBackoff = wait.Backoff{
	Steps:    5,
	Duration: 100 * time.Millisecond,
	Jitter:   1.0,
}

// UpdateLabelBackoff is the retry backoff used when updating a node's labels.
var UpdateLabelBackoff = wait.Backoff{
	Steps:    5,
	Duration: 100 * time.Millisecond,
	Jitter:   1.0,
}

var (
	KeyFunc           = cache.DeletionHandlingMetaNamespaceKeyFunc
	podPhaseToOrdinal = map[v1.PodPhase]int{v1.PodPending: 0, v1.PodUnknown: 1, v1.PodRunning: 2}
)

// ResyncPeriodFunc returns the resync period of a controller.
type ResyncPeriodFunc func() time.Duration

// NoResyncPeriodFunc returns 0 for resyncPeriod in case resyncing is not needed.
func NoResyncPeriodFunc() time.Duration {
	return 0
}

// StaticResyncPeriodFunc returns the resync period specified.
func StaticResyncPeriodFunc(resyncPeriod time.Duration) ResyncPeriodFunc {
	return func() time.Duration {
		return resyncPeriod
	}
}

// Expectations are a way for controllers to tell the controller manager what they expect. eg:
//	ControllerExpectations: {
//		controller1: expects  2 adds in 2 minutes
//		controller2: expects  2 dels in 2 minutes
//		controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met
//	}
//
// Implementation:
//	ControlleeExpectations = pair of atomic counters to track controllee's creation/deletion
//	ControllerExpectationsStore = TTLStore + a ControlleeExpectations per controller
//
// * Once set, expectations can only be lowered
// * A controller isn't synced until its expectations are either fulfilled or expire
// * Controllers that don't set expectations will get woken up for every matching controllee

// ExpKeyFunc parses out the key from a ControlleeExpectations entry.
var ExpKeyFunc = func(obj interface{}) (string, error) {
	if e, ok := obj.(*ControlleeExpectations); ok {
		return e.key, nil
	}
	return "", fmt.Errorf("could not find key for obj %#v", obj)
}

// ControllerExpectationsInterface is an interface that allows users to set and wait on expectations.
// Only abstracted out for testing.
// Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different
// types of controllers, because the keys might conflict across types.
type ControllerExpectationsInterface interface {
	GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error)
	SatisfiedExpectations(controllerKey string) bool
	DeleteExpectations(controllerKey string)
	SetExpectations(controllerKey string, add, del int) error
	ExpectCreations(controllerKey string, adds int) error
	ExpectDeletions(controllerKey string, dels int) error
	CreationObserved(controllerKey string)
	DeletionObserved(controllerKey string)
	RaiseExpectations(controllerKey string, add, del int)
	LowerExpectations(controllerKey string, add, del int)
}

// ControllerExpectations is a cache mapping controllers to what they expect to see before being woken up for a sync.
type ControllerExpectations struct {
	cache.Store
}

// GetExpectations returns the ControlleeExpectations of the given controller.
func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) {
	exp, exists, err := r.GetByKey(controllerKey)
	if err == nil && exists {
		return exp.(*ControlleeExpectations), true, nil
	}
	return nil, false, err
}

// DeleteExpectations deletes the expectations of the given controller from the TTLStore.
func (r *ControllerExpectations) DeleteExpectations(controllerKey string) {
	if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
		if err := r.Delete(exp); err != nil {
			klog.V(2).Infof("Error deleting expectations for controller %v: %v", controllerKey, err)
		}
	}
}

// SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed.
// Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller
// manager.
func (r *ControllerExpectations) SatisfiedExpectations(controllerKey string) bool {
	if exp, exists, err := r.GetExpectations(controllerKey); exists {
		if exp.Fulfilled() {
			klog.V(4).Infof("Controller expectations fulfilled %#v", exp)
			return true
		} else if exp.isExpired() {
			klog.V(4).Infof("Controller expectations expired %#v", exp)
			return true
		} else {
			klog.V(4).Infof("Controller still waiting on expectations %#v", exp)
			return false
		}
	} else if err != nil {
		klog.V(2).Infof("Error encountered while checking expectations %#v, forcing sync", err)
	} else {
		// When a new controller is created, it doesn't have expectations.
		// When it doesn't see expected watch events for > TTL, the expectations expire.
		//	- In this case it wakes up, creates/deletes controllees, and sets expectations again.
		// When it has satisfied expectations and no controllees need to be created/destroyed > TTL, the expectations expire.
		//	- In this case it continues without setting expectations till it needs to create/delete controllees.
		klog.V(4).Infof("Controller %v either never recorded expectations, or the ttl expired.", controllerKey)
	}
	// Trigger a sync if we either encountered an error (which shouldn't happen since we're
	// getting from local store) or this controller hasn't established expectations.
	return true
}

// TODO: Extend ExpirationCache to support explicit expiration.
// TODO: Make this possible to disable in tests.
// TODO: Support injection of clock.
func (exp *ControlleeExpectations) isExpired() bool {
	return clock.RealClock{}.Since(exp.timestamp) > ExpectationsTimeout
}

// SetExpectations registers new expectations for the given controller. Forgets existing expectations.
func (r *ControllerExpectations) SetExpectations(controllerKey string, add, del int) error {
	exp := &ControlleeExpectations{add: int64(add), del: int64(del), key: controllerKey, timestamp: clock.RealClock{}.Now()}
	klog.V(4).Infof("Setting expectations %#v", exp)
	return r.Add(exp)
}

// ExpectCreations records the expectation that `adds` creations will be observed for the given controller.
func (r *ControllerExpectations) ExpectCreations(controllerKey string, adds int) error {
	return r.SetExpectations(controllerKey, adds, 0)
}

// ExpectDeletions records the expectation that `dels` deletions will be observed for the given controller.
func (r *ControllerExpectations) ExpectDeletions(controllerKey string, dels int) error {
	return r.SetExpectations(controllerKey, 0, dels)
}

// LowerExpectations decrements the expectation counts of the given controller.
func (r *ControllerExpectations) LowerExpectations(controllerKey string, add, del int) {
	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
		exp.Add(int64(-add), int64(-del))
		// The expectations might've been modified since the update on the previous line.
		klog.V(4).Infof("Lowered expectations %#v", exp)
	}
}

// RaiseExpectations increments the expectation counts of the given controller.
func (r *ControllerExpectations) RaiseExpectations(controllerKey string, add, del int) {
	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
		exp.Add(int64(add), int64(del))
		// The expectations might've been modified since the update on the previous line.
		klog.V(4).Infof("Raised expectations %#v", exp)
	}
}

// CreationObserved atomically decrements the `add` expectation count of the given controller.
func (r *ControllerExpectations) CreationObserved(controllerKey string) {
	r.LowerExpectations(controllerKey, 1, 0)
}

// DeletionObserved atomically decrements the `del` expectation count of the given controller.
func (r *ControllerExpectations) DeletionObserved(controllerKey string) {
	r.LowerExpectations(controllerKey, 0, 1)
}

// ControlleeExpectations track controllee creates/deletes.
type ControlleeExpectations struct {
	// Important: Since these two int64 fields are using sync/atomic, they have to be at the top of the struct due to a bug on 32-bit platforms
	// See: https://golang.org/pkg/sync/atomic/ for more information
	add       int64
	del       int64
	key       string
	timestamp time.Time
}

// Add increments the add and del counters.
func (e *ControlleeExpectations) Add(add, del int64) {
	atomic.AddInt64(&e.add, add)
	atomic.AddInt64(&e.del, del)
}

// Fulfilled returns true if this expectation has been fulfilled.
func (e *ControlleeExpectations) Fulfilled() bool {
	// TODO: think about why this line being atomic doesn't matter
	return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0
}

// GetExpectations returns the add and del expectations of the controllee.
func (e *ControlleeExpectations) GetExpectations() (int64, int64) {
	return atomic.LoadInt64(&e.add), atomic.LoadInt64(&e.del)
}

// NewControllerExpectations returns a store for ControllerExpectations.
func NewControllerExpectations() *ControllerExpectations {
	return &ControllerExpectations{cache.NewStore(ExpKeyFunc)}
}
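
// exampleExpectationsLifecycle is an illustrative sketch (hypothetical, not
// called anywhere in this package) of how a controller typically drives the
// expectations cache around a sync: record intent before acting, let watch
// events lower the counts, then check whether another sync may proceed.
func exampleExpectationsLifecycle() bool {
	exp := NewControllerExpectations()
	key := "default/my-rs" // hypothetical controller key
	// Before issuing three pod creates, record the expectation.
	if err := exp.ExpectCreations(key, 3); err != nil {
		return false
	}
	// Each informer add event for a matching pod lowers the count.
	exp.CreationObserved(key)
	exp.CreationObserved(key)
	exp.CreationObserved(key)
	// True once all expected adds are observed (or the TTL expires).
	return exp.SatisfiedExpectations(key)
}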

// UIDSetKeyFunc parses out the key from a UIDSet.
var UIDSetKeyFunc = func(obj interface{}) (string, error) {
	if u, ok := obj.(*UIDSet); ok {
		return u.key, nil
	}
	return "", fmt.Errorf("could not find key for obj %#v", obj)
}

// UIDSet holds a key and a set of UIDs. Used by the
// UIDTrackingControllerExpectations to remember which UIDs it has seen/is
// still waiting for.
type UIDSet struct {
	sets.String
	key string
}

// UIDTrackingControllerExpectations tracks the UIDs of the pods it deletes.
// This cache is needed over plain old expectations to safely handle graceful
// deletion. The desired behavior is to treat an update that sets the
// DeletionTimestamp on an object as a delete. To do so consistently, one needs
// to remember the expected deletes so they aren't double counted.
// TODO: Track creates as well (#22599)
type UIDTrackingControllerExpectations struct {
	ControllerExpectationsInterface
	// TODO: There is a much nicer way to do this that involves a single store,
	// a lock per entry, and a ControlleeExpectationsInterface type.
	uidStoreLock sync.Mutex
	// Store used for the UIDs associated with any expectation tracked via the
	// ControllerExpectationsInterface.
	uidStore cache.Store
}

// GetUIDs is a convenience method to avoid exposing the set of expected uids.
// The returned set is not thread safe, all modifications must be made holding
// the uidStoreLock.
func (u *UIDTrackingControllerExpectations) GetUIDs(controllerKey string) sets.String {
	if uid, exists, err := u.uidStore.GetByKey(controllerKey); err == nil && exists {
		return uid.(*UIDSet).String
	}
	return nil
}

// ExpectDeletions records expectations for the given deletedKeys, against the given controller.
func (u *UIDTrackingControllerExpectations) ExpectDeletions(rcKey string, deletedKeys []string) error {
	expectedUIDs := sets.NewString()
	for _, k := range deletedKeys {
		expectedUIDs.Insert(k)
	}
	klog.V(4).Infof("Controller %v waiting on deletions for: %+v", rcKey, deletedKeys)
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	if existing := u.GetUIDs(rcKey); existing != nil && existing.Len() != 0 {
		klog.Errorf("Clobbering existing delete keys: %+v", existing)
	}
	if err := u.uidStore.Add(&UIDSet{expectedUIDs, rcKey}); err != nil {
		return err
	}
	return u.ControllerExpectationsInterface.ExpectDeletions(rcKey, expectedUIDs.Len())
}

// DeletionObserved records the given deleteKey as a deletion, for the given rc.
func (u *UIDTrackingControllerExpectations) DeletionObserved(rcKey, deleteKey string) {
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	uids := u.GetUIDs(rcKey)
	if uids != nil && uids.Has(deleteKey) {
		klog.V(4).Infof("Controller %v received delete for pod %v", rcKey, deleteKey)
		u.ControllerExpectationsInterface.DeletionObserved(rcKey)
		uids.Delete(deleteKey)
	}
}

// DeleteExpectations deletes the UID set and invokes DeleteExpectations on the
// underlying ControllerExpectationsInterface.
func (u *UIDTrackingControllerExpectations) DeleteExpectations(rcKey string) {
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	u.ControllerExpectationsInterface.DeleteExpectations(rcKey)
	if uidExp, exists, err := u.uidStore.GetByKey(rcKey); err == nil && exists {
		if err := u.uidStore.Delete(uidExp); err != nil {
			klog.V(2).Infof("Error deleting uid expectations for controller %v: %v", rcKey, err)
		}
	}
}

// NewUIDTrackingControllerExpectations returns a wrapper around
// ControllerExpectations that is aware of deleteKeys.
func NewUIDTrackingControllerExpectations(ce ControllerExpectationsInterface) *UIDTrackingControllerExpectations {
	return &UIDTrackingControllerExpectations{ControllerExpectationsInterface: ce, uidStore: cache.NewStore(UIDSetKeyFunc)}
}
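
// exampleUIDTrackedDeletions is an illustrative sketch (hypothetical, not
// called anywhere in this package): because the pod keys being deleted are
// remembered, a graceful-deletion update and the eventual delete event for
// the same pod are only counted once.
func exampleUIDTrackedDeletions() bool {
	exp := NewUIDTrackingControllerExpectations(NewControllerExpectations())
	key := "default/my-rs" // hypothetical controller key
	if err := exp.ExpectDeletions(key, []string{"default/pod-a", "default/pod-b"}); err != nil {
		return false
	}
	exp.DeletionObserved(key, "default/pod-a") // lowers the del count
	exp.DeletionObserved(key, "default/pod-a") // duplicate for the same key: ignored
	exp.DeletionObserved(key, "default/pod-b")
	return exp.SatisfiedExpectations(key)
}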

// Reasons for pod events
const (
	// FailedCreatePodReason is added in an event and in a replica set condition
	// when a pod for a replica set fails to be created.
	FailedCreatePodReason = "FailedCreate"
	// SuccessfulCreatePodReason is added in an event when a pod for a replica set
	// is successfully created.
	SuccessfulCreatePodReason = "SuccessfulCreate"
	// FailedDeletePodReason is added in an event and in a replica set condition
	// when a pod for a replica set fails to be deleted.
	FailedDeletePodReason = "FailedDelete"
	// SuccessfulDeletePodReason is added in an event when a pod for a replica set
	// is successfully deleted.
	SuccessfulDeletePodReason = "SuccessfulDelete"
)

// RSControlInterface is an interface that knows how to add or delete
// ReplicaSets, as well as increment or decrement them. It is used
// by the deployment controller to ease testing of actions that it takes.
type RSControlInterface interface {
	PatchReplicaSet(namespace, name string, data []byte) error
}

// RealRSControl is the default implementation of RSControlInterface.
type RealRSControl struct {
	KubeClient clientset.Interface
	Recorder   record.EventRecorder
}

var _ RSControlInterface = &RealRSControl{}

// PatchReplicaSet issues a strategic merge patch against the named ReplicaSet.
func (r RealRSControl) PatchReplicaSet(namespace, name string, data []byte) error {
	_, err := r.KubeClient.AppsV1().ReplicaSets(namespace).Patch(context.TODO(), name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

// TODO: merge the controller revision interface in controller_history.go with this one
// ControllerRevisionControlInterface is an interface that knows how to patch
// ControllerRevisions, as well as increment or decrement them. It is used
// by the daemonset controller to ease testing of actions that it takes.
type ControllerRevisionControlInterface interface {
	PatchControllerRevision(namespace, name string, data []byte) error
}

// RealControllerRevisionControl is the default implementation of ControllerRevisionControlInterface.
type RealControllerRevisionControl struct {
	KubeClient clientset.Interface
}

var _ ControllerRevisionControlInterface = &RealControllerRevisionControl{}

// PatchControllerRevision issues a strategic merge patch against the named ControllerRevision.
func (r RealControllerRevisionControl) PatchControllerRevision(namespace, name string, data []byte) error {
	_, err := r.KubeClient.AppsV1().ControllerRevisions(namespace).Patch(context.TODO(), name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

// PodControlInterface is an interface that knows how to add or delete pods,
// created as an interface to allow testing.
type PodControlInterface interface {
	// CreatePods creates new pods according to the spec.
	CreatePods(namespace string, template *v1.PodTemplateSpec, object runtime.Object) error
	// CreatePodsOnNode creates a new pod according to the spec on the specified node,
	// and sets the ControllerRef.
	CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error
	// CreatePodsWithControllerRef creates new pods according to the spec, and sets object as the pod's controller.
	CreatePodsWithControllerRef(namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error
	// DeletePod deletes the pod identified by podID.
	DeletePod(namespace string, podID string, object runtime.Object) error
	// PatchPod patches the pod.
	PatchPod(namespace, name string, data []byte) error
}

// RealPodControl is the default implementation of PodControlInterface.
type RealPodControl struct {
	KubeClient clientset.Interface
	Recorder   record.EventRecorder
}

var _ PodControlInterface = &RealPodControl{}

func getPodsLabelSet(template *v1.PodTemplateSpec) labels.Set {
	desiredLabels := make(labels.Set)
	for k, v := range template.Labels {
		desiredLabels[k] = v
	}
	return desiredLabels
}

func getPodsFinalizers(template *v1.PodTemplateSpec) []string {
	desiredFinalizers := make([]string, len(template.Finalizers))
	copy(desiredFinalizers, template.Finalizers)
	return desiredFinalizers
}

func getPodsAnnotationSet(template *v1.PodTemplateSpec) labels.Set {
	desiredAnnotations := make(labels.Set)
	for k, v := range template.Annotations {
		desiredAnnotations[k] = v
	}
	return desiredAnnotations
}

func getPodsPrefix(controllerName string) string {
	// use the dash (if the name isn't too long) to make the pod name a bit prettier
	prefix := fmt.Sprintf("%s-", controllerName)
	if len(validation.ValidatePodName(prefix, true)) != 0 {
		prefix = controllerName
	}
	return prefix
}

func validateControllerRef(controllerRef *metav1.OwnerReference) error {
	if controllerRef == nil {
		return fmt.Errorf("controllerRef is nil")
	}
	if len(controllerRef.APIVersion) == 0 {
		return fmt.Errorf("controllerRef has empty APIVersion")
	}
	if len(controllerRef.Kind) == 0 {
		return fmt.Errorf("controllerRef has empty Kind")
	}
	if controllerRef.Controller == nil || !*controllerRef.Controller {
		return fmt.Errorf("controllerRef.Controller is not set to true")
	}
	if controllerRef.BlockOwnerDeletion == nil || !*controllerRef.BlockOwnerDeletion {
		return fmt.Errorf("controllerRef.BlockOwnerDeletion is not set")
	}
	return nil
}
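
// exampleControllerRef is an illustrative sketch (hypothetical, not called
// anywhere in this package) of an OwnerReference that passes
// validateControllerRef: both Controller and BlockOwnerDeletion must be
// non-nil pointers to true.
func exampleControllerRef(rs *apps.ReplicaSet) *metav1.OwnerReference {
	isController := true
	blockOwnerDeletion := true
	return &metav1.OwnerReference{
		APIVersion:         "apps/v1",
		Kind:               "ReplicaSet",
		Name:               rs.Name,
		UID:                rs.UID,
		Controller:         &isController,
		BlockOwnerDeletion: &blockOwnerDeletion,
	}
}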

func (r RealPodControl) CreatePods(namespace string, template *v1.PodTemplateSpec, object runtime.Object) error {
	return r.createPods("", namespace, template, object, nil)
}

func (r RealPodControl) CreatePodsWithControllerRef(namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference) error {
	if err := validateControllerRef(controllerRef); err != nil {
		return err
	}
	return r.createPods("", namespace, template, controllerObject, controllerRef)
}

func (r RealPodControl) CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
	if err := validateControllerRef(controllerRef); err != nil {
		return err
	}
	return r.createPods(nodeName, namespace, template, object, controllerRef)
}

func (r RealPodControl) PatchPod(namespace, name string, data []byte) error {
	_, err := r.KubeClient.CoreV1().Pods(namespace).Patch(context.TODO(), name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

func GetPodFromTemplate(template *v1.PodTemplateSpec, parentObject runtime.Object, controllerRef *metav1.OwnerReference) (*v1.Pod, error) {
	desiredLabels := getPodsLabelSet(template)
	desiredFinalizers := getPodsFinalizers(template)
	desiredAnnotations := getPodsAnnotationSet(template)
	accessor, err := meta.Accessor(parentObject)
	if err != nil {
		return nil, fmt.Errorf("parentObject does not have ObjectMeta, %v", err)
	}
	prefix := getPodsPrefix(accessor.GetName())

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Labels:       desiredLabels,
			Annotations:  desiredAnnotations,
			GenerateName: prefix,
			Finalizers:   desiredFinalizers,
		},
	}
	if controllerRef != nil {
		pod.OwnerReferences = append(pod.OwnerReferences, *controllerRef)
	}
	pod.Spec = *template.Spec.DeepCopy()
	return pod, nil
}

func (r RealPodControl) createPods(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
	pod, err := GetPodFromTemplate(template, object, controllerRef)
	if err != nil {
		return err
	}
	if len(nodeName) != 0 {
		pod.Spec.NodeName = nodeName
	}
	if len(labels.Set(pod.Labels)) == 0 {
		return fmt.Errorf("unable to create pods, no labels")
	}
	newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(context.TODO(), pod, metav1.CreateOptions{})
	if err != nil {
		// only send an event if the namespace isn't terminating
		if !apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
			r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
		}
		return err
	}
	accessor, err := meta.Accessor(object)
	if err != nil {
		klog.Errorf("parentObject does not have ObjectMeta, %v", err)
		return nil
	}
	klog.V(4).Infof("Controller %v created pod %v", accessor.GetName(), newPod.Name)
	r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulCreatePodReason, "Created pod: %v", newPod.Name)

	return nil
}

func (r RealPodControl) DeletePod(namespace string, podID string, object runtime.Object) error {
	accessor, err := meta.Accessor(object)
	if err != nil {
		return fmt.Errorf("object does not have ObjectMeta, %v", err)
	}
	klog.V(2).InfoS("Deleting pod", "controller", accessor.GetName(), "pod", klog.KRef(namespace, podID))
	if err := r.KubeClient.CoreV1().Pods(namespace).Delete(context.TODO(), podID, metav1.DeleteOptions{}); err != nil {
		if apierrors.IsNotFound(err) {
			klog.V(4).Infof("pod %v/%v has already been deleted.", namespace, podID)
			return err
		}
		r.Recorder.Eventf(object, v1.EventTypeWarning, FailedDeletePodReason, "Error deleting: %v", err)
		return fmt.Errorf("unable to delete pods: %v", err)
	}
	r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulDeletePodReason, "Deleted pod: %v", podID)

	return nil
}

// FakePodControl is a fake implementation of PodControlInterface that records
// the calls made to it, for use in tests.
type FakePodControl struct {
	sync.Mutex
	Templates       []v1.PodTemplateSpec
	ControllerRefs  []metav1.OwnerReference
	DeletePodName   []string
	Patches         [][]byte
	Err             error
	CreateLimit     int
	CreateCallCount int
}

var _ PodControlInterface = &FakePodControl{}

func (f *FakePodControl) PatchPod(namespace, name string, data []byte) error {
	f.Lock()
	defer f.Unlock()
	f.Patches = append(f.Patches, data)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) CreatePods(namespace string, spec *v1.PodTemplateSpec, object runtime.Object) error {
	f.Lock()
	defer f.Unlock()
	f.CreateCallCount++
	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
		return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
	}
	f.Templates = append(f.Templates, *spec)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) CreatePodsWithControllerRef(namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
	f.Lock()
	defer f.Unlock()
	f.CreateCallCount++
	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
		return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
	}
	f.Templates = append(f.Templates, *spec)
	f.ControllerRefs = append(f.ControllerRefs, *controllerRef)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
	f.Lock()
	defer f.Unlock()
	f.CreateCallCount++
	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
		return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
	}
	f.Templates = append(f.Templates, *template)
	f.ControllerRefs = append(f.ControllerRefs, *controllerRef)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) DeletePod(namespace string, podID string, object runtime.Object) error {
	f.Lock()
	defer f.Unlock()
	f.DeletePodName = append(f.DeletePodName, podID)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

// Clear resets all recorded calls and limits.
func (f *FakePodControl) Clear() {
	f.Lock()
	defer f.Unlock()
	f.DeletePodName = []string{}
	f.Templates = []v1.PodTemplateSpec{}
	f.ControllerRefs = []metav1.OwnerReference{}
	f.Patches = [][]byte{}
	f.CreateLimit = 0
	f.CreateCallCount = 0
}

// ByLogging allows custom sorting of pods so the best one can be picked for getting its logs.
type ByLogging []*v1.Pod

func (s ByLogging) Len() int      { return len(s) }
func (s ByLogging) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

func (s ByLogging) Less(i, j int) bool {
	// 1. assigned < unassigned
	if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
		return len(s[i].Spec.NodeName) > 0
	}
	// 2. PodRunning < PodUnknown < PodPending
	if s[i].Status.Phase != s[j].Status.Phase {
		return podPhaseToOrdinal[s[i].Status.Phase] > podPhaseToOrdinal[s[j].Status.Phase]
	}
	// 3. ready < not ready
	if podutil.IsPodReady(s[i]) != podutil.IsPodReady(s[j]) {
		return podutil.IsPodReady(s[i])
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	//       see https://github.com/kubernetes/kubernetes/issues/22065
	// 4. Been ready for more time < less time < empty time
	if podutil.IsPodReady(s[i]) && podutil.IsPodReady(s[j]) {
		readyTime1 := podReadyTime(s[i])
		readyTime2 := podReadyTime(s[j])
		if !readyTime1.Equal(readyTime2) {
			return afterOrZero(readyTime2, readyTime1)
		}
	}
	// 5. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
		return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
	}
	// 6. older pods < newer pods < empty timestamp pods
	if !s[i].CreationTimestamp.Equal(&s[j].CreationTimestamp) {
		return afterOrZero(&s[j].CreationTimestamp, &s[i].CreationTimestamp)
	}
	return false
}

// ActivePods type allows custom sorting of pods so a controller can pick the best ones to delete.
type ActivePods []*v1.Pod

func (s ActivePods) Len() int      { return len(s) }
func (s ActivePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

func (s ActivePods) Less(i, j int) bool {
	// 1. Unassigned < assigned
	// If only one of the pods is unassigned, the unassigned one is smaller
	if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
		return len(s[i].Spec.NodeName) == 0
	}
	// 2. PodPending < PodUnknown < PodRunning
	if podPhaseToOrdinal[s[i].Status.Phase] != podPhaseToOrdinal[s[j].Status.Phase] {
		return podPhaseToOrdinal[s[i].Status.Phase] < podPhaseToOrdinal[s[j].Status.Phase]
	}
	// 3. Not ready < ready
	// If only one of the pods is not ready, the not ready one is smaller
	if podutil.IsPodReady(s[i]) != podutil.IsPodReady(s[j]) {
		return !podutil.IsPodReady(s[i])
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	//       see https://github.com/kubernetes/kubernetes/issues/22065
	// 4. Been ready for empty time < less time < more time
	// If both pods are ready, the latest ready one is smaller
	if podutil.IsPodReady(s[i]) && podutil.IsPodReady(s[j]) {
		readyTime1 := podReadyTime(s[i])
		readyTime2 := podReadyTime(s[j])
		if !readyTime1.Equal(readyTime2) {
			return afterOrZero(readyTime1, readyTime2)
		}
	}
	// 5. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
		return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
	}
	// 6. Empty creation time pods < newer pods < older pods
	if !s[i].CreationTimestamp.Equal(&s[j].CreationTimestamp) {
		return afterOrZero(&s[i].CreationTimestamp, &s[j].CreationTimestamp)
	}
	return false
}

// ActivePodsWithRanks is a sortable list of pods and a list of corresponding
// ranks which will be considered during sorting.  The two lists must have equal
// length.  After sorting, the pods will be ordered as follows, applying each
// rule in turn until one matches:
//
// 1. If only one of the pods is assigned to a node, the pod that is not
//    assigned comes before the pod that is.
// 2. If the pods' phases differ, a pending pod comes before a pod whose phase
//    is unknown, and a pod whose phase is unknown comes before a running pod.
// 3. If exactly one of the pods is ready, the pod that is not ready comes
//    before the ready pod.
// 4. If the controller.kubernetes.io/pod-deletion-cost annotation is set, then
//    the pod with the lower value will come first.
// 5. If the pods' ranks differ, the pod with greater rank comes before the pod
//    with lower rank.
// 6. If both pods are ready but have not been ready for the same amount of
//    time, the pod that has been ready for a shorter amount of time comes
//    before the pod that has been ready for longer.
// 7. If one pod has a container that has restarted more than any container in
//    the other pod, the pod with the container with more restarts comes
//    before the other pod.
// 8. If the pods' creation times differ, the pod that was created more recently
//    comes before the older pod.
//
// In 6 and 8, times are compared in a logarithmic scale. This allows a level
// of randomness among equivalent Pods when sorting. If two pods have the same
// logarithmic rank, they are sorted by UID to provide a pseudorandom order.
//
// If none of these rules matches, the second pod comes before the first pod.
//
// The intention of this ordering is to put pods that should be preferred for
// deletion first in the list.
type ActivePodsWithRanks struct {
	// Pods is a list of pods.
	Pods []*v1.Pod

	// Rank is a ranking of pods.  This ranking is used during sorting when
	// comparing two pods that are both scheduled, in the same phase, and
	// having the same ready status.
	Rank []int

	// Now is a reference timestamp for doing logarithmic timestamp comparisons.
	// If zero, comparison happens without scaling.
	Now metav1.Time
}

func (s ActivePodsWithRanks) Len() int {
	return len(s.Pods)
}

func (s ActivePodsWithRanks) Swap(i, j int) {
	s.Pods[i], s.Pods[j] = s.Pods[j], s.Pods[i]
	s.Rank[i], s.Rank[j] = s.Rank[j], s.Rank[i]
}

// Less compares two pods with corresponding ranks and returns true if the first
// one should be preferred for deletion.
func (s ActivePodsWithRanks) Less(i, j int) bool {
	// 1. Unassigned < assigned
	// If only one of the pods is unassigned, the unassigned one is smaller
	if s.Pods[i].Spec.NodeName != s.Pods[j].Spec.NodeName && (len(s.Pods[i].Spec.NodeName) == 0 || len(s.Pods[j].Spec.NodeName) == 0) {
		return len(s.Pods[i].Spec.NodeName) == 0
	}
	// 2. PodPending < PodUnknown < PodRunning
	if podPhaseToOrdinal[s.Pods[i].Status.Phase] != podPhaseToOrdinal[s.Pods[j].Status.Phase] {
		return podPhaseToOrdinal[s.Pods[i].Status.Phase] < podPhaseToOrdinal[s.Pods[j].Status.Phase]
	}
	// 3. Not ready < ready
	// If only one of the pods is not ready, the not ready one is smaller
	if podutil.IsPodReady(s.Pods[i]) != podutil.IsPodReady(s.Pods[j]) {
		return !podutil.IsPodReady(s.Pods[i])
	}

	// 4. higher pod-deletion-cost < lower pod-deletion cost
	if utilfeature.DefaultFeatureGate.Enabled(features.PodDeletionCost) {
		pi, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[i].Annotations)
		pj, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[j].Annotations)
		if pi != pj {
			return pi < pj
		}
	}

	// 5. Doubled up < not doubled up
	// If one of the two pods is on the same node as one or more additional
	// ready pods that belong to the same replicaset, whichever pod has more
	// colocated ready pods is less
	if s.Rank[i] != s.Rank[j] {
		return s.Rank[i] > s.Rank[j]
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	//       see https://github.com/kubernetes/kubernetes/issues/22065
	// 6. Been ready for empty time < less time < more time
	// If both pods are ready, the latest ready one is smaller
	if podutil.IsPodReady(s.Pods[i]) && podutil.IsPodReady(s.Pods[j]) {
		readyTime1 := podReadyTime(s.Pods[i])
		readyTime2 := podReadyTime(s.Pods[j])
		if !readyTime1.Equal(readyTime2) {
			if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
				return afterOrZero(readyTime1, readyTime2)
			} else {
				if s.Now.IsZero() || readyTime1.IsZero() || readyTime2.IsZero() {
					return afterOrZero(readyTime1, readyTime2)
				}
				rankDiff := logarithmicRankDiff(*readyTime1, *readyTime2, s.Now)
				if rankDiff == 0 {
					return s.Pods[i].UID < s.Pods[j].UID
				}
				return rankDiff < 0
			}
		}
	}
	// 7. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s.Pods[i]) != maxContainerRestarts(s.Pods[j]) {
		return maxContainerRestarts(s.Pods[i]) > maxContainerRestarts(s.Pods[j])
	}
	// 8. Empty creation time pods < newer pods < older pods
	if !s.Pods[i].CreationTimestamp.Equal(&s.Pods[j].CreationTimestamp) {
		if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
			return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
		} else {
			if s.Now.IsZero() || s.Pods[i].CreationTimestamp.IsZero() || s.Pods[j].CreationTimestamp.IsZero() {
				return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
			}
			rankDiff := logarithmicRankDiff(s.Pods[i].CreationTimestamp, s.Pods[j].CreationTimestamp, s.Now)
			if rankDiff == 0 {
				return s.Pods[i].UID < s.Pods[j].UID
			}
			return rankDiff < 0
		}
	}
	return false
}
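
// A typical use of ActivePodsWithRanks (illustrative sketch; `pods` and
// `ranks` are hypothetical, and `sort` would need to be imported):
//
//	sort.Sort(ActivePodsWithRanks{Pods: pods, Rank: ranks, Now: metav1.Now()})
//	// pods[0..k-1] are now the k pods most preferable for deletion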

// afterOrZero checks if time t1 is after time t2; if one of them
// is zero, the zero time is seen as after non-zero time.
func afterOrZero(t1, t2 *metav1.Time) bool {
	if t1.Time.IsZero() || t2.Time.IsZero() {
		return t1.Time.IsZero()
	}
	return t1.After(t2.Time)
}

// logarithmicRankDiff calculates the base-2 logarithmic ranks of 2 timestamps,
// compared to the current timestamp
func logarithmicRankDiff(t1, t2, now metav1.Time) int64 {
	d1 := now.Sub(t1.Time)
	d2 := now.Sub(t2.Time)
	r1 := int64(-1)
	r2 := int64(-1)
	if d1 > 0 {
		r1 = int64(math.Log2(float64(d1)))
	}
	if d2 > 0 {
		r2 = int64(math.Log2(float64(d2)))
	}
	return r1 - r2
}
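
// exampleLogarithmicBuckets is an illustrative sketch (hypothetical, not
// called anywhere in this package): pods that became ready 100s and 110s ago
// fall into the same base-2 bucket (floor(log2(d)) is 36 for both durations
// in nanoseconds), so the rank diff is 0 and Less falls back to comparing UIDs.
func exampleLogarithmicBuckets() int64 {
	now := metav1.Now()
	t1 := metav1.NewTime(now.Add(-100 * time.Second))
	t2 := metav1.NewTime(now.Add(-110 * time.Second))
	return logarithmicRankDiff(t1, t2, now) // 0
}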

func podReadyTime(pod *v1.Pod) *metav1.Time {
	if podutil.IsPodReady(pod) {
		for _, c := range pod.Status.Conditions {
			// we only care about pod ready conditions
			if c.Type == v1.PodReady && c.Status == v1.ConditionTrue {
				return &c.LastTransitionTime
			}
		}
	}
	return &metav1.Time{}
}

func maxContainerRestarts(pod *v1.Pod) int {
	maxRestarts := 0
	for _, c := range pod.Status.ContainerStatuses {
		maxRestarts = integer.IntMax(maxRestarts, int(c.RestartCount))
	}
	return maxRestarts
}

// FilterActivePods returns pods that have not terminated.
func FilterActivePods(pods []*v1.Pod) []*v1.Pod {
	var result []*v1.Pod
	for _, p := range pods {
		if IsPodActive(p) {
			result = append(result, p)
		} else {
			klog.V(4).Infof("Ignoring inactive pod %v/%v in state %v, deletion time %v",
				p.Namespace, p.Name, p.Status.Phase, p.DeletionTimestamp)
		}
	}
	return result
}

// IsPodActive returns true if the pod has neither succeeded nor failed and is
// not marked for deletion.
func IsPodActive(p *v1.Pod) bool {
	return v1.PodSucceeded != p.Status.Phase &&
		v1.PodFailed != p.Status.Phase &&
		p.DeletionTimestamp == nil
}

// FilterActiveReplicaSets returns replica sets that have (or at least ought to have) pods.
func FilterActiveReplicaSets(replicaSets []*apps.ReplicaSet) []*apps.ReplicaSet {
	activeFilter := func(rs *apps.ReplicaSet) bool {
		return rs != nil && *(rs.Spec.Replicas) > 0
	}
	return FilterReplicaSets(replicaSets, activeFilter)
}

type filterRS func(rs *apps.ReplicaSet) bool

// FilterReplicaSets returns replica sets that are filtered by filterFn (all returned ones should match filterFn).
func FilterReplicaSets(RSes []*apps.ReplicaSet, filterFn filterRS) []*apps.ReplicaSet {
	var filtered []*apps.ReplicaSet
	for i := range RSes {
		if filterFn(RSes[i]) {
			filtered = append(filtered, RSes[i])
		}
	}
	return filtered
}

// PodKey returns a key unique to the given pod within a cluster.
// It's used so we consistently use the same key scheme in this module.
// It does exactly what cache.MetaNamespaceKeyFunc would have done
// except there's no possibility for error since we know the exact type.
func PodKey(pod *v1.Pod) string {
	return fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)
}

// ControllersByCreationTimestamp sorts a list of ReplicationControllers by creation timestamp, using their names as a tie breaker.
type ControllersByCreationTimestamp []*v1.ReplicationController

func (o ControllersByCreationTimestamp) Len() int      { return len(o) }
func (o ControllersByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ControllersByCreationTimestamp) Less(i, j int) bool {
	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
		return o[i].Name < o[j].Name
	}
	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}

// ReplicaSetsByCreationTimestamp sorts a list of ReplicaSet by creation timestamp, using their names as a tie breaker.
type ReplicaSetsByCreationTimestamp []*apps.ReplicaSet

func (o ReplicaSetsByCreationTimestamp) Len() int      { return len(o) }
func (o ReplicaSetsByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsByCreationTimestamp) Less(i, j int) bool {
	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
		return o[i].Name < o[j].Name
	}
	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}

// ReplicaSetsBySizeOlder sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
// By using the creation timestamp, this sorts from old to new replica sets.
type ReplicaSetsBySizeOlder []*apps.ReplicaSet

func (o ReplicaSetsBySizeOlder) Len() int      { return len(o) }
func (o ReplicaSetsBySizeOlder) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsBySizeOlder) Less(i, j int) bool {
	if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
		return ReplicaSetsByCreationTimestamp(o).Less(i, j)
	}
	return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
}

// ReplicaSetsBySizeNewer sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
// By using the creation timestamp, this sorts from new to old replica sets.
type ReplicaSetsBySizeNewer []*apps.ReplicaSet

func (o ReplicaSetsBySizeNewer) Len() int      { return len(o) }
func (o ReplicaSetsBySizeNewer) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsBySizeNewer) Less(i, j int) bool {
	if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
		return ReplicaSetsByCreationTimestamp(o).Less(j, i)
	}
	return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
}

// AddOrUpdateTaintOnNode adds taints to the node. If any taint was added or updated, it issues
// API calls to update the node; otherwise, no API calls are made. Returns an error if any.
func AddOrUpdateTaintOnNode(c clientset.Interface, nodeName string, taints ...*v1.Taint) error {
	if len(taints) == 0 {
		return nil
	}
	firstTry := true
	return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
		var err error
		var oldNode *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		if firstTry {
			oldNode, err = c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{ResourceVersion: "0"})
			firstTry = false
		} else {
			oldNode, err = c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
		}
		if err != nil {
			return err
		}

		var newNode *v1.Node
		oldNodeCopy := oldNode
		updated := false
		for _, taint := range taints {
			curNewNode, ok, err := taintutils.AddOrUpdateTaint(oldNodeCopy, taint)
			if err != nil {
				return fmt.Errorf("failed to update taint of node")
			}
			updated = updated || ok
			newNode = curNewNode
			oldNodeCopy = curNewNode
		}
		if !updated {
			return nil
		}
		return PatchNodeTaints(c, nodeName, oldNode, newNode)
	})
}

// RemoveTaintOffNode is for cleaning up taints temporarily added to a node;
// it won't fail if the target taint doesn't exist or has already been removed.
// If passed a node, it checks whether there is anything to be done; if the
// taint is not present, it issues no API calls.
func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, taints ...*v1.Taint) error {
	if len(taints) == 0 {
		return nil
	}
	// Short circuit for limiting amount of API calls.
	if node != nil {
		match := false
		for _, taint := range taints {
			if taintutils.TaintExists(node.Spec.Taints, taint) {
				match = true
				break
			}
		}
		if !match {
			return nil
		}
	}

	firstTry := true
	return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
		var err error
		var oldNode *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		if firstTry {
			oldNode, err = c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{ResourceVersion: "0"})
			firstTry = false
		} else {
			oldNode, err = c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
		}
		if err != nil {
			return err
		}

		var newNode *v1.Node
		oldNodeCopy := oldNode
		updated := false
		for _, taint := range taints {
			curNewNode, ok, err := taintutils.RemoveTaint(oldNodeCopy, taint)
			if err != nil {
				return fmt.Errorf("failed to remove taint of node")
			}
			updated = updated || ok
			newNode = curNewNode
			oldNodeCopy = curNewNode
		}
		if !updated {
			return nil
		}
		return PatchNodeTaints(c, nodeName, oldNode, newNode)
	})
}
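
// Illustrative usage sketch for the two taint helpers above (the client,
// node name, and taint value are hypothetical):
//
//	taint := &v1.Taint{Key: "node.kubernetes.io/unreachable", Effect: v1.TaintEffectNoExecute}
//	_ = AddOrUpdateTaintOnNode(client, "node-1", taint)
//	_ = RemoveTaintOffNode(client, "node-1", nil, taint)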

// PatchNodeTaints patches node's taints.
func PatchNodeTaints(c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error {
	oldData, err := json.Marshal(oldNode)
	if err != nil {
		return fmt.Errorf("failed to marshal old node %#v for node %q: %v", oldNode, nodeName, err)
	}

	newTaints := newNode.Spec.Taints
	newNodeClone := oldNode.DeepCopy()
	newNodeClone.Spec.Taints = newTaints
	newData, err := json.Marshal(newNodeClone)
	if err != nil {
		return fmt.Errorf("failed to marshal new node %#v for node %q: %v", newNodeClone, nodeName, err)
	}

	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, v1.Node{})
	if err != nil {
		return fmt.Errorf("failed to create patch for node %q: %v", nodeName, err)
	}

	_, err = c.CoreV1().Nodes().Patch(context.TODO(), nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
	return err
}

// ComputeHash returns a hash value calculated from pod template and
// a collisionCount to avoid hash collision. The hash will be safe encoded to
// avoid bad words.
func ComputeHash(template *v1.PodTemplateSpec, collisionCount *int32) string {
	podTemplateSpecHasher := fnv.New32a()
	hashutil.DeepHashObject(podTemplateSpecHasher, *template)

	// Add collisionCount in the hash if it exists.
	if collisionCount != nil {
		collisionCountBytes := make([]byte, 8)
		binary.LittleEndian.PutUint32(collisionCountBytes, uint32(*collisionCount))
		podTemplateSpecHasher.Write(collisionCountBytes)
	}

	return rand.SafeEncodeString(fmt.Sprint(podTemplateSpecHasher.Sum32()))
}
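
// exampleComputeHash is an illustrative sketch (hypothetical, not called
// anywhere in this package): the hash is deterministic for a given template,
// and bumping the collision count yields a different value, which is how
// callers sidestep hash collisions.
func exampleComputeHash(template *v1.PodTemplateSpec) (string, string) {
	var collisionCount int32 = 1
	return ComputeHash(template, nil), ComputeHash(template, &collisionCount)
}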

// AddOrUpdateLabelsOnNode updates the labels of the given node with the given
// map, retrying on conflict.
func AddOrUpdateLabelsOnNode(kubeClient clientset.Interface, nodeName string, labelsToUpdate map[string]string) error {
	firstTry := true
	return clientretry.RetryOnConflict(UpdateLabelBackoff, func() error {
		var err error
		var node *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		if firstTry {
			node, err = kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{ResourceVersion: "0"})
			firstTry = false
		} else {
			node, err = kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
		}
		if err != nil {
			return err
		}

		// Make a copy of the node and update the labels.
		newNode := node.DeepCopy()
		if newNode.Labels == nil {
			newNode.Labels = make(map[string]string)
		}
		for key, value := range labelsToUpdate {
			newNode.Labels[key] = value
		}

		oldData, err := json.Marshal(node)
		if err != nil {
			return fmt.Errorf("failed to marshal the existing node %#v: %v", node, err)
		}
		newData, err := json.Marshal(newNode)
		if err != nil {
			return fmt.Errorf("failed to marshal the new node %#v: %v", newNode, err)
		}
		patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Node{})
		if err != nil {
			return fmt.Errorf("failed to create a two-way merge patch: %v", err)
		}
		if _, err := kubeClient.CoreV1().Nodes().Patch(context.TODO(), node.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}); err != nil {
			return fmt.Errorf("failed to patch the node: %v", err)
		}
		return nil
	})
}