mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-03 19:58:17 +00:00 
			
		
		
		
	feat: implement "queue-sort" extension point for scheduling framework
This commit is contained in:
		@@ -38,6 +38,7 @@ import (
 | 
			
		||||
	"k8s.io/client-go/tools/cache"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
 | 
			
		||||
	priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
 | 
			
		||||
	framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/scheduler/metrics"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/scheduler/util"
 | 
			
		||||
)
 | 
			
		||||
@@ -87,8 +88,8 @@ type SchedulingQueue interface {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewSchedulingQueue initializes a priority queue as a new scheduling queue.
 | 
			
		||||
func NewSchedulingQueue(stop <-chan struct{}) SchedulingQueue {
 | 
			
		||||
	return NewPriorityQueue(stop)
 | 
			
		||||
func NewSchedulingQueue(stop <-chan struct{}, fwk framework.Framework) SchedulingQueue {
 | 
			
		||||
	return NewPriorityQueue(stop, fwk)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NominatedNodeName returns nominated node name of a Pod.
 | 
			
		||||
@@ -140,43 +141,48 @@ type PriorityQueue struct {
 | 
			
		||||
// Making sure that PriorityQueue implements SchedulingQueue.
 | 
			
		||||
var _ = SchedulingQueue(&PriorityQueue{})
 | 
			
		||||
 | 
			
		||||
// podInfo is minimum cell in the scheduling queue.
 | 
			
		||||
type podInfo struct {
 | 
			
		||||
	pod *v1.Pod
 | 
			
		||||
	// The time pod added to the scheduling queue.
 | 
			
		||||
	timestamp time.Time
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// newPodInfoNoTimestamp builds a podInfo object without timestamp.
 | 
			
		||||
func newPodInfoNoTimestamp(pod *v1.Pod) *podInfo {
 | 
			
		||||
	return &podInfo{
 | 
			
		||||
		pod: pod,
 | 
			
		||||
// newPodInfoNoTimestamp builds a PodInfo object without timestamp.
 | 
			
		||||
func newPodInfoNoTimestamp(pod *v1.Pod) *framework.PodInfo {
 | 
			
		||||
	return &framework.PodInfo{
 | 
			
		||||
		Pod: pod,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// activeQComp is the function used by the activeQ heap algorithm to sort pods.
 | 
			
		||||
// It sorts pods based on their priority. When priorities are equal, it uses
 | 
			
		||||
// podInfo.timestamp.
 | 
			
		||||
// PodInfo.Timestamp.
 | 
			
		||||
func activeQComp(podInfo1, podInfo2 interface{}) bool {
 | 
			
		||||
	pInfo1 := podInfo1.(*podInfo)
 | 
			
		||||
	pInfo2 := podInfo2.(*podInfo)
 | 
			
		||||
	prio1 := util.GetPodPriority(pInfo1.pod)
 | 
			
		||||
	prio2 := util.GetPodPriority(pInfo2.pod)
 | 
			
		||||
	return (prio1 > prio2) || (prio1 == prio2 && pInfo1.timestamp.Before(pInfo2.timestamp))
 | 
			
		||||
	pInfo1 := podInfo1.(*framework.PodInfo)
 | 
			
		||||
	pInfo2 := podInfo2.(*framework.PodInfo)
 | 
			
		||||
	prio1 := util.GetPodPriority(pInfo1.Pod)
 | 
			
		||||
	prio2 := util.GetPodPriority(pInfo2.Pod)
 | 
			
		||||
	return (prio1 > prio2) || (prio1 == prio2 && pInfo1.Timestamp.Before(pInfo2.Timestamp))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewPriorityQueue creates a PriorityQueue object.
 | 
			
		||||
func NewPriorityQueue(stop <-chan struct{}) *PriorityQueue {
 | 
			
		||||
	return NewPriorityQueueWithClock(stop, util.RealClock{})
 | 
			
		||||
func NewPriorityQueue(stop <-chan struct{}, fwk framework.Framework) *PriorityQueue {
 | 
			
		||||
	return NewPriorityQueueWithClock(stop, util.RealClock{}, fwk)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewPriorityQueueWithClock creates a PriorityQueue which uses the passed clock for time.
 | 
			
		||||
func NewPriorityQueueWithClock(stop <-chan struct{}, clock util.Clock) *PriorityQueue {
 | 
			
		||||
func NewPriorityQueueWithClock(stop <-chan struct{}, clock util.Clock, fwk framework.Framework) *PriorityQueue {
 | 
			
		||||
	comp := activeQComp
 | 
			
		||||
	if fwk != nil {
 | 
			
		||||
		if queueSortFunc := fwk.QueueSortFunc(); queueSortFunc != nil {
 | 
			
		||||
			comp = func(podInfo1, podInfo2 interface{}) bool {
 | 
			
		||||
				pInfo1 := podInfo1.(*framework.PodInfo)
 | 
			
		||||
				pInfo2 := podInfo2.(*framework.PodInfo)
 | 
			
		||||
 | 
			
		||||
				return queueSortFunc(pInfo1, pInfo2)
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	pq := &PriorityQueue{
 | 
			
		||||
		clock:            clock,
 | 
			
		||||
		stop:             stop,
 | 
			
		||||
		podBackoff:       NewPodBackoffMap(1*time.Second, 10*time.Second),
 | 
			
		||||
		activeQ:          util.NewHeapWithRecorder(podInfoKeyFunc, activeQComp, metrics.NewActivePodsRecorder()),
 | 
			
		||||
		activeQ:          util.NewHeapWithRecorder(podInfoKeyFunc, comp, metrics.NewActivePodsRecorder()),
 | 
			
		||||
		unschedulableQ:   newUnschedulablePodsMap(metrics.NewUnschedulablePodsRecorder()),
 | 
			
		||||
		nominatedPods:    newNominatedPodMap(),
 | 
			
		||||
		moveRequestCycle: -1,
 | 
			
		||||
@@ -334,7 +340,7 @@ func (p *PriorityQueue) flushBackoffQCompleted() {
 | 
			
		||||
		if rawPodInfo == nil {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
		pod := rawPodInfo.(*podInfo).pod
 | 
			
		||||
		pod := rawPodInfo.(*framework.PodInfo).Pod
 | 
			
		||||
		boTime, found := p.podBackoff.GetBackoffTime(nsNameForPod(pod))
 | 
			
		||||
		if !found {
 | 
			
		||||
			klog.Errorf("Unable to find backoff value for pod %v in backoffQ", nsNameForPod(pod))
 | 
			
		||||
@@ -363,10 +369,10 @@ func (p *PriorityQueue) flushUnschedulableQLeftover() {
 | 
			
		||||
	p.lock.Lock()
 | 
			
		||||
	defer p.lock.Unlock()
 | 
			
		||||
 | 
			
		||||
	var podsToMove []*podInfo
 | 
			
		||||
	var podsToMove []*framework.PodInfo
 | 
			
		||||
	currentTime := p.clock.Now()
 | 
			
		||||
	for _, pInfo := range p.unschedulableQ.podInfoMap {
 | 
			
		||||
		lastScheduleTime := pInfo.timestamp
 | 
			
		||||
		lastScheduleTime := pInfo.Timestamp
 | 
			
		||||
		if currentTime.Sub(lastScheduleTime) > unschedulableQTimeInterval {
 | 
			
		||||
			podsToMove = append(podsToMove, pInfo)
 | 
			
		||||
		}
 | 
			
		||||
@@ -396,9 +402,9 @@ func (p *PriorityQueue) Pop() (*v1.Pod, error) {
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	pInfo := obj.(*podInfo)
 | 
			
		||||
	pInfo := obj.(*framework.PodInfo)
 | 
			
		||||
	p.schedulingCycle++
 | 
			
		||||
	return pInfo.pod, err
 | 
			
		||||
	return pInfo.Pod, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// isPodUpdated checks if the pod is updated in a way that it may have become
 | 
			
		||||
@@ -428,7 +434,7 @@ func (p *PriorityQueue) Update(oldPod, newPod *v1.Pod) error {
 | 
			
		||||
		if oldPodInfo, exists, _ := p.activeQ.Get(oldPodInfo); exists {
 | 
			
		||||
			p.nominatedPods.update(oldPod, newPod)
 | 
			
		||||
			newPodInfo := newPodInfoNoTimestamp(newPod)
 | 
			
		||||
			newPodInfo.timestamp = oldPodInfo.(*podInfo).timestamp
 | 
			
		||||
			newPodInfo.Timestamp = oldPodInfo.(*framework.PodInfo).Timestamp
 | 
			
		||||
			err := p.activeQ.Update(newPodInfo)
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
@@ -438,7 +444,7 @@ func (p *PriorityQueue) Update(oldPod, newPod *v1.Pod) error {
 | 
			
		||||
			p.nominatedPods.update(oldPod, newPod)
 | 
			
		||||
			p.podBackoffQ.Delete(newPodInfoNoTimestamp(oldPod))
 | 
			
		||||
			newPodInfo := newPodInfoNoTimestamp(newPod)
 | 
			
		||||
			newPodInfo.timestamp = oldPodInfo.(*podInfo).timestamp
 | 
			
		||||
			newPodInfo.Timestamp = oldPodInfo.(*framework.PodInfo).Timestamp
 | 
			
		||||
			err := p.activeQ.Add(newPodInfo)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				p.cond.Broadcast()
 | 
			
		||||
@@ -451,11 +457,11 @@ func (p *PriorityQueue) Update(oldPod, newPod *v1.Pod) error {
 | 
			
		||||
	if usPodInfo := p.unschedulableQ.get(newPod); usPodInfo != nil {
 | 
			
		||||
		p.nominatedPods.update(oldPod, newPod)
 | 
			
		||||
		newPodInfo := newPodInfoNoTimestamp(newPod)
 | 
			
		||||
		newPodInfo.timestamp = usPodInfo.timestamp
 | 
			
		||||
		newPodInfo.Timestamp = usPodInfo.Timestamp
 | 
			
		||||
		if isPodUpdated(oldPod, newPod) {
 | 
			
		||||
			// If the pod is updated reset backoff
 | 
			
		||||
			p.clearPodBackoff(newPod)
 | 
			
		||||
			p.unschedulableQ.delete(usPodInfo.pod)
 | 
			
		||||
			p.unschedulableQ.delete(usPodInfo.Pod)
 | 
			
		||||
			err := p.activeQ.Add(newPodInfo)
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				p.cond.Broadcast()
 | 
			
		||||
@@ -514,7 +520,7 @@ func (p *PriorityQueue) MoveAllToActiveQueue() {
 | 
			
		||||
	p.lock.Lock()
 | 
			
		||||
	defer p.lock.Unlock()
 | 
			
		||||
	for _, pInfo := range p.unschedulableQ.podInfoMap {
 | 
			
		||||
		pod := pInfo.pod
 | 
			
		||||
		pod := pInfo.Pod
 | 
			
		||||
		if p.isPodBackingOff(pod) {
 | 
			
		||||
			if err := p.podBackoffQ.Add(pInfo); err != nil {
 | 
			
		||||
				klog.Errorf("Error adding pod %v to the backoff queue: %v", pod.Name, err)
 | 
			
		||||
@@ -531,9 +537,9 @@ func (p *PriorityQueue) MoveAllToActiveQueue() {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NOTE: this function assumes lock has been acquired in caller
 | 
			
		||||
func (p *PriorityQueue) movePodsToActiveQueue(podInfoList []*podInfo) {
 | 
			
		||||
func (p *PriorityQueue) movePodsToActiveQueue(podInfoList []*framework.PodInfo) {
 | 
			
		||||
	for _, pInfo := range podInfoList {
 | 
			
		||||
		pod := pInfo.pod
 | 
			
		||||
		pod := pInfo.Pod
 | 
			
		||||
		if p.isPodBackingOff(pod) {
 | 
			
		||||
			if err := p.podBackoffQ.Add(pInfo); err != nil {
 | 
			
		||||
				klog.Errorf("Error adding pod %v to the backoff queue: %v", pod.Name, err)
 | 
			
		||||
@@ -552,10 +558,10 @@ func (p *PriorityQueue) movePodsToActiveQueue(podInfoList []*podInfo) {
 | 
			
		||||
// getUnschedulablePodsWithMatchingAffinityTerm returns unschedulable pods which have
 | 
			
		||||
// any affinity term that matches "pod".
 | 
			
		||||
// NOTE: this function assumes lock has been acquired in caller.
 | 
			
		||||
func (p *PriorityQueue) getUnschedulablePodsWithMatchingAffinityTerm(pod *v1.Pod) []*podInfo {
 | 
			
		||||
	var podsToMove []*podInfo
 | 
			
		||||
func (p *PriorityQueue) getUnschedulablePodsWithMatchingAffinityTerm(pod *v1.Pod) []*framework.PodInfo {
 | 
			
		||||
	var podsToMove []*framework.PodInfo
 | 
			
		||||
	for _, pInfo := range p.unschedulableQ.podInfoMap {
 | 
			
		||||
		up := pInfo.pod
 | 
			
		||||
		up := pInfo.Pod
 | 
			
		||||
		affinity := up.Spec.Affinity
 | 
			
		||||
		if affinity != nil && affinity.PodAffinity != nil {
 | 
			
		||||
			terms := predicates.GetPodAffinityTerms(affinity.PodAffinity)
 | 
			
		||||
@@ -591,13 +597,13 @@ func (p *PriorityQueue) PendingPods() []*v1.Pod {
 | 
			
		||||
	defer p.lock.RUnlock()
 | 
			
		||||
	result := []*v1.Pod{}
 | 
			
		||||
	for _, pInfo := range p.activeQ.List() {
 | 
			
		||||
		result = append(result, pInfo.(*podInfo).pod)
 | 
			
		||||
		result = append(result, pInfo.(*framework.PodInfo).Pod)
 | 
			
		||||
	}
 | 
			
		||||
	for _, pInfo := range p.podBackoffQ.List() {
 | 
			
		||||
		result = append(result, pInfo.(*podInfo).pod)
 | 
			
		||||
		result = append(result, pInfo.(*framework.PodInfo).Pod)
 | 
			
		||||
	}
 | 
			
		||||
	for _, pInfo := range p.unschedulableQ.podInfoMap {
 | 
			
		||||
		result = append(result, pInfo.pod)
 | 
			
		||||
		result = append(result, pInfo.Pod)
 | 
			
		||||
	}
 | 
			
		||||
	return result
 | 
			
		||||
}
 | 
			
		||||
@@ -628,10 +634,10 @@ func (p *PriorityQueue) UpdateNominatedPodForNode(pod *v1.Pod, nodeName string)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (p *PriorityQueue) podsCompareBackoffCompleted(podInfo1, podInfo2 interface{}) bool {
 | 
			
		||||
	pInfo1 := podInfo1.(*podInfo)
 | 
			
		||||
	pInfo2 := podInfo2.(*podInfo)
 | 
			
		||||
	bo1, _ := p.podBackoff.GetBackoffTime(nsNameForPod(pInfo1.pod))
 | 
			
		||||
	bo2, _ := p.podBackoff.GetBackoffTime(nsNameForPod(pInfo2.pod))
 | 
			
		||||
	pInfo1 := podInfo1.(*framework.PodInfo)
 | 
			
		||||
	pInfo2 := podInfo2.(*framework.PodInfo)
 | 
			
		||||
	bo1, _ := p.podBackoff.GetBackoffTime(nsNameForPod(pInfo1.Pod))
 | 
			
		||||
	bo2, _ := p.podBackoff.GetBackoffTime(nsNameForPod(pInfo2.Pod))
 | 
			
		||||
	return bo1.Before(bo2)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -642,25 +648,25 @@ func (p *PriorityQueue) NumUnschedulablePods() int {
 | 
			
		||||
	return len(p.unschedulableQ.podInfoMap)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// newPodInfo builds a podInfo object.
 | 
			
		||||
func (p *PriorityQueue) newPodInfo(pod *v1.Pod) *podInfo {
 | 
			
		||||
// newPodInfo builds a PodInfo object.
 | 
			
		||||
func (p *PriorityQueue) newPodInfo(pod *v1.Pod) *framework.PodInfo {
 | 
			
		||||
	if p.clock == nil {
 | 
			
		||||
		return &podInfo{
 | 
			
		||||
			pod: pod,
 | 
			
		||||
		return &framework.PodInfo{
 | 
			
		||||
			Pod: pod,
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return &podInfo{
 | 
			
		||||
		pod:       pod,
 | 
			
		||||
		timestamp: p.clock.Now(),
 | 
			
		||||
	return &framework.PodInfo{
 | 
			
		||||
		Pod:       pod,
 | 
			
		||||
		Timestamp: p.clock.Now(),
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// UnschedulablePodsMap holds pods that cannot be scheduled. This data structure
 | 
			
		||||
// is used to implement unschedulableQ.
 | 
			
		||||
type UnschedulablePodsMap struct {
 | 
			
		||||
	// podInfoMap is a map key by a pod's full-name and the value is a pointer to the podInfo.
 | 
			
		||||
	podInfoMap map[string]*podInfo
 | 
			
		||||
	// podInfoMap is a map keyed by a pod's full name; each value is a pointer to the PodInfo.
 | 
			
		||||
	podInfoMap map[string]*framework.PodInfo
 | 
			
		||||
	keyFunc    func(*v1.Pod) string
 | 
			
		||||
	// metricRecorder updates the counter when elements of an unschedulablePodsMap
 | 
			
		||||
	// get added or removed, and it does nothing if it's nil
 | 
			
		||||
@@ -668,8 +674,8 @@ type UnschedulablePodsMap struct {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// addOrUpdate adds a pod to the unschedulable podInfoMap.
 | 
			
		||||
func (u *UnschedulablePodsMap) addOrUpdate(pInfo *podInfo) {
 | 
			
		||||
	podID := u.keyFunc(pInfo.pod)
 | 
			
		||||
func (u *UnschedulablePodsMap) addOrUpdate(pInfo *framework.PodInfo) {
 | 
			
		||||
	podID := u.keyFunc(pInfo.Pod)
 | 
			
		||||
	if _, exists := u.podInfoMap[podID]; !exists && u.metricRecorder != nil {
 | 
			
		||||
		u.metricRecorder.Inc()
 | 
			
		||||
	}
 | 
			
		||||
@@ -685,9 +691,9 @@ func (u *UnschedulablePodsMap) delete(pod *v1.Pod) {
 | 
			
		||||
	delete(u.podInfoMap, podID)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Get returns the podInfo if a pod with the same key as the key of the given "pod"
 | 
			
		||||
// get returns the PodInfo if a pod with the same key as the key of the given "pod"
 | 
			
		||||
// is found in the map. It returns nil otherwise.
 | 
			
		||||
func (u *UnschedulablePodsMap) get(pod *v1.Pod) *podInfo {
 | 
			
		||||
func (u *UnschedulablePodsMap) get(pod *v1.Pod) *framework.PodInfo {
 | 
			
		||||
	podKey := u.keyFunc(pod)
 | 
			
		||||
	if pInfo, exists := u.podInfoMap[podKey]; exists {
 | 
			
		||||
		return pInfo
 | 
			
		||||
@@ -697,7 +703,7 @@ func (u *UnschedulablePodsMap) get(pod *v1.Pod) *podInfo {
 | 
			
		||||
 | 
			
		||||
// clear removes all the entries from the unschedulable podInfoMap.
 | 
			
		||||
func (u *UnschedulablePodsMap) clear() {
 | 
			
		||||
	u.podInfoMap = make(map[string]*podInfo)
 | 
			
		||||
	u.podInfoMap = make(map[string]*framework.PodInfo)
 | 
			
		||||
	if u.metricRecorder != nil {
 | 
			
		||||
		u.metricRecorder.Clear()
 | 
			
		||||
	}
 | 
			
		||||
@@ -706,7 +712,7 @@ func (u *UnschedulablePodsMap) clear() {
 | 
			
		||||
// newUnschedulablePodsMap initializes a new object of UnschedulablePodsMap.
 | 
			
		||||
func newUnschedulablePodsMap(metricRecorder metrics.MetricRecorder) *UnschedulablePodsMap {
 | 
			
		||||
	return &UnschedulablePodsMap{
 | 
			
		||||
		podInfoMap:     make(map[string]*podInfo),
 | 
			
		||||
		podInfoMap:     make(map[string]*framework.PodInfo),
 | 
			
		||||
		keyFunc:        util.GetPodFullName,
 | 
			
		||||
		metricRecorder: metricRecorder,
 | 
			
		||||
	}
 | 
			
		||||
@@ -801,5 +807,5 @@ func MakeNextPodFunc(queue SchedulingQueue) func() *v1.Pod {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func podInfoKeyFunc(obj interface{}) (string, error) {
 | 
			
		||||
	return cache.MetaNamespaceKeyFunc(obj.(*podInfo).pod)
 | 
			
		||||
	return cache.MetaNamespaceKeyFunc(obj.(*framework.PodInfo).Pod)
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user