Implement resource limits priority function. This function checks whether the input pod's
resource limits can be satisfied by the input node's allocatable resources. If so, the node is assigned a score of 1; otherwise the node's score is left unchanged.
pkg/features/kube_features.go
@@ -206,6 +206,12 @@ const (
	// alpha: v1.9
	// Postpone deletion of a persistent volume claim in case it is used by a pod
	PVCProtection utilfeature.Feature = "PVCProtection"

	// owner: @aveshagarwal
	// alpha: v1.9
	//
	// Enable resource limits priority function
	ResourceLimitsPriorityFunction utilfeature.Feature = "ResourceLimitsPriorityFunction"
)

func init() {
@@ -244,6 +250,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureSpec{
	CustomPodDNS:                                {Default: false, PreRelease: utilfeature.Alpha},
	BlockVolume:                                 {Default: false, PreRelease: utilfeature.Alpha},
	PVCProtection:                               {Default: false, PreRelease: utilfeature.Alpha},
	ResourceLimitsPriorityFunction:              {Default: false, PreRelease: utilfeature.Alpha},

	// inherited features from generic apiserver, relisted here to get a conflict if it is changed
	// unintentionally on either side:
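Because the new gate defaults to off and is alpha, the priority function only takes effect when the gate is enabled explicitly. A minimal sketch of how it would be turned on for the scheduler, using the standard --feature-gates flag syntax (the exact component invocation depends on how the cluster is deployed):

    kube-scheduler --feature-gates=ResourceLimitsPriorityFunction=true ...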
plugin/pkg/scheduler/algorithm/priorities/BUILD
@@ -19,6 +19,7 @@ go_library(
        "node_label.go",
        "node_prefer_avoid_pods.go",
        "reduce.go",
        "resource_limits.go",
        "selector_spreading.go",
        "taint_toleration.go",
        "test_util.go",
@@ -54,6 +55,7 @@ go_test(
        "node_affinity_test.go",
        "node_label_test.go",
        "node_prefer_avoid_pods_test.go",
        "resource_limits_test.go",
        "selector_spreading_test.go",
        "taint_toleration_test.go",
    ],
plugin/pkg/scheduler/algorithm/priorities/resource_limits.go (new file, 128 lines)
@@ -0,0 +1,128 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"fmt"

	"k8s.io/api/core/v1"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

	"github.com/golang/glog"
)

// ResourceLimitsPriorityMap is a priority function that increases the score of the input node
// by 1 if the node satisfies the input pod's resource limits. In detail, it works as follows:
// if a node does not publish its allocatable resources (both cpu and memory), the node score
// is not affected; if a pod specifies neither cpu nor memory limits, the node score is not
// affected; if one or both of the pod's cpu and memory limits are satisfied, the node is
// assigned a score of 1.
// The rationale for the low score of 1 is that it is mainly intended to break ties between
// nodes that were assigned the same score by the least- and most-requested priority functions.
func ResourceLimitsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()

	// compute pod limits
	podLimits := getResourceLimits(pod)

	cpuScore := computeScore(podLimits.MilliCPU, allocatableResources.MilliCPU)
	memScore := computeScore(podLimits.Memory, allocatableResources.Memory)

	score := 0
	if cpuScore == 1 || memScore == 1 {
		score = 1
	}

	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is a visible performance gain from it.
		glog.Infof(
			"%v -> %v: Resource Limits Priority, allocatable %d millicores %d memory bytes, pod limits %d millicores %d memory bytes, score %d",
			pod.Name, node.Name,
			allocatableResources.MilliCPU, allocatableResources.Memory,
			podLimits.MilliCPU, podLimits.Memory,
			score,
		)
	}

	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: score,
	}, nil
}

// computeScore returns 1 if the limit value is non-zero and less than or equal to the
// allocatable value; otherwise it returns 0.
func computeScore(limit, allocatable int64) int64 {
	if limit != 0 && allocatable != 0 && limit <= allocatable {
		return 1
	}
	return 0
}

// getResourceLimits computes the resource limits for the input pod.
// The reason for creating this new function is to stay consistent with other
// priority functions, because most (perhaps all) of them work with
// schedulercache.Resource.
// TODO: cache it as part of the metadata passed to priority functions.
func getResourceLimits(pod *v1.Pod) *schedulercache.Resource {
	result := &schedulercache.Resource{}
	for _, container := range pod.Spec.Containers {
		result.Add(container.Resources.Limits)
	}

	// take max_resource(sum_pod, any_init_container)
	for _, container := range pod.Spec.InitContainers {
		for rName, rQuantity := range container.Resources.Limits {
			switch rName {
			case v1.ResourceMemory:
				if mem := rQuantity.Value(); mem > result.Memory {
					result.Memory = mem
				}
			case v1.ResourceCPU:
				if cpu := rQuantity.MilliValue(); cpu > result.MilliCPU {
					result.MilliCPU = cpu
				}
			// keep ephemeral storage and scalar resources too, even though score computation
			// in this and other priority functions is based only on cpu and memory.
			case v1.ResourceEphemeralStorage:
				if ephemeralStorage := rQuantity.Value(); ephemeralStorage > result.EphemeralStorage {
					result.EphemeralStorage = ephemeralStorage
				}
			case v1.ResourceNvidiaGPU:
				if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
					result.NvidiaGPU = gpu
				}
			default:
				if v1helper.IsScalarResourceName(rName) {
					value := rQuantity.Value()
					if value > result.ScalarResources[rName] {
						result.SetScalar(rName, value)
					}
				}
			}
		}
	}

	return result
}
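To make the two rules above concrete — pod limits are computed as max(sum over regular containers, any single init container), and a dimension scores 1 only when a non-zero limit fits within a non-zero allocatable — here is a minimal, self-contained sketch. It uses a simplified resources struct instead of the real schedulercache.Resource, so the names and numbers are illustrative only:

package main

import "fmt"

type resources struct {
	milliCPU, memory int64
}

// podLimits applies the max(sum_containers, any_init_container) rule.
func podLimits(containers, initContainers []resources) resources {
	var r resources
	for _, c := range containers {
		r.milliCPU += c.milliCPU
		r.memory += c.memory
	}
	for _, c := range initContainers {
		if c.milliCPU > r.milliCPU {
			r.milliCPU = c.milliCPU
		}
		if c.memory > r.memory {
			r.memory = c.memory
		}
	}
	return r
}

// score mirrors computeScore: 1 only if both values are non-zero and the limit fits.
func score(limit, allocatable int64) int64 {
	if limit != 0 && allocatable != 0 && limit <= allocatable {
		return 1
	}
	return 0
}

func main() {
	limits := podLimits(
		[]resources{{milliCPU: 1000, memory: 2000}, {milliCPU: 2000, memory: 3000}}, // sums to 3000m / 5000
		[]resources{{milliCPU: 4000, memory: 1000}},                                 // init container dominates cpu
	)
	alloc := resources{milliCPU: 4000, memory: 4000}
	cpu, mem := score(limits.milliCPU, alloc.milliCPU), score(limits.memory, alloc.memory)
	nodeScore := 0
	if cpu == 1 || mem == 1 { // cpu fits (4000 <= 4000), memory does not (5000 > 4000)
		nodeScore = 1
	}
	fmt.Printf("pod limits: %+v, node score: %d\n", limits, nodeScore) // limits {4000 5000}, score 1
}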
plugin/pkg/scheduler/algorithm/priorities/resource_limits_test.go (new file, 151 lines)
@@ -0,0 +1,151 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package priorities

import (
	"reflect"
	"testing"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func TestResourceLimitsPriority(t *testing.T) {
	noResources := v1.PodSpec{
		Containers: []v1.Container{},
	}

	cpuOnly := v1.PodSpec{
		NodeName: "machine1",
		Containers: []v1.Container{
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("1000m"),
						v1.ResourceMemory: resource.MustParse("0"),
					},
				},
			},
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("2000m"),
						v1.ResourceMemory: resource.MustParse("0"),
					},
				},
			},
		},
	}

	memOnly := v1.PodSpec{
		NodeName: "machine2",
		Containers: []v1.Container{
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("0"),
						v1.ResourceMemory: resource.MustParse("2000"),
					},
				},
			},
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("0"),
						v1.ResourceMemory: resource.MustParse("3000"),
					},
				},
			},
		},
	}

	cpuAndMemory := v1.PodSpec{
		NodeName: "machine2",
		Containers: []v1.Container{
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("1000m"),
						v1.ResourceMemory: resource.MustParse("2000"),
					},
				},
			},
			{
				Resources: v1.ResourceRequirements{
					Limits: v1.ResourceList{
						v1.ResourceCPU:    resource.MustParse("2000m"),
						v1.ResourceMemory: resource.MustParse("3000"),
					},
				},
			},
		},
	}

	tests := []struct {
		// input pod
		pod          *v1.Pod
		nodes        []*v1.Node
		expectedList schedulerapi.HostPriorityList
		test         string
	}{
		{
			pod:          &v1.Pod{Spec: noResources},
			nodes:        []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 0), makeNode("machine3", 0, 10000), makeNode("machine4", 0, 0)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}, {Host: "machine4", Score: 0}},
			test:         "pod does not specify its resource limits",
		},
		{
			pod:          &v1.Pod{Spec: cpuOnly},
			nodes:        []*v1.Node{makeNode("machine1", 3000, 10000), makeNode("machine2", 2000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 0}},
			test:         "pod only specifies cpu limits",
		},
		{
			pod:          &v1.Pod{Spec: memOnly},
			nodes:        []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 1}},
			test:         "pod only specifies mem limits",
		},
		{
			pod:          &v1.Pod{Spec: cpuAndMemory},
			nodes:        []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 1}},
			test:         "pod specifies both cpu and mem limits",
		},
		{
			pod:          &v1.Pod{Spec: cpuAndMemory},
			nodes:        []*v1.Node{makeNode("machine1", 0, 0)},
			expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}},
			test:         "node does not advertise its allocatables",
		},
	}

	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
		list, err := priorityFunction(ResourceLimitsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		if !reflect.DeepEqual(test.expectedList, list) {
			t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
		}
	}
}
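The test follows the table-driven pattern used by the other priority-function tests; makeNode and priorityFunction appear to be shared helpers from the same package (the BUILD change above lists test_util.go in the library sources). A typical way to run just this test from the repository root with plain go tooling (the exact invocation may differ under the project's bazel setup):

    go test ./plugin/pkg/scheduler/algorithm/priorities/ -run TestResourceLimitsPriority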
plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go
@@ -106,6 +106,10 @@ func init() {
	factory.RegisterPriorityFunction2("ImageLocalityPriority", priorities.ImageLocalityPriorityMap, nil, 1)
	// Optional, cluster-autoscaler friendly priority function - give used nodes higher priority.
	factory.RegisterPriorityFunction2("MostRequestedPriority", priorities.MostRequestedPriorityMap, nil, 1)
	// Prioritizes nodes that satisfy the pod's resource limits
	if utilfeature.DefaultFeatureGate.Enabled(features.ResourceLimitsPriorityFunction) {
		factory.RegisterPriorityFunction2("ResourceLimitsPriority", priorities.ResourceLimitsPriorityMap, nil, 1)
	}
}

func defaultPredicates() sets.String {
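Registering the function with the factory under the name "ResourceLimitsPriority" also makes it addressable from a scheduler policy file once the gate is on. A hypothetical minimal policy snippet, illustrative only and not part of this commit (the Policy format with named priorities and weights is the one kube-scheduler accepts via --policy-config-file):

{
    "kind": "Policy",
    "apiVersion": "v1",
    "priorities": [
        {"name": "ResourceLimitsPriority", "weight": 1}
    ]
}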